[clang] d04d922 - [OPENMP]Fix PR50347: Mapping of global scope deep object fails.

Alexey Bataev via cfe-commits cfe-commits at lists.llvm.org
Fri Feb 25 10:58:50 PST 2022


Author: Alexey Bataev
Date: 2022-02-25T10:54:24-08:00
New Revision: d04d9220e1c3d1b72cd2c7401fe684af16973bcc

URL: https://github.com/llvm/llvm-project/commit/d04d9220e1c3d1b72cd2c7401fe684af16973bcc
DIFF: https://github.com/llvm/llvm-project/commit/d04d9220e1c3d1b72cd2c7401fe684af16973bcc.diff

LOG: [OPENMP]Fix PR50347: Mapping of global scope deep object fails.

Changed the we handle llvm::Constants in sizes arrays. ConstExprs and
GlobalValues cannot be used as initializers, need to put them at the
runtime, otherwise there wight be the compilation errors.

Differential Revision: https://reviews.llvm.org/D105297

Added: 
    

Modified: 
    clang/lib/CodeGen/CGOpenMPRuntime.cpp
    clang/test/OpenMP/reduction_implicit_map.cpp
    clang/test/OpenMP/target_codegen.cpp
    clang/test/OpenMP/target_data_codegen.cpp
    clang/test/OpenMP/target_data_map_codegen_hold.cpp
    clang/test/OpenMP/target_data_map_pointer_array_subscript_codegen.cpp
    clang/test/OpenMP/target_data_use_device_addr_codegen.cpp
    clang/test/OpenMP/target_defaultmap_codegen_01.cpp
    clang/test/OpenMP/target_enter_data_codegen.cpp
    clang/test/OpenMP/target_enter_data_depend_codegen.cpp
    clang/test/OpenMP/target_exit_data_codegen.cpp
    clang/test/OpenMP/target_exit_data_depend_codegen.cpp
    clang/test/OpenMP/target_firstprivate_codegen.cpp
    clang/test/OpenMP/target_map_codegen_12.cpp
    clang/test/OpenMP/target_map_codegen_13.cpp
    clang/test/OpenMP/target_map_codegen_14.cpp
    clang/test/OpenMP/target_map_codegen_18.inc
    clang/test/OpenMP/target_map_codegen_20.cpp
    clang/test/OpenMP/target_map_codegen_23.cpp
    clang/test/OpenMP/target_map_codegen_28.cpp
    clang/test/OpenMP/target_map_codegen_29.cpp
    clang/test/OpenMP/target_map_codegen_31.cpp
    clang/test/OpenMP/target_map_codegen_32.cpp
    clang/test/OpenMP/target_map_codegen_34.cpp
    clang/test/OpenMP/target_map_codegen_35.cpp
    clang/test/OpenMP/target_map_codegen_hold.cpp
    clang/test/OpenMP/target_map_member_expr_array_section_codegen.cpp
    clang/test/OpenMP/target_parallel_codegen.cpp
    clang/test/OpenMP/target_parallel_for_codegen.cpp
    clang/test/OpenMP/target_parallel_for_simd_codegen.cpp
    clang/test/OpenMP/target_simd_codegen.cpp
    clang/test/OpenMP/target_teams_codegen.cpp
    clang/test/OpenMP/target_teams_distribute_codegen.cpp
    clang/test/OpenMP/target_teams_distribute_collapse_codegen.cpp
    clang/test/OpenMP/target_teams_distribute_dist_schedule_codegen.cpp
    clang/test/OpenMP/target_teams_distribute_parallel_for_collapse_codegen.cpp
    clang/test/OpenMP/target_teams_distribute_parallel_for_dist_schedule_codegen.cpp
    clang/test/OpenMP/target_teams_distribute_parallel_for_schedule_codegen.cpp
    clang/test/OpenMP/target_teams_distribute_parallel_for_simd_collapse_codegen.cpp
    clang/test/OpenMP/target_teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp
    clang/test/OpenMP/target_teams_distribute_parallel_for_simd_schedule_codegen.cpp
    clang/test/OpenMP/target_teams_distribute_simd_codegen.cpp
    clang/test/OpenMP/target_teams_distribute_simd_collapse_codegen.cpp
    clang/test/OpenMP/target_teams_distribute_simd_dist_schedule_codegen.cpp
    clang/test/OpenMP/target_update_codegen.cpp
    clang/test/OpenMP/target_update_depend_codegen.cpp
    clang/test/OpenMP/teams_distribute_codegen.cpp
    clang/test/OpenMP/teams_distribute_collapse_codegen.cpp
    clang/test/OpenMP/teams_distribute_dist_schedule_codegen.cpp
    clang/test/OpenMP/teams_distribute_parallel_for_codegen.cpp
    clang/test/OpenMP/teams_distribute_parallel_for_collapse_codegen.cpp
    clang/test/OpenMP/teams_distribute_parallel_for_dist_schedule_codegen.cpp
    clang/test/OpenMP/teams_distribute_parallel_for_schedule_codegen.cpp
    clang/test/OpenMP/teams_distribute_parallel_for_simd_codegen.cpp
    clang/test/OpenMP/teams_distribute_parallel_for_simd_collapse_codegen.cpp
    clang/test/OpenMP/teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp
    clang/test/OpenMP/teams_distribute_parallel_for_simd_schedule_codegen.cpp
    clang/test/OpenMP/teams_distribute_simd_codegen.cpp
    clang/test/OpenMP/teams_distribute_simd_collapse_codegen.cpp
    clang/test/OpenMP/teams_distribute_simd_dist_schedule_codegen.cpp
    clang/test/OpenMP/teams_firstprivate_codegen.cpp

Removed: 
    


################################################################################
diff  --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index c02641c4cba97..c392e37452a78 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -29,6 +29,7 @@
 #include "clang/CodeGen/ConstantInitBuilder.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/SetOperations.h"
+#include "llvm/ADT/SmallBitVector.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/Bitcode/BitcodeReader.h"
 #include "llvm/IR/Constants.h"
@@ -9602,12 +9603,6 @@ static void emitOffloadingArrays(
   if (Info.NumberOfPtrs) {
     // Detect if we have any capture size requiring runtime evaluation of the
     // size so that a constant array could be eventually used.
-    bool hasRuntimeEvaluationCaptureSize = false;
-    for (llvm::Value *S : CombinedInfo.Sizes)
-      if (!isa<llvm::Constant>(S)) {
-        hasRuntimeEvaluationCaptureSize = true;
-        break;
-      }
 
     llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
     QualType PointerArrayType = Ctx.getConstantArrayType(
@@ -9627,35 +9622,56 @@ static void emitOffloadingArrays(
     // need to fill up the arrays as we do for the pointers.
     QualType Int64Ty =
         Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
-    if (hasRuntimeEvaluationCaptureSize) {
+    SmallVector<llvm::Constant *> ConstSizes(
+        CombinedInfo.Sizes.size(), llvm::ConstantInt::get(CGF.Int64Ty, 0));
+    llvm::SmallBitVector RuntimeSizes(CombinedInfo.Sizes.size());
+    for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) {
+      if (auto *CI = dyn_cast<llvm::Constant>(CombinedInfo.Sizes[I])) {
+        if (!isa<llvm::ConstantExpr>(CI) && !isa<llvm::GlobalValue>(CI)) {
+          if (IsNonContiguous && (CombinedInfo.Types[I] &
+                                  MappableExprsHandler::OMP_MAP_NON_CONTIG))
+            ConstSizes[I] = llvm::ConstantInt::get(
+                CGF.Int64Ty, CombinedInfo.NonContigInfo.Dims[I]);
+          else
+            ConstSizes[I] = CI;
+          continue;
+        }
+      }
+      RuntimeSizes.set(I);
+    }
+
+    if (RuntimeSizes.all()) {
       QualType SizeArrayType = Ctx.getConstantArrayType(
           Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
           /*IndexTypeQuals=*/0);
       Info.SizesArray =
           CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
     } else {
-      // We expect all the sizes to be constant, so we collect them to create
-      // a constant array.
-      SmallVector<llvm::Constant *, 16> ConstSizes;
-      for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) {
-        if (IsNonContiguous &&
-            (CombinedInfo.Types[I] & MappableExprsHandler::OMP_MAP_NON_CONTIG)) {
-          ConstSizes.push_back(llvm::ConstantInt::get(
-              CGF.Int64Ty, CombinedInfo.NonContigInfo.Dims[I]));
-        } else {
-          ConstSizes.push_back(cast<llvm::Constant>(CombinedInfo.Sizes[I]));
-        }
-      }
-
       auto *SizesArrayInit = llvm::ConstantArray::get(
           llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
       std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
       auto *SizesArrayGbl = new llvm::GlobalVariable(
-          CGM.getModule(), SizesArrayInit->getType(),
-          /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
-          SizesArrayInit, Name);
+          CGM.getModule(), SizesArrayInit->getType(), /*isConstant=*/true,
+          llvm::GlobalValue::PrivateLinkage, SizesArrayInit, Name);
       SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
-      Info.SizesArray = SizesArrayGbl;
+      if (RuntimeSizes.any()) {
+        QualType SizeArrayType = Ctx.getConstantArrayType(
+            Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
+            /*IndexTypeQuals=*/0);
+        Address Buffer = CGF.CreateMemTemp(SizeArrayType, ".offload_sizes");
+        llvm::Value *GblConstPtr =
+            CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+                SizesArrayGbl, CGM.Int64Ty->getPointerTo());
+        CGF.Builder.CreateMemCpy(
+            Buffer,
+            Address(GblConstPtr, CGM.Int64Ty,
+                    CGM.getNaturalTypeAlignment(Ctx.getIntTypeForBitwidth(
+                        /*DestWidth=*/64, /*Signed=*/false))),
+            CGF.getTypeSize(SizeArrayType));
+        Info.SizesArray = Buffer.getPointer();
+      } else {
+        Info.SizesArray = SizesArrayGbl;
+      }
     }
 
     // The map types are always constant so we don't need to generate code to
@@ -9729,7 +9745,7 @@ static void emitOffloadingArrays(
           Address::deprecated(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
       CGF.Builder.CreateStore(PVal, PAddr);
 
-      if (hasRuntimeEvaluationCaptureSize) {
+      if (RuntimeSizes.test(I)) {
         llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
             llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
             Info.SizesArray,

diff  --git a/clang/test/OpenMP/reduction_implicit_map.cpp b/clang/test/OpenMP/reduction_implicit_map.cpp
index 2d924820fbf77..80b27e6df1fb9 100644
--- a/clang/test/OpenMP/reduction_implicit_map.cpp
+++ b/clang/test/OpenMP/reduction_implicit_map.cpp
@@ -1,3 +1,4 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _
 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ \
 // RUN:  -triple powerpc64le-unknown-unknown -DCUDA \
 // RUN:  -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o \
@@ -34,7 +35,6 @@ int foo(int n) {
   return 0;
 }
 // CHECK-NOT @.offload_maptypes
-// CHECK: call void @__kmpc_nvptx_end_reduce_nowait(
 #elif defined(DIAG)
 class S2 {
   mutable int a;
@@ -57,11 +57,7 @@ int bar() {
   return 0;
 }
 // map for variable o
-// CHECK1: offload_sizes = private unnamed_addr constant [1 x i64] [i64 4]
-// CHECK1: offload_maptypes = private unnamed_addr constant [1 x i64] [i64 547]
 // map for b:
-// CHECK1: @.offload_sizes{{.*}} = private unnamed_addr constant [1 x i64] [i64 8000]
-// CHECK1: @.offload_maptypes{{.*}} = private unnamed_addr constant [1 x i64] [i64 547]
 #else
 // expected-no-diagnostics
 
@@ -86,25 +82,6 @@ void sum(int* input, int size, int* output)
   for (int i = 0; i < size; i++)
     ;
 }
-//CHECK2: @.offload_sizes = private unnamed_addr constant [2 x i64] [i64 4, i64 8]
-//CHECK2: @.offload_maptypes.10 = private unnamed_addr constant [2 x i64] [i64 800, i64 547]
-//CHECK2: @.offload_sizes.13 = private unnamed_addr constant [2 x i64] [i64 4, i64 4]
-//CHECK2: @.offload_maptypes.14 = private unnamed_addr constant [2 x i64] [i64 800, i64 547]
-//CHECK2: define dso_local void @_Z3sumPiiS_
-//CHECK2-NEXT: entry
-//CHECK2-NEXT: [[INP:%.*]] = alloca i32*
-//CHECK2-NEXT: [[SIZE:%.*]] = alloca i32
-//CHECK2-NEXT: [[OUTP:%.*]] = alloca i32*
-//CHECK2:      [[OFFSIZE:%.*]] = alloca [3 x i64]
-//CHECK2:      [[OFFSIZE10:%.*]] = alloca [3 x i64]
-//CHECK2:      [[T15:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[OFFSIZE]], i32 0, i32 0
-//CHECK2-NEXT: store i64 4, i64* [[T15]]
-//CHECK2:      [[T21:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[OFFSIZE]], i32 0, i32 1
-//CHECK2-NEXT: store i64 4, i64* [[T21]]
-//CHECK2:     [[T53:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[OFFSIZE10]], i32 0, i32 0
-//CHECK2-NEXT: store i64 4, i64* [[T53]]
-//CHECK2:     [[T59:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[OFFSIZE10]], i32 0, i32 1
-//CHECK2-NEXT: store i64 12, i64* [[T59]]
 #endif
 int main()
 {
@@ -120,3 +97,1997 @@ int main()
 #endif
   return 0;
 }
+// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l32
+// CHECK-SAME: (double* noundef [[E:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[E_ADDR:%.*]] = alloca double*, align 8
+// CHECK-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8
+// CHECK-NEXT:    store double* [[E]], double** [[E_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true)
+// CHECK-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+// CHECK-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
+// CHECK:       user_code.entry:
+// CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]])
+// CHECK-NEXT:    [[TMP2:%.*]] = load double*, double** [[E_ADDR]], align 8
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
+// CHECK-NEXT:    [[TMP4:%.*]] = bitcast double* [[TMP2]] to i8*
+// CHECK-NEXT:    store i8* [[TMP4]], i8** [[TMP3]], align 8
+// CHECK-NEXT:    [[TMP5:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
+// CHECK-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, double*)* @__omp_outlined__ to i8*), i8* null, i8** [[TMP5]], i64 1)
+// CHECK-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true)
+// CHECK-NEXT:    ret void
+// CHECK:       worker.exit:
+// CHECK-NEXT:    ret void
+//
+//
+// CHECK-LABEL: define {{[^@]+}}@__omp_outlined__
+// CHECK-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], double* noundef [[E:%.*]]) #[[ATTR1:[0-9]+]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
+// CHECK-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+// CHECK-NEXT:    [[E_ADDR:%.*]] = alloca double*, align 8
+// CHECK-NEXT:    [[E2:%.*]] = alloca double, align 8
+// CHECK-NEXT:    [[TMP:%.*]] = alloca double*, align 8
+// CHECK-NEXT:    [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8
+// CHECK-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
+// CHECK-NEXT:    store double* [[E]], double** [[E_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load double*, double** [[E_ADDR]], align 8
+// CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP0]], i64 0
+// CHECK-NEXT:    [[TMP1:%.*]] = load double*, double** [[E_ADDR]], align 8
+// CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds double, double* [[TMP1]], i64 0
+// CHECK-NEXT:    store double 0.000000e+00, double* [[E2]], align 8
+// CHECK-NEXT:    [[TMP2:%.*]] = load double*, double** [[E_ADDR]], align 8
+// CHECK-NEXT:    [[TMP3:%.*]] = ptrtoint double* [[TMP2]] to i64
+// CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint double* [[ARRAYIDX]] to i64
+// CHECK-NEXT:    [[TMP5:%.*]] = sub i64 [[TMP3]], [[TMP4]]
+// CHECK-NEXT:    [[TMP6:%.*]] = sdiv exact i64 [[TMP5]], ptrtoint (double* getelementptr (double, double* null, i32 1) to i64)
+// CHECK-NEXT:    [[TMP7:%.*]] = getelementptr double, double* [[E2]], i64 [[TMP6]]
+// CHECK-NEXT:    store double* [[TMP7]], double** [[TMP]], align 8
+// CHECK-NEXT:    [[TMP8:%.*]] = load double*, double** [[TMP]], align 8
+// CHECK-NEXT:    store double 1.000000e+01, double* [[TMP8]], align 8
+// CHECK-NEXT:    [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-NEXT:    [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4
+// CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
+// CHECK-NEXT:    [[TMP12:%.*]] = bitcast double* [[E2]] to i8*
+// CHECK-NEXT:    store i8* [[TMP12]], i8** [[TMP11]], align 8
+// CHECK-NEXT:    [[TMP13:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8*
+// CHECK-NEXT:    [[TMP14:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]], i32 1, i64 8, i8* [[TMP13]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func)
+// CHECK-NEXT:    [[TMP15:%.*]] = icmp eq i32 [[TMP14]], 1
+// CHECK-NEXT:    br i1 [[TMP15]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]]
+// CHECK:       .omp.reduction.then:
+// CHECK-NEXT:    [[TMP16:%.*]] = load double, double* [[ARRAYIDX]], align 8
+// CHECK-NEXT:    [[TMP17:%.*]] = load double, double* [[E2]], align 8
+// CHECK-NEXT:    [[ADD:%.*]] = fadd double [[TMP16]], [[TMP17]]
+// CHECK-NEXT:    store double [[ADD]], double* [[ARRAYIDX]], align 8
+// CHECK-NEXT:    call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP10]])
+// CHECK-NEXT:    br label [[DOTOMP_REDUCTION_DONE]]
+// CHECK:       .omp.reduction.done:
+// CHECK-NEXT:    ret void
+//
+//
+// CHECK-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func
+// CHECK-SAME: (i8* noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], i16 noundef signext [[TMP2:%.*]], i16 noundef signext [[TMP3:%.*]]) #[[ATTR2:[0-9]+]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
+// CHECK-NEXT:    [[DOTADDR1:%.*]] = alloca i16, align 2
+// CHECK-NEXT:    [[DOTADDR2:%.*]] = alloca i16, align 2
+// CHECK-NEXT:    [[DOTADDR3:%.*]] = alloca i16, align 2
+// CHECK-NEXT:    [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [1 x i8*], align 8
+// CHECK-NEXT:    [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca double, align 8
+// CHECK-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
+// CHECK-NEXT:    store i16 [[TMP1]], i16* [[DOTADDR1]], align 2
+// CHECK-NEXT:    store i16 [[TMP2]], i16* [[DOTADDR2]], align 2
+// CHECK-NEXT:    store i16 [[TMP3]], i16* [[DOTADDR3]], align 2
+// CHECK-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR]], align 8
+// CHECK-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]*
+// CHECK-NEXT:    [[TMP6:%.*]] = load i16, i16* [[DOTADDR1]], align 2
+// CHECK-NEXT:    [[TMP7:%.*]] = load i16, i16* [[DOTADDR2]], align 2
+// CHECK-NEXT:    [[TMP8:%.*]] = load i16, i16* [[DOTADDR3]], align 2
+// CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0
+// CHECK-NEXT:    [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8
+// CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0
+// CHECK-NEXT:    [[TMP12:%.*]] = bitcast i8* [[TMP10]] to double*
+// CHECK-NEXT:    [[TMP13:%.*]] = getelementptr double, double* [[TMP12]], i64 1
+// CHECK-NEXT:    [[TMP14:%.*]] = bitcast double* [[TMP13]] to i8*
+// CHECK-NEXT:    [[TMP15:%.*]] = bitcast double* [[TMP12]] to i64*
+// CHECK-NEXT:    [[TMP16:%.*]] = bitcast double* [[DOTOMP_REDUCTION_ELEMENT]] to i64*
+// CHECK-NEXT:    [[TMP17:%.*]] = load i64, i64* [[TMP15]], align 8
+// CHECK-NEXT:    [[TMP18:%.*]] = call i32 @__kmpc_get_warp_size()
+// CHECK-NEXT:    [[TMP19:%.*]] = trunc i32 [[TMP18]] to i16
+// CHECK-NEXT:    [[TMP20:%.*]] = call i64 @__kmpc_shuffle_int64(i64 [[TMP17]], i16 [[TMP7]], i16 [[TMP19]])
+// CHECK-NEXT:    store i64 [[TMP20]], i64* [[TMP16]], align 8
+// CHECK-NEXT:    [[TMP21:%.*]] = getelementptr i64, i64* [[TMP15]], i64 1
+// CHECK-NEXT:    [[TMP22:%.*]] = getelementptr i64, i64* [[TMP16]], i64 1
+// CHECK-NEXT:    [[TMP23:%.*]] = bitcast double* [[DOTOMP_REDUCTION_ELEMENT]] to i8*
+// CHECK-NEXT:    store i8* [[TMP23]], i8** [[TMP11]], align 8
+// CHECK-NEXT:    [[TMP24:%.*]] = icmp eq i16 [[TMP8]], 0
+// CHECK-NEXT:    [[TMP25:%.*]] = icmp eq i16 [[TMP8]], 1
+// CHECK-NEXT:    [[TMP26:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]]
+// CHECK-NEXT:    [[TMP27:%.*]] = and i1 [[TMP25]], [[TMP26]]
+// CHECK-NEXT:    [[TMP28:%.*]] = icmp eq i16 [[TMP8]], 2
+// CHECK-NEXT:    [[TMP29:%.*]] = and i16 [[TMP6]], 1
+// CHECK-NEXT:    [[TMP30:%.*]] = icmp eq i16 [[TMP29]], 0
+// CHECK-NEXT:    [[TMP31:%.*]] = and i1 [[TMP28]], [[TMP30]]
+// CHECK-NEXT:    [[TMP32:%.*]] = icmp sgt i16 [[TMP7]], 0
+// CHECK-NEXT:    [[TMP33:%.*]] = and i1 [[TMP31]], [[TMP32]]
+// CHECK-NEXT:    [[TMP34:%.*]] = or i1 [[TMP24]], [[TMP27]]
+// CHECK-NEXT:    [[TMP35:%.*]] = or i1 [[TMP34]], [[TMP33]]
+// CHECK-NEXT:    br i1 [[TMP35]], label [[THEN:%.*]], label [[ELSE:%.*]]
+// CHECK:       then:
+// CHECK-NEXT:    [[TMP36:%.*]] = bitcast [1 x i8*]* [[TMP5]] to i8*
+// CHECK-NEXT:    [[TMP37:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8*
+// CHECK-NEXT:    call void @"_omp$reduction$reduction_func"(i8* [[TMP36]], i8* [[TMP37]]) #[[ATTR3:[0-9]+]]
+// CHECK-NEXT:    br label [[IFCONT:%.*]]
+// CHECK:       else:
+// CHECK-NEXT:    br label [[IFCONT]]
+// CHECK:       ifcont:
+// CHECK-NEXT:    [[TMP38:%.*]] = icmp eq i16 [[TMP8]], 1
+// CHECK-NEXT:    [[TMP39:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]]
+// CHECK-NEXT:    [[TMP40:%.*]] = and i1 [[TMP38]], [[TMP39]]
+// CHECK-NEXT:    br i1 [[TMP40]], label [[THEN4:%.*]], label [[ELSE5:%.*]]
+// CHECK:       then4:
+// CHECK-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0
+// CHECK-NEXT:    [[TMP42:%.*]] = load i8*, i8** [[TMP41]], align 8
+// CHECK-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0
+// CHECK-NEXT:    [[TMP44:%.*]] = load i8*, i8** [[TMP43]], align 8
+// CHECK-NEXT:    [[TMP45:%.*]] = bitcast i8* [[TMP42]] to double*
+// CHECK-NEXT:    [[TMP46:%.*]] = bitcast i8* [[TMP44]] to double*
+// CHECK-NEXT:    [[TMP47:%.*]] = load double, double* [[TMP45]], align 8
+// CHECK-NEXT:    store double [[TMP47]], double* [[TMP46]], align 8
+// CHECK-NEXT:    br label [[IFCONT6:%.*]]
+// CHECK:       else5:
+// CHECK-NEXT:    br label [[IFCONT6]]
+// CHECK:       ifcont6:
+// CHECK-NEXT:    ret void
+//
+//
+// CHECK-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func
+// CHECK-SAME: (i8* noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR2]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
+// CHECK-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[DOTCNT_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
+// CHECK-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
+// CHECK-NEXT:    store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
+// CHECK-NEXT:    [[TMP3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
+// CHECK-NEXT:    [[TMP4:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
+// CHECK-NEXT:    [[NVPTX_LANE_ID:%.*]] = and i32 [[TMP4]], 31
+// CHECK-NEXT:    [[TMP5:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
+// CHECK-NEXT:    [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP5]], 5
+// CHECK-NEXT:    [[TMP6:%.*]] = load i8*, i8** [[DOTADDR]], align 8
+// CHECK-NEXT:    [[TMP7:%.*]] = bitcast i8* [[TMP6]] to [1 x i8*]*
+// CHECK-NEXT:    store i32 0, i32* [[DOTCNT_ADDR]], align 4
+// CHECK-NEXT:    br label [[PRECOND:%.*]]
+// CHECK:       precond:
+// CHECK-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTCNT_ADDR]], align 4
+// CHECK-NEXT:    [[TMP9:%.*]] = icmp ult i32 [[TMP8]], 2
+// CHECK-NEXT:    br i1 [[TMP9]], label [[BODY:%.*]], label [[EXIT:%.*]]
+// CHECK:       body:
+// CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP2]])
+// CHECK-NEXT:    [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0
+// CHECK-NEXT:    br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]]
+// CHECK:       then:
+// CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP7]], i64 0, i64 0
+// CHECK-NEXT:    [[TMP11:%.*]] = load i8*, i8** [[TMP10]], align 8
+// CHECK-NEXT:    [[TMP12:%.*]] = bitcast i8* [[TMP11]] to i32*
+// CHECK-NEXT:    [[TMP13:%.*]] = getelementptr i32, i32* [[TMP12]], i32 [[TMP8]]
+// CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
+// CHECK-NEXT:    [[TMP15:%.*]] = load i32, i32* [[TMP13]], align 4
+// CHECK-NEXT:    store volatile i32 [[TMP15]], i32 addrspace(3)* [[TMP14]], align 4
+// CHECK-NEXT:    br label [[IFCONT:%.*]]
+// CHECK:       else:
+// CHECK-NEXT:    br label [[IFCONT]]
+// CHECK:       ifcont:
+// CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]])
+// CHECK-NEXT:    [[TMP16:%.*]] = load i32, i32* [[DOTADDR1]], align 4
+// CHECK-NEXT:    [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP16]]
+// CHECK-NEXT:    br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]]
+// CHECK:       then2:
+// CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]]
+// CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP7]], i64 0, i64 0
+// CHECK-NEXT:    [[TMP19:%.*]] = load i8*, i8** [[TMP18]], align 8
+// CHECK-NEXT:    [[TMP20:%.*]] = bitcast i8* [[TMP19]] to i32*
+// CHECK-NEXT:    [[TMP21:%.*]] = getelementptr i32, i32* [[TMP20]], i32 [[TMP8]]
+// CHECK-NEXT:    [[TMP22:%.*]] = load volatile i32, i32 addrspace(3)* [[TMP17]], align 4
+// CHECK-NEXT:    store i32 [[TMP22]], i32* [[TMP21]], align 4
+// CHECK-NEXT:    br label [[IFCONT4:%.*]]
+// CHECK:       else3:
+// CHECK-NEXT:    br label [[IFCONT4]]
+// CHECK:       ifcont4:
+// CHECK-NEXT:    [[TMP23:%.*]] = add nsw i32 [[TMP8]], 1
+// CHECK-NEXT:    store i32 [[TMP23]], i32* [[DOTCNT_ADDR]], align 4
+// CHECK-NEXT:    br label [[PRECOND]]
+// CHECK:       exit:
+// CHECK-NEXT:    ret void
+//
+//
+// CHECK1-LABEL: define {{[^@]+}}@_Z3barv
+// CHECK1-SAME: () #[[ATTR0:[0-9]+]] {
+// CHECK1-NEXT:  entry:
+// CHECK1-NEXT:    [[O:%.*]] = alloca [5 x %class.S2], align 4
+// CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x i8*], align 8
+// CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x i8*], align 8
+// CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x i8*], align 8
+// CHECK1-NEXT:    [[B:%.*]] = alloca [10 x [10 x [10 x double]]], align 8
+// CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS1:%.*]] = alloca [1 x i8*], align 8
+// CHECK1-NEXT:    [[DOTOFFLOAD_PTRS2:%.*]] = alloca [1 x i8*], align 8
+// CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS3:%.*]] = alloca [1 x i8*], align 8
+// CHECK1-NEXT:    [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [5 x %class.S2], [5 x %class.S2]* [[O]], i32 0, i32 0
+// CHECK1-NEXT:    [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[CLASS_S2:%.*]], %class.S2* [[ARRAY_BEGIN]], i64 5
+// CHECK1-NEXT:    br label [[ARRAYCTOR_LOOP:%.*]]
+// CHECK1:       arrayctor.loop:
+// CHECK1-NEXT:    [[ARRAYCTOR_CUR:%.*]] = phi %class.S2* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ]
+// CHECK1-NEXT:    call void @_ZN2S2C1Ev(%class.S2* noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]])
+// CHECK1-NEXT:    [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[CLASS_S2]], %class.S2* [[ARRAYCTOR_CUR]], i64 1
+// CHECK1-NEXT:    [[ARRAYCTOR_DONE:%.*]] = icmp eq %class.S2* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]]
+// CHECK1-NEXT:    br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]]
+// CHECK1:       arrayctor.cont:
+// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [5 x %class.S2], [5 x %class.S2]* [[O]], i64 0, i64 0
+// CHECK1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP1:%.*]] = bitcast i8** [[TMP0]] to [5 x %class.S2]**
+// CHECK1-NEXT:    store [5 x %class.S2]* [[O]], [5 x %class.S2]** [[TMP1]], align 8
+// CHECK1-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP3:%.*]] = bitcast i8** [[TMP2]] to %class.S2**
+// CHECK1-NEXT:    store %class.S2* [[ARRAYIDX]], %class.S2** [[TMP3]], align 8
+// CHECK1-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
+// CHECK1-NEXT:    store i8* null, i8** [[TMP4]], align 8
+// CHECK1-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barv_l50.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
+// CHECK1-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
+// CHECK1-NEXT:    br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK1:       omp_offload.failed:
+// CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barv_l50([5 x %class.S2]* [[O]]) #[[ATTR8:[0-9]+]]
+// CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT]]
+// CHECK1:       omp_offload.cont:
+// CHECK1-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to [10 x [10 x [10 x double]]]**
+// CHECK1-NEXT:    store [10 x [10 x [10 x double]]]* [[B]], [10 x [10 x [10 x double]]]** [[TMP10]], align 8
+// CHECK1-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP12:%.*]] = bitcast i8** [[TMP11]] to [10 x [10 x [10 x double]]]**
+// CHECK1-NEXT:    store [10 x [10 x [10 x double]]]* [[B]], [10 x [10 x [10 x double]]]** [[TMP12]], align 8
+// CHECK1-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_MAPPERS3]], i64 0, i64 0
+// CHECK1-NEXT:    store i8* null, i8** [[TMP13]], align 8
+// CHECK1-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barv_l55.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.3, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
+// CHECK1-NEXT:    [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
+// CHECK1-NEXT:    br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED4:%.*]], label [[OMP_OFFLOAD_CONT5:%.*]]
+// CHECK1:       omp_offload.failed4:
+// CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barv_l55([10 x [10 x [10 x double]]]* [[B]]) #[[ATTR8]]
+// CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT5]]
+// CHECK1:       omp_offload.cont5:
+// CHECK1-NEXT:    ret i32 0
+//
+//
+// CHECK1-LABEL: define {{[^@]+}}@_ZN2S2C1Ev
+// CHECK1-SAME: (%class.S2* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] comdat align 2 {
+// CHECK1-NEXT:  entry:
+// CHECK1-NEXT:    [[THIS_ADDR:%.*]] = alloca %class.S2*, align 8
+// CHECK1-NEXT:    store %class.S2* [[THIS]], %class.S2** [[THIS_ADDR]], align 8
+// CHECK1-NEXT:    [[THIS1:%.*]] = load %class.S2*, %class.S2** [[THIS_ADDR]], align 8
+// CHECK1-NEXT:    call void @_ZN2S2C2Ev(%class.S2* noundef nonnull align 4 dereferenceable(4) [[THIS1]])
+// CHECK1-NEXT:    ret void
+//
+//
+// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barv_l50
+// CHECK1-SAME: ([5 x %class.S2]* noundef nonnull align 4 dereferenceable(20) [[O:%.*]]) #[[ATTR2:[0-9]+]] {
+// CHECK1-NEXT:  entry:
+// CHECK1-NEXT:    [[O_ADDR:%.*]] = alloca [5 x %class.S2]*, align 8
+// CHECK1-NEXT:    store [5 x %class.S2]* [[O]], [5 x %class.S2]** [[O_ADDR]], align 8
+// CHECK1-NEXT:    [[TMP0:%.*]] = load [5 x %class.S2]*, [5 x %class.S2]** [[O_ADDR]], align 8
+// CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [5 x %class.S2]*)* @.omp_outlined. to void (i32*, i32*, ...)*), [5 x %class.S2]* [[TMP0]])
+// CHECK1-NEXT:    ret void
+//
+//
+// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined.
+// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [5 x %class.S2]* noundef nonnull align 4 dereferenceable(20) [[O:%.*]]) #[[ATTR3:[0-9]+]] {
+// CHECK1-NEXT:  entry:
+// CHECK1-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
+// CHECK1-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+// CHECK1-NEXT:    [[O_ADDR:%.*]] = alloca [5 x %class.S2]*, align 8
+// CHECK1-NEXT:    [[O1:%.*]] = alloca [[CLASS_S2:%.*]], align 4
+// CHECK1-NEXT:    [[I:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8
+// CHECK1-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
+// CHECK1-NEXT:    store [5 x %class.S2]* [[O]], [5 x %class.S2]** [[O_ADDR]], align 8
+// CHECK1-NEXT:    [[TMP0:%.*]] = load [5 x %class.S2]*, [5 x %class.S2]** [[O_ADDR]], align 8
+// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [5 x %class.S2], [5 x %class.S2]* [[TMP0]], i64 0, i64 0
+// CHECK1-NEXT:    call void @_ZN2S2C1Ev(%class.S2* noundef nonnull align 4 dereferenceable(4) [[O1]])
+// CHECK1-NEXT:    [[TMP1:%.*]] = bitcast [5 x %class.S2]* [[TMP0]] to %class.S2*
+// CHECK1-NEXT:    [[TMP2:%.*]] = ptrtoint %class.S2* [[TMP1]] to i64
+// CHECK1-NEXT:    [[TMP3:%.*]] = ptrtoint %class.S2* [[ARRAYIDX]] to i64
+// CHECK1-NEXT:    [[TMP4:%.*]] = sub i64 [[TMP2]], [[TMP3]]
+// CHECK1-NEXT:    [[TMP5:%.*]] = sdiv exact i64 [[TMP4]], ptrtoint (%class.S2* getelementptr ([[CLASS_S2]], %class.S2* null, i32 1) to i64)
+// CHECK1-NEXT:    [[TMP6:%.*]] = getelementptr [[CLASS_S2]], %class.S2* [[O1]], i64 [[TMP5]]
+// CHECK1-NEXT:    [[TMP7:%.*]] = bitcast %class.S2* [[TMP6]] to [5 x %class.S2]*
+// CHECK1-NEXT:    store i32 0, i32* [[I]], align 4
+// CHECK1-NEXT:    br label [[FOR_COND:%.*]]
+// CHECK1:       for.cond:
+// CHECK1-NEXT:    [[TMP8:%.*]] = load i32, i32* [[I]], align 4
+// CHECK1-NEXT:    [[CMP:%.*]] = icmp slt i32 [[TMP8]], 10
+// CHECK1-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
+// CHECK1:       for.body:
+// CHECK1-NEXT:    br label [[FOR_INC:%.*]]
+// CHECK1:       for.inc:
+// CHECK1-NEXT:    [[TMP9:%.*]] = load i32, i32* [[I]], align 4
+// CHECK1-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP9]], 1
+// CHECK1-NEXT:    store i32 [[INC]], i32* [[I]], align 4
+// CHECK1-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]]
+// CHECK1:       for.end:
+// CHECK1-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
+// CHECK1-NEXT:    [[TMP11:%.*]] = bitcast %class.S2* [[O1]] to i8*
+// CHECK1-NEXT:    store i8* [[TMP11]], i8** [[TMP10]], align 8
+// CHECK1-NEXT:    [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT:    [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4
+// CHECK1-NEXT:    [[TMP14:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8*
+// CHECK1-NEXT:    [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 1, i64 8, i8* [[TMP14]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var)
+// CHECK1-NEXT:    switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// CHECK1-NEXT:    i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
+// CHECK1-NEXT:    i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
+// CHECK1-NEXT:    ]
+// CHECK1:       .omp.reduction.case1:
+// CHECK1-NEXT:    [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) %class.S2* @_ZN2S2plERS_(%class.S2* noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX]], %class.S2* noundef nonnull align 4 dereferenceable(4) [[O1]])
+// CHECK1-NEXT:    [[TMP16:%.*]] = bitcast %class.S2* [[ARRAYIDX]] to i8*
+// CHECK1-NEXT:    [[TMP17:%.*]] = bitcast %class.S2* [[CALL]] to i8*
+// CHECK1-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP16]], i8* align 4 [[TMP17]], i64 4, i1 false)
+// CHECK1-NEXT:    call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB1]], i32 [[TMP13]], [8 x i32]* @.gomp_critical_user_.reduction.var)
+// CHECK1-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
+// CHECK1:       .omp.reduction.case2:
+// CHECK1-NEXT:    [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT:    [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4
+// CHECK1-NEXT:    call void @__kmpc_critical(%struct.ident_t* @[[GLOB2]], i32 [[TMP19]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var)
+// CHECK1-NEXT:    [[CALL2:%.*]] = call noundef nonnull align 4 dereferenceable(4) %class.S2* @_ZN2S2plERS_(%class.S2* noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX]], %class.S2* noundef nonnull align 4 dereferenceable(4) [[O1]])
+// CHECK1-NEXT:    [[TMP20:%.*]] = bitcast %class.S2* [[ARRAYIDX]] to i8*
+// CHECK1-NEXT:    [[TMP21:%.*]] = bitcast %class.S2* [[CALL2]] to i8*
+// CHECK1-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i64 4, i1 false)
+// CHECK1-NEXT:    call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB2]], i32 [[TMP19]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var)
+// CHECK1-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
+// CHECK1:       .omp.reduction.default:
+// CHECK1-NEXT:    ret void
+//
+//
+// CHECK1-LABEL: define {{[^@]+}}@.omp.reduction.reduction_func
+// CHECK1-SAME: (i8* noundef [[TMP0:%.*]], i8* noundef [[TMP1:%.*]]) #[[ATTR4:[0-9]+]] {
+// CHECK1-NEXT:  entry:
+// CHECK1-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
+// CHECK1-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 8
+// CHECK1-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
+// CHECK1-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
+// CHECK1-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
+// CHECK1-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [1 x i8*]*
+// CHECK1-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
+// CHECK1-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]*
+// CHECK1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0
+// CHECK1-NEXT:    [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8
+// CHECK1-NEXT:    [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %class.S2*
+// CHECK1-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP3]], i64 0, i64 0
+// CHECK1-NEXT:    [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8
+// CHECK1-NEXT:    [[TMP11:%.*]] = bitcast i8* [[TMP10]] to %class.S2*
+// CHECK1-NEXT:    [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) %class.S2* @_ZN2S2plERS_(%class.S2* noundef nonnull align 4 dereferenceable(4) [[TMP11]], %class.S2* noundef nonnull align 4 dereferenceable(4) [[TMP8]])
+// CHECK1-NEXT:    [[TMP12:%.*]] = bitcast %class.S2* [[TMP11]] to i8*
+// CHECK1-NEXT:    [[TMP13:%.*]] = bitcast %class.S2* [[CALL]] to i8*
+// CHECK1-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i64 4, i1 false)
+// CHECK1-NEXT:    ret void
+//
+//
+// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barv_l55
+// CHECK1-SAME: ([10 x [10 x [10 x double]]]* noundef nonnull align 8 dereferenceable(8000) [[B:%.*]]) #[[ATTR2]] {
+// CHECK1-NEXT:  entry:
+// CHECK1-NEXT:    [[B_ADDR:%.*]] = alloca [10 x [10 x [10 x double]]]*, align 8
+// CHECK1-NEXT:    store [10 x [10 x [10 x double]]]* [[B]], [10 x [10 x [10 x double]]]** [[B_ADDR]], align 8
+// CHECK1-NEXT:    [[TMP0:%.*]] = load [10 x [10 x [10 x double]]]*, [10 x [10 x [10 x double]]]** [[B_ADDR]], align 8
+// CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x [10 x [10 x double]]]*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), [10 x [10 x [10 x double]]]* [[TMP0]])
+// CHECK1-NEXT:    ret void
+//
+//
+// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1
+// CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x [10 x [10 x double]]]* noundef nonnull align 8 dereferenceable(8000) [[B:%.*]]) #[[ATTR3]] {
+// CHECK1-NEXT:  entry:
+// CHECK1-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
+// CHECK1-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+// CHECK1-NEXT:    [[B_ADDR:%.*]] = alloca [10 x [10 x [10 x double]]]*, align 8
+// CHECK1-NEXT:    [[DOTOMP_IV:%.*]] = alloca i64, align 8
+// CHECK1-NEXT:    [[TMP:%.*]] = alloca i64, align 8
+// CHECK1-NEXT:    [[DOTOMP_LB:%.*]] = alloca i64, align 8
+// CHECK1-NEXT:    [[DOTOMP_UB:%.*]] = alloca i64, align 8
+// CHECK1-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8
+// CHECK1-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 8
+// CHECK1-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i64, align 8
+// CHECK1-NEXT:    [[DOTRD_INPUT_:%.*]] = alloca [1 x %struct.kmp_taskred_input_t], align 8
+// CHECK1-NEXT:    [[DOTTASK_RED_:%.*]] = alloca i8*, align 8
+// CHECK1-NEXT:    [[I:%.*]] = alloca i64, align 8
+// CHECK1-NEXT:    [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 8
+// CHECK1-NEXT:    [[ATOMIC_TEMP:%.*]] = alloca double, align 8
+// CHECK1-NEXT:    [[_TMP30:%.*]] = alloca double, align 8
+// CHECK1-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
+// CHECK1-NEXT:    store [10 x [10 x [10 x double]]]* [[B]], [10 x [10 x [10 x double]]]** [[B_ADDR]], align 8
+// CHECK1-NEXT:    [[TMP0:%.*]] = load [10 x [10 x [10 x double]]]*, [10 x [10 x [10 x double]]]** [[B_ADDR]], align 8
+// CHECK1-NEXT:    store i64 0, i64* [[DOTOMP_LB]], align 8
+// CHECK1-NEXT:    store i64 9, i64* [[DOTOMP_UB]], align 8
+// CHECK1-NEXT:    store i64 1, i64* [[DOTOMP_STRIDE]], align 8
+// CHECK1-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
+// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x [10 x double]]], [10 x [10 x [10 x double]]]* [[TMP0]], i64 0, i64 0
+// CHECK1-NEXT:    [[ARRAYDECAY:%.*]] = getelementptr inbounds [10 x [10 x double]], [10 x [10 x double]]* [[ARRAYIDX]], i64 0, i64 0
+// CHECK1-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYDECAY]], i64 2
+// CHECK1-NEXT:    [[ARRAYDECAY2:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX1]], i64 0, i64 0
+// CHECK1-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds double, double* [[ARRAYDECAY2]], i64 1
+// CHECK1-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds [10 x [10 x [10 x double]]], [10 x [10 x [10 x double]]]* [[TMP0]], i64 0, i64 1
+// CHECK1-NEXT:    [[ARRAYDECAY5:%.*]] = getelementptr inbounds [10 x [10 x double]], [10 x [10 x double]]* [[ARRAYIDX4]], i64 0, i64 0
+// CHECK1-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYDECAY5]], i64 5
+// CHECK1-NEXT:    [[ARRAYDECAY7:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX6]], i64 0, i64 0
+// CHECK1-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[ARRAYDECAY7]], i64 1
+// CHECK1-NEXT:    [[TMP1:%.*]] = ptrtoint double* [[ARRAYIDX8]] to i64
+// CHECK1-NEXT:    [[TMP2:%.*]] = ptrtoint double* [[ARRAYIDX3]] to i64
+// CHECK1-NEXT:    [[TMP3:%.*]] = sub i64 [[TMP1]], [[TMP2]]
+// CHECK1-NEXT:    [[TMP4:%.*]] = sdiv exact i64 [[TMP3]], ptrtoint (double* getelementptr (double, double* null, i32 1) to i64)
+// CHECK1-NEXT:    [[TMP5:%.*]] = add nuw i64 [[TMP4]], 1
+// CHECK1-NEXT:    [[TMP6:%.*]] = mul nuw i64 [[TMP5]], ptrtoint (double* getelementptr (double, double* null, i32 1) to i64)
+// CHECK1-NEXT:    [[TMP7:%.*]] = call i8* @llvm.stacksave()
+// CHECK1-NEXT:    store i8* [[TMP7]], i8** [[SAVED_STACK]], align 8
+// CHECK1-NEXT:    [[VLA:%.*]] = alloca double, i64 [[TMP5]], align 8
+// CHECK1-NEXT:    store i64 [[TMP5]], i64* [[__VLA_EXPR0]], align 8
+// CHECK1-NEXT:    [[TMP8:%.*]] = getelementptr double, double* [[VLA]], i64 [[TMP5]]
+// CHECK1-NEXT:    [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq double* [[VLA]], [[TMP8]]
+// CHECK1-NEXT:    br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]]
+// CHECK1:       omp.arrayinit.body:
+// CHECK1-NEXT:    [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi double* [ [[VLA]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ]
+// CHECK1-NEXT:    store double 0.000000e+00, double* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 8
+// CHECK1-NEXT:    [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr double, double* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
+// CHECK1-NEXT:    [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq double* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]]
+// CHECK1-NEXT:    br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]]
+// CHECK1:       omp.arrayinit.done:
+// CHECK1-NEXT:    [[TMP9:%.*]] = bitcast [10 x [10 x [10 x double]]]* [[TMP0]] to double*
+// CHECK1-NEXT:    [[TMP10:%.*]] = ptrtoint double* [[TMP9]] to i64
+// CHECK1-NEXT:    [[TMP11:%.*]] = ptrtoint double* [[ARRAYIDX3]] to i64
+// CHECK1-NEXT:    [[TMP12:%.*]] = sub i64 [[TMP10]], [[TMP11]]
+// CHECK1-NEXT:    [[TMP13:%.*]] = sdiv exact i64 [[TMP12]], ptrtoint (double* getelementptr (double, double* null, i32 1) to i64)
+// CHECK1-NEXT:    [[TMP14:%.*]] = getelementptr double, double* [[VLA]], i64 [[TMP13]]
+// CHECK1-NEXT:    [[TMP15:%.*]] = bitcast double* [[TMP14]] to [10 x [10 x [10 x double]]]*
+// CHECK1-NEXT:    [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds [1 x %struct.kmp_taskred_input_t], [1 x %struct.kmp_taskred_input_t]* [[DOTRD_INPUT_]], i64 0, i64 0
+// CHECK1-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 0
+// CHECK1-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x [10 x [10 x double]]], [10 x [10 x [10 x double]]]* [[TMP0]], i64 0, i64 0
+// CHECK1-NEXT:    [[ARRAYDECAY10:%.*]] = getelementptr inbounds [10 x [10 x double]], [10 x [10 x double]]* [[ARRAYIDX9]], i64 0, i64 0
+// CHECK1-NEXT:    [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYDECAY10]], i64 2
+// CHECK1-NEXT:    [[ARRAYDECAY12:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX11]], i64 0, i64 0
+// CHECK1-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds double, double* [[ARRAYDECAY12]], i64 1
+// CHECK1-NEXT:    [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x [10 x [10 x double]]], [10 x [10 x [10 x double]]]* [[TMP0]], i64 0, i64 1
+// CHECK1-NEXT:    [[ARRAYDECAY15:%.*]] = getelementptr inbounds [10 x [10 x double]], [10 x [10 x double]]* [[ARRAYIDX14]], i64 0, i64 0
+// CHECK1-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYDECAY15]], i64 5
+// CHECK1-NEXT:    [[ARRAYDECAY17:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX16]], i64 0, i64 0
+// CHECK1-NEXT:    [[ARRAYIDX18:%.*]] = getelementptr inbounds double, double* [[ARRAYDECAY17]], i64 1
+// CHECK1-NEXT:    [[TMP17:%.*]] = bitcast double* [[VLA]] to i8*
+// CHECK1-NEXT:    store i8* [[TMP17]], i8** [[TMP16]], align 8
+// CHECK1-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 1
+// CHECK1-NEXT:    [[TMP19:%.*]] = bitcast double* [[ARRAYIDX13]] to i8*
+// CHECK1-NEXT:    store i8* [[TMP19]], i8** [[TMP18]], align 8
+// CHECK1-NEXT:    [[TMP20:%.*]] = ptrtoint double* [[ARRAYIDX18]] to i64
+// CHECK1-NEXT:    [[TMP21:%.*]] = ptrtoint double* [[ARRAYIDX13]] to i64
+// CHECK1-NEXT:    [[TMP22:%.*]] = sub i64 [[TMP20]], [[TMP21]]
+// CHECK1-NEXT:    [[TMP23:%.*]] = sdiv exact i64 [[TMP22]], ptrtoint (double* getelementptr (double, double* null, i32 1) to i64)
+// CHECK1-NEXT:    [[TMP24:%.*]] = add nuw i64 [[TMP23]], 1
+// CHECK1-NEXT:    [[TMP25:%.*]] = mul nuw i64 [[TMP24]], ptrtoint (double* getelementptr (double, double* null, i32 1) to i64)
+// CHECK1-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 2
+// CHECK1-NEXT:    store i64 [[TMP25]], i64* [[TMP26]], align 8
+// CHECK1-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 3
+// CHECK1-NEXT:    store i8* bitcast (void (i8*, i8*)* @.red_init. to i8*), i8** [[TMP27]], align 8
+// CHECK1-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 4
+// CHECK1-NEXT:    store i8* null, i8** [[TMP28]], align 8
+// CHECK1-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 5
+// CHECK1-NEXT:    store i8* bitcast (void (i8*, i8*)* @.red_comb. to i8*), i8** [[TMP29]], align 8
+// CHECK1-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 6
+// CHECK1-NEXT:    store i32 1, i32* [[TMP30]], align 8
+// CHECK1-NEXT:    [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT:    [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4
+// CHECK1-NEXT:    [[TMP33:%.*]] = bitcast [1 x %struct.kmp_taskred_input_t]* [[DOTRD_INPUT_]] to i8*
+// CHECK1-NEXT:    [[TMP34:%.*]] = call i8* @__kmpc_taskred_modifier_init(%struct.ident_t* @[[GLOB2]], i32 [[TMP32]], i32 1, i32 1, i8* [[TMP33]])
+// CHECK1-NEXT:    store i8* [[TMP34]], i8** [[DOTTASK_RED_]], align 8
+// CHECK1-NEXT:    [[TMP35:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT:    [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4
+// CHECK1-NEXT:    call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP36]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1)
+// CHECK1-NEXT:    [[TMP37:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8
+// CHECK1-NEXT:    [[CMP:%.*]] = icmp sgt i64 [[TMP37]], 9
+// CHECK1-NEXT:    br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// CHECK1:       cond.true:
+// CHECK1-NEXT:    br label [[COND_END:%.*]]
+// CHECK1:       cond.false:
+// CHECK1-NEXT:    [[TMP38:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8
+// CHECK1-NEXT:    br label [[COND_END]]
+// CHECK1:       cond.end:
+// CHECK1-NEXT:    [[COND:%.*]] = phi i64 [ 9, [[COND_TRUE]] ], [ [[TMP38]], [[COND_FALSE]] ]
+// CHECK1-NEXT:    store i64 [[COND]], i64* [[DOTOMP_UB]], align 8
+// CHECK1-NEXT:    [[TMP39:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8
+// CHECK1-NEXT:    store i64 [[TMP39]], i64* [[DOTOMP_IV]], align 8
+// CHECK1-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
+// CHECK1:       omp.inner.for.cond:
+// CHECK1-NEXT:    [[TMP40:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8
+// CHECK1-NEXT:    [[TMP41:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8
+// CHECK1-NEXT:    [[CMP19:%.*]] = icmp sle i64 [[TMP40]], [[TMP41]]
+// CHECK1-NEXT:    br i1 [[CMP19]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]]
+// CHECK1:       omp.inner.for.cond.cleanup:
+// CHECK1-NEXT:    br label [[OMP_INNER_FOR_END:%.*]]
+// CHECK1:       omp.inner.for.body:
+// CHECK1-NEXT:    [[TMP42:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8
+// CHECK1-NEXT:    [[MUL:%.*]] = mul nsw i64 [[TMP42]], 1
+// CHECK1-NEXT:    [[ADD:%.*]] = add nsw i64 0, [[MUL]]
+// CHECK1-NEXT:    store i64 [[ADD]], i64* [[I]], align 8
+// CHECK1-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
+// CHECK1:       omp.body.continue:
+// CHECK1-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
+// CHECK1:       omp.inner.for.inc:
+// CHECK1-NEXT:    [[TMP43:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8
+// CHECK1-NEXT:    [[ADD20:%.*]] = add nsw i64 [[TMP43]], 1
+// CHECK1-NEXT:    store i64 [[ADD20]], i64* [[DOTOMP_IV]], align 8
+// CHECK1-NEXT:    br label [[OMP_INNER_FOR_COND]]
+// CHECK1:       omp.inner.for.end:
+// CHECK1-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
+// CHECK1:       omp.loop.exit:
+// CHECK1-NEXT:    [[TMP44:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT:    [[TMP45:%.*]] = load i32, i32* [[TMP44]], align 4
+// CHECK1-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP45]])
+// CHECK1-NEXT:    [[TMP46:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT:    [[TMP47:%.*]] = load i32, i32* [[TMP46]], align 4
+// CHECK1-NEXT:    call void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP47]], i32 1)
+// CHECK1-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
+// CHECK1-NEXT:    [[TMP49:%.*]] = bitcast double* [[VLA]] to i8*
+// CHECK1-NEXT:    store i8* [[TMP49]], i8** [[TMP48]], align 8
+// CHECK1-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1
+// CHECK1-NEXT:    [[TMP51:%.*]] = inttoptr i64 [[TMP5]] to i8*
+// CHECK1-NEXT:    store i8* [[TMP51]], i8** [[TMP50]], align 8
+// CHECK1-NEXT:    [[TMP52:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT:    [[TMP53:%.*]] = load i32, i32* [[TMP52]], align 4
+// CHECK1-NEXT:    [[TMP54:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8*
+// CHECK1-NEXT:    [[TMP55:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB1]], i32 [[TMP53]], i32 1, i64 16, i8* [[TMP54]], void (i8*, i8*)* @.omp.reduction.reduction_func.2, [8 x i32]* @.gomp_critical_user_.reduction.var)
+// CHECK1-NEXT:    switch i32 [[TMP55]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// CHECK1-NEXT:    i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
+// CHECK1-NEXT:    i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
+// CHECK1-NEXT:    ]
+// CHECK1:       .omp.reduction.case1:
+// CHECK1-NEXT:    [[TMP56:%.*]] = getelementptr double, double* [[ARRAYIDX3]], i64 [[TMP5]]
+// CHECK1-NEXT:    [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq double* [[ARRAYIDX3]], [[TMP56]]
+// CHECK1-NEXT:    br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE25:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
+// CHECK1:       omp.arraycpy.body:
+// CHECK1-NEXT:    [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi double* [ [[VLA]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
+// CHECK1-NEXT:    [[OMP_ARRAYCPY_DESTELEMENTPAST21:%.*]] = phi double* [ [[ARRAYIDX3]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT23:%.*]], [[OMP_ARRAYCPY_BODY]] ]
+// CHECK1-NEXT:    [[TMP57:%.*]] = load double, double* [[OMP_ARRAYCPY_DESTELEMENTPAST21]], align 8
+// CHECK1-NEXT:    [[TMP58:%.*]] = load double, double* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 8
+// CHECK1-NEXT:    [[ADD22:%.*]] = fadd double [[TMP57]], [[TMP58]]
+// CHECK1-NEXT:    store double [[ADD22]], double* [[OMP_ARRAYCPY_DESTELEMENTPAST21]], align 8
+// CHECK1-NEXT:    [[OMP_ARRAYCPY_DEST_ELEMENT23]] = getelementptr double, double* [[OMP_ARRAYCPY_DESTELEMENTPAST21]], i32 1
+// CHECK1-NEXT:    [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr double, double* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
+// CHECK1-NEXT:    [[OMP_ARRAYCPY_DONE24:%.*]] = icmp eq double* [[OMP_ARRAYCPY_DEST_ELEMENT23]], [[TMP56]]
+// CHECK1-NEXT:    br i1 [[OMP_ARRAYCPY_DONE24]], label [[OMP_ARRAYCPY_DONE25]], label [[OMP_ARRAYCPY_BODY]]
+// CHECK1:       omp.arraycpy.done25:
+// CHECK1-NEXT:    call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB1]], i32 [[TMP53]], [8 x i32]* @.gomp_critical_user_.reduction.var)
+// CHECK1-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
+// CHECK1:       .omp.reduction.case2:
+// CHECK1-NEXT:    [[TMP59:%.*]] = getelementptr double, double* [[ARRAYIDX3]], i64 [[TMP5]]
+// CHECK1-NEXT:    [[OMP_ARRAYCPY_ISEMPTY26:%.*]] = icmp eq double* [[ARRAYIDX3]], [[TMP59]]
+// CHECK1-NEXT:    br i1 [[OMP_ARRAYCPY_ISEMPTY26]], label [[OMP_ARRAYCPY_DONE35:%.*]], label [[OMP_ARRAYCPY_BODY27:%.*]]
+// CHECK1:       omp.arraycpy.body27:
+// CHECK1-NEXT:    [[OMP_ARRAYCPY_SRCELEMENTPAST28:%.*]] = phi double* [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT33:%.*]], [[ATOMIC_EXIT:%.*]] ]
+// CHECK1-NEXT:    [[OMP_ARRAYCPY_DESTELEMENTPAST29:%.*]] = phi double* [ [[ARRAYIDX3]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT32:%.*]], [[ATOMIC_EXIT]] ]
+// CHECK1-NEXT:    [[TMP60:%.*]] = load double, double* [[OMP_ARRAYCPY_SRCELEMENTPAST28]], align 8
+// CHECK1-NEXT:    [[TMP61:%.*]] = bitcast double* [[OMP_ARRAYCPY_DESTELEMENTPAST29]] to i64*
+// CHECK1-NEXT:    [[ATOMIC_LOAD:%.*]] = load atomic i64, i64* [[TMP61]] monotonic, align 8
+// CHECK1-NEXT:    br label [[ATOMIC_CONT:%.*]]
+// CHECK1:       atomic_cont:
+// CHECK1-NEXT:    [[TMP62:%.*]] = phi i64 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY27]] ], [ [[TMP70:%.*]], [[ATOMIC_CONT]] ]
+// CHECK1-NEXT:    [[TMP63:%.*]] = bitcast double* [[ATOMIC_TEMP]] to i64*
+// CHECK1-NEXT:    [[TMP64:%.*]] = bitcast i64 [[TMP62]] to double
+// CHECK1-NEXT:    store double [[TMP64]], double* [[_TMP30]], align 8
+// CHECK1-NEXT:    [[TMP65:%.*]] = load double, double* [[_TMP30]], align 8
+// CHECK1-NEXT:    [[TMP66:%.*]] = load double, double* [[OMP_ARRAYCPY_SRCELEMENTPAST28]], align 8
+// CHECK1-NEXT:    [[ADD31:%.*]] = fadd double [[TMP65]], [[TMP66]]
+// CHECK1-NEXT:    store double [[ADD31]], double* [[ATOMIC_TEMP]], align 8
+// CHECK1-NEXT:    [[TMP67:%.*]] = load i64, i64* [[TMP63]], align 8
+// CHECK1-NEXT:    [[TMP68:%.*]] = bitcast double* [[OMP_ARRAYCPY_DESTELEMENTPAST29]] to i64*
+// CHECK1-NEXT:    [[TMP69:%.*]] = cmpxchg i64* [[TMP68]], i64 [[TMP62]], i64 [[TMP67]] monotonic monotonic, align 8
+// CHECK1-NEXT:    [[TMP70]] = extractvalue { i64, i1 } [[TMP69]], 0
+// CHECK1-NEXT:    [[TMP71:%.*]] = extractvalue { i64, i1 } [[TMP69]], 1
+// CHECK1-NEXT:    br i1 [[TMP71]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]]
+// CHECK1:       atomic_exit:
+// CHECK1-NEXT:    [[OMP_ARRAYCPY_DEST_ELEMENT32]] = getelementptr double, double* [[OMP_ARRAYCPY_DESTELEMENTPAST29]], i32 1
+// CHECK1-NEXT:    [[OMP_ARRAYCPY_SRC_ELEMENT33]] = getelementptr double, double* [[OMP_ARRAYCPY_SRCELEMENTPAST28]], i32 1
+// CHECK1-NEXT:    [[OMP_ARRAYCPY_DONE34:%.*]] = icmp eq double* [[OMP_ARRAYCPY_DEST_ELEMENT32]], [[TMP59]]
+// CHECK1-NEXT:    br i1 [[OMP_ARRAYCPY_DONE34]], label [[OMP_ARRAYCPY_DONE35]], label [[OMP_ARRAYCPY_BODY27]]
+// CHECK1:       omp.arraycpy.done35:
+// CHECK1-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
+// CHECK1:       .omp.reduction.default:
+// CHECK1-NEXT:    [[TMP72:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK1-NEXT:    call void @llvm.stackrestore(i8* [[TMP72]])
+// CHECK1-NEXT:    ret void
+//
+//
+// CHECK1-LABEL: define {{[^@]+}}@.red_init.
+// CHECK1-SAME: (i8* noalias noundef [[TMP0:%.*]], i8* noalias noundef [[TMP1:%.*]]) #[[ATTR4]] {
+// CHECK1-NEXT:  entry:
+// CHECK1-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
+// CHECK1-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 8
+// CHECK1-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
+// CHECK1-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
+// CHECK1-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
+// CHECK1-NEXT:    [[TMP3:%.*]] = load i64, i64* @{{reduction_size[.].+[.]}}, align 8
+// CHECK1-NEXT:    [[TMP4:%.*]] = bitcast i8* [[TMP2]] to double*
+// CHECK1-NEXT:    [[TMP5:%.*]] = getelementptr double, double* [[TMP4]], i64 [[TMP3]]
+// CHECK1-NEXT:    [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq double* [[TMP4]], [[TMP5]]
+// CHECK1-NEXT:    br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]]
+// CHECK1:       omp.arrayinit.body:
+// CHECK1-NEXT:    [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi double* [ [[TMP4]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ]
+// CHECK1-NEXT:    store double 0.000000e+00, double* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 8
+// CHECK1-NEXT:    [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr double, double* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
+// CHECK1-NEXT:    [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq double* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]]
+// CHECK1-NEXT:    br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]]
+// CHECK1:       omp.arrayinit.done:
+// CHECK1-NEXT:    ret void
+//
+//
+// CHECK1-LABEL: define {{[^@]+}}@.red_comb.
+// CHECK1-SAME: (i8* noundef [[TMP0:%.*]], i8* noundef [[TMP1:%.*]]) #[[ATTR4]] {
+// CHECK1-NEXT:  entry:
+// CHECK1-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
+// CHECK1-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 8
+// CHECK1-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
+// CHECK1-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
+// CHECK1-NEXT:    [[TMP2:%.*]] = load i64, i64* @{{reduction_size[.].+[.]}}, align 8
+// CHECK1-NEXT:    [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8
+// CHECK1-NEXT:    [[TMP4:%.*]] = bitcast i8* [[TMP3]] to double*
+// CHECK1-NEXT:    [[TMP5:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
+// CHECK1-NEXT:    [[TMP6:%.*]] = bitcast i8* [[TMP5]] to double*
+// CHECK1-NEXT:    [[TMP7:%.*]] = getelementptr double, double* [[TMP4]], i64 [[TMP2]]
+// CHECK1-NEXT:    [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq double* [[TMP4]], [[TMP7]]
+// CHECK1-NEXT:    br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
+// CHECK1:       omp.arraycpy.body:
+// CHECK1-NEXT:    [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi double* [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
+// CHECK1-NEXT:    [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi double* [ [[TMP4]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
+// CHECK1-NEXT:    [[TMP8:%.*]] = load double, double* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 8
+// CHECK1-NEXT:    [[TMP9:%.*]] = load double, double* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 8
+// CHECK1-NEXT:    [[ADD:%.*]] = fadd double [[TMP8]], [[TMP9]]
+// CHECK1-NEXT:    store double [[ADD]], double* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 8
+// CHECK1-NEXT:    [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr double, double* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
+// CHECK1-NEXT:    [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr double, double* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
+// CHECK1-NEXT:    [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq double* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP7]]
+// CHECK1-NEXT:    br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]]
+// CHECK1:       omp.arraycpy.done2:
+// CHECK1-NEXT:    ret void
+//
+//
+// CHECK1-LABEL: define {{[^@]+}}@.omp.reduction.reduction_func.2
+// CHECK1-SAME: (i8* noundef [[TMP0:%.*]], i8* noundef [[TMP1:%.*]]) #[[ATTR4]] {
+// CHECK1-NEXT:  entry:
+// CHECK1-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
+// CHECK1-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 8
+// CHECK1-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
+// CHECK1-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
+// CHECK1-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
+// CHECK1-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [2 x i8*]*
+// CHECK1-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
+// CHECK1-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [2 x i8*]*
+// CHECK1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i64 0, i64 0
+// CHECK1-NEXT:    [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8
+// CHECK1-NEXT:    [[TMP8:%.*]] = bitcast i8* [[TMP7]] to double*
+// CHECK1-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP3]], i64 0, i64 0
+// CHECK1-NEXT:    [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8
+// CHECK1-NEXT:    [[TMP11:%.*]] = bitcast i8* [[TMP10]] to double*
+// CHECK1-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP3]], i64 0, i64 1
+// CHECK1-NEXT:    [[TMP13:%.*]] = load i8*, i8** [[TMP12]], align 8
+// CHECK1-NEXT:    [[TMP14:%.*]] = ptrtoint i8* [[TMP13]] to i64
+// CHECK1-NEXT:    [[TMP15:%.*]] = getelementptr double, double* [[TMP11]], i64 [[TMP14]]
+// CHECK1-NEXT:    [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq double* [[TMP11]], [[TMP15]]
+// CHECK1-NEXT:    br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
+// CHECK1:       omp.arraycpy.body:
+// CHECK1-NEXT:    [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi double* [ [[TMP8]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
+// CHECK1-NEXT:    [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi double* [ [[TMP11]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
+// CHECK1-NEXT:    [[TMP16:%.*]] = load double, double* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 8
+// CHECK1-NEXT:    [[TMP17:%.*]] = load double, double* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 8
+// CHECK1-NEXT:    [[ADD:%.*]] = fadd double [[TMP16]], [[TMP17]]
+// CHECK1-NEXT:    store double [[ADD]], double* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 8
+// CHECK1-NEXT:    [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr double, double* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
+// CHECK1-NEXT:    [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr double, double* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
+// CHECK1-NEXT:    [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq double* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP15]]
+// CHECK1-NEXT:    br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]]
+// CHECK1:       omp.arraycpy.done2:
+// CHECK1-NEXT:    ret void
+//
+//
+// CHECK1-LABEL: define {{[^@]+}}@_ZN2S2C2Ev
+// CHECK1-SAME: (%class.S2* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 {
+// CHECK1-NEXT:  entry:
+// CHECK1-NEXT:    [[THIS_ADDR:%.*]] = alloca %class.S2*, align 8
+// CHECK1-NEXT:    store %class.S2* [[THIS]], %class.S2** [[THIS_ADDR]], align 8
+// CHECK1-NEXT:    [[THIS1:%.*]] = load %class.S2*, %class.S2** [[THIS_ADDR]], align 8
+// CHECK1-NEXT:    [[A:%.*]] = getelementptr inbounds [[CLASS_S2:%.*]], %class.S2* [[THIS1]], i32 0, i32 0
+// CHECK1-NEXT:    store i32 0, i32* [[A]], align 4
+// CHECK1-NEXT:    ret void
+//
+//
+// CHECK1-LABEL: define {{[^@]+}}@main
+// CHECK1-SAME: () #[[ATTR10:[0-9]+]] {
+// CHECK1-NEXT:  entry:
+// CHECK1-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[A:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    store i32 0, i32* [[RETVAL]], align 4
+// CHECK1-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z3barv()
+// CHECK1-NEXT:    store i32 [[CALL]], i32* [[A]], align 4
+// CHECK1-NEXT:    ret i32 0
+//
+//
+// CHECK1-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
+// CHECK1-SAME: () #[[ATTR11:[0-9]+]] {
+// CHECK1-NEXT:  entry:
+// CHECK1-NEXT:    call void @__tgt_register_requires(i64 1)
+// CHECK1-NEXT:    ret void
+//
+//
+// CHECK2-LABEL: define {{[^@]+}}@_Z3sumPiiS_
+// CHECK2-SAME: (i32* noundef [[INPUT:%.*]], i32 noundef [[SIZE:%.*]], i32* noundef [[OUTPUT:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK2-NEXT:  entry:
+// CHECK2-NEXT:    [[INPUT_ADDR:%.*]] = alloca i32*, align 4
+// CHECK2-NEXT:    [[SIZE_ADDR:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[OUTPUT_ADDR:%.*]] = alloca i32*, align 4
+// CHECK2-NEXT:    [[SIZE_CASTED:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x i8*], align 4
+// CHECK2-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x i8*], align 4
+// CHECK2-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x i8*], align 4
+// CHECK2-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [3 x i64], align 4
+// CHECK2-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[SIZE_CASTED4:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOFFLOAD_BASEPTRS7:%.*]] = alloca [3 x i8*], align 4
+// CHECK2-NEXT:    [[DOTOFFLOAD_PTRS8:%.*]] = alloca [3 x i8*], align 4
+// CHECK2-NEXT:    [[DOTOFFLOAD_MAPPERS9:%.*]] = alloca [3 x i8*], align 4
+// CHECK2-NEXT:    [[DOTOFFLOAD_SIZES10:%.*]] = alloca [3 x i64], align 4
+// CHECK2-NEXT:    [[_TMP11:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTCAPTURE_EXPR_12:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTCAPTURE_EXPR_13:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[A:%.*]] = alloca [10 x i32], align 4
+// CHECK2-NEXT:    [[SIZE_CASTED20:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOFFLOAD_BASEPTRS22:%.*]] = alloca [2 x i8*], align 4
+// CHECK2-NEXT:    [[DOTOFFLOAD_PTRS23:%.*]] = alloca [2 x i8*], align 4
+// CHECK2-NEXT:    [[DOTOFFLOAD_MAPPERS24:%.*]] = alloca [2 x i8*], align 4
+// CHECK2-NEXT:    [[SIZE_CASTED27:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOFFLOAD_BASEPTRS29:%.*]] = alloca [2 x i8*], align 4
+// CHECK2-NEXT:    [[DOTOFFLOAD_PTRS30:%.*]] = alloca [2 x i8*], align 4
+// CHECK2-NEXT:    [[DOTOFFLOAD_MAPPERS31:%.*]] = alloca [2 x i8*], align 4
+// CHECK2-NEXT:    store i32* [[INPUT]], i32** [[INPUT_ADDR]], align 4
+// CHECK2-NEXT:    store i32 [[SIZE]], i32* [[SIZE_ADDR]], align 4
+// CHECK2-NEXT:    store i32* [[OUTPUT]], i32** [[OUTPUT_ADDR]], align 4
+// CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[SIZE_ADDR]], align 4
+// CHECK2-NEXT:    store i32 [[TMP0]], i32* [[SIZE_CASTED]], align 4
+// CHECK2-NEXT:    [[TMP1:%.*]] = load i32, i32* [[SIZE_CASTED]], align 4
+// CHECK2-NEXT:    [[TMP2:%.*]] = load i32*, i32** [[OUTPUT_ADDR]], align 4
+// CHECK2-NEXT:    [[TMP3:%.*]] = load i32*, i32** [[INPUT_ADDR]], align 4
+// CHECK2-NEXT:    [[TMP4:%.*]] = load i32*, i32** [[OUTPUT_ADDR]], align 4
+// CHECK2-NEXT:    [[TMP5:%.*]] = load i32*, i32** [[OUTPUT_ADDR]], align 4
+// CHECK2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP5]], i32 0
+// CHECK2-NEXT:    [[TMP6:%.*]] = load i32*, i32** [[INPUT_ADDR]], align 4
+// CHECK2-NEXT:    [[TMP7:%.*]] = load i32*, i32** [[INPUT_ADDR]], align 4
+// CHECK2-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[TMP7]], i32 0
+// CHECK2-NEXT:    [[TMP8:%.*]] = load i32, i32* [[SIZE_ADDR]], align 4
+// CHECK2-NEXT:    [[TMP9:%.*]] = mul nuw i32 [[TMP8]], 4
+// CHECK2-NEXT:    [[TMP10:%.*]] = sext i32 [[TMP9]] to i64
+// CHECK2-NEXT:    [[TMP11:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK2-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP11]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes to i8*), i32 24, i1 false)
+// CHECK2-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP13:%.*]] = bitcast i8** [[TMP12]] to i32*
+// CHECK2-NEXT:    store i32 [[TMP1]], i32* [[TMP13]], align 4
+// CHECK2-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i32*
+// CHECK2-NEXT:    store i32 [[TMP1]], i32* [[TMP15]], align 4
+// CHECK2-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
+// CHECK2-NEXT:    store i8* null, i8** [[TMP16]], align 4
+// CHECK2-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
+// CHECK2-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32**
+// CHECK2-NEXT:    store i32* [[TMP4]], i32** [[TMP18]], align 4
+// CHECK2-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
+// CHECK2-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32**
+// CHECK2-NEXT:    store i32* [[ARRAYIDX]], i32** [[TMP20]], align 4
+// CHECK2-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK2-NEXT:    store i8* null, i8** [[TMP21]], align 4
+// CHECK2-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK2-NEXT:    [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i32**
+// CHECK2-NEXT:    store i32* [[TMP6]], i32** [[TMP23]], align 4
+// CHECK2-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK2-NEXT:    [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i32**
+// CHECK2-NEXT:    store i32* [[ARRAYIDX1]], i32** [[TMP25]], align 4
+// CHECK2-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
+// CHECK2-NEXT:    store i64 [[TMP10]], i64* [[TMP26]], align 4
+// CHECK2-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK2-NEXT:    store i8* null, i8** [[TMP27]], align 4
+// CHECK2-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP31:%.*]] = load i32, i32* [[SIZE_ADDR]], align 4
+// CHECK2-NEXT:    store i32 [[TMP31]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT:    [[TMP32:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP32]], 0
+// CHECK2-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
+// CHECK2-NEXT:    [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1
+// CHECK2-NEXT:    store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK2-NEXT:    [[TMP33:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP33]], 1
+// CHECK2-NEXT:    [[TMP34:%.*]] = zext i32 [[ADD]] to i64
+// CHECK2-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB4:[0-9]+]], i64 -1, i64 [[TMP34]])
+// CHECK2-NEXT:    [[TMP35:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB4]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l69.region_id, i32 3, i8** [[TMP28]], i8** [[TMP29]], i64* [[TMP30]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK2-NEXT:    [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0
+// CHECK2-NEXT:    br i1 [[TMP36]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK2:       omp_offload.failed:
+// CHECK2-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l69(i32 [[TMP1]], i32* [[TMP2]], i32* [[TMP3]]) #[[ATTR2:[0-9]+]]
+// CHECK2-NEXT:    br label [[OMP_OFFLOAD_CONT]]
+// CHECK2:       omp_offload.cont:
+// CHECK2-NEXT:    [[TMP37:%.*]] = load i32, i32* [[SIZE_ADDR]], align 4
+// CHECK2-NEXT:    store i32 [[TMP37]], i32* [[SIZE_CASTED4]], align 4
+// CHECK2-NEXT:    [[TMP38:%.*]] = load i32, i32* [[SIZE_CASTED4]], align 4
+// CHECK2-NEXT:    [[TMP39:%.*]] = load i32*, i32** [[OUTPUT_ADDR]], align 4
+// CHECK2-NEXT:    [[TMP40:%.*]] = load i32*, i32** [[INPUT_ADDR]], align 4
+// CHECK2-NEXT:    [[TMP41:%.*]] = load i32*, i32** [[OUTPUT_ADDR]], align 4
+// CHECK2-NEXT:    [[TMP42:%.*]] = load i32*, i32** [[OUTPUT_ADDR]], align 4
+// CHECK2-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[TMP42]], i32 0
+// CHECK2-NEXT:    [[TMP43:%.*]] = load i32*, i32** [[INPUT_ADDR]], align 4
+// CHECK2-NEXT:    [[TMP44:%.*]] = load i32*, i32** [[INPUT_ADDR]], align 4
+// CHECK2-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, i32* [[TMP44]], i32 0
+// CHECK2-NEXT:    [[TMP45:%.*]] = load i32, i32* [[SIZE_ADDR]], align 4
+// CHECK2-NEXT:    [[TMP46:%.*]] = mul nuw i32 [[TMP45]], 4
+// CHECK2-NEXT:    [[TMP47:%.*]] = sext i32 [[TMP46]] to i64
+// CHECK2-NEXT:    [[TMP48:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES10]] to i8*
+// CHECK2-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP48]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes.7 to i8*), i32 24, i1 false)
+// CHECK2-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP50:%.*]] = bitcast i8** [[TMP49]] to i32*
+// CHECK2-NEXT:    store i32 [[TMP38]], i32* [[TMP50]], align 4
+// CHECK2-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS8]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP52:%.*]] = bitcast i8** [[TMP51]] to i32*
+// CHECK2-NEXT:    store i32 [[TMP38]], i32* [[TMP52]], align 4
+// CHECK2-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS9]], i32 0, i32 0
+// CHECK2-NEXT:    store i8* null, i8** [[TMP53]], align 4
+// CHECK2-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 1
+// CHECK2-NEXT:    [[TMP55:%.*]] = bitcast i8** [[TMP54]] to i32**
+// CHECK2-NEXT:    store i32* [[TMP41]], i32** [[TMP55]], align 4
+// CHECK2-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS8]], i32 0, i32 1
+// CHECK2-NEXT:    [[TMP57:%.*]] = bitcast i8** [[TMP56]] to i32**
+// CHECK2-NEXT:    store i32* [[ARRAYIDX5]], i32** [[TMP57]], align 4
+// CHECK2-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS9]], i32 0, i32 1
+// CHECK2-NEXT:    store i8* null, i8** [[TMP58]], align 4
+// CHECK2-NEXT:    [[TMP59:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 2
+// CHECK2-NEXT:    [[TMP60:%.*]] = bitcast i8** [[TMP59]] to i32**
+// CHECK2-NEXT:    store i32* [[TMP43]], i32** [[TMP60]], align 4
+// CHECK2-NEXT:    [[TMP61:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS8]], i32 0, i32 2
+// CHECK2-NEXT:    [[TMP62:%.*]] = bitcast i8** [[TMP61]] to i32**
+// CHECK2-NEXT:    store i32* [[ARRAYIDX6]], i32** [[TMP62]], align 4
+// CHECK2-NEXT:    [[TMP63:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES10]], i32 0, i32 2
+// CHECK2-NEXT:    store i64 [[TMP47]], i64* [[TMP63]], align 4
+// CHECK2-NEXT:    [[TMP64:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS9]], i32 0, i32 2
+// CHECK2-NEXT:    store i8* null, i8** [[TMP64]], align 4
+// CHECK2-NEXT:    [[TMP65:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP66:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS8]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP67:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES10]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP68:%.*]] = load i32, i32* [[SIZE_ADDR]], align 4
+// CHECK2-NEXT:    store i32 [[TMP68]], i32* [[DOTCAPTURE_EXPR_12]], align 4
+// CHECK2-NEXT:    [[TMP69:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_12]], align 4
+// CHECK2-NEXT:    [[SUB14:%.*]] = sub nsw i32 [[TMP69]], 0
+// CHECK2-NEXT:    [[DIV15:%.*]] = sdiv i32 [[SUB14]], 1
+// CHECK2-NEXT:    [[SUB16:%.*]] = sub nsw i32 [[DIV15]], 1
+// CHECK2-NEXT:    store i32 [[SUB16]], i32* [[DOTCAPTURE_EXPR_13]], align 4
+// CHECK2-NEXT:    [[TMP70:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_13]], align 4
+// CHECK2-NEXT:    [[ADD17:%.*]] = add nsw i32 [[TMP70]], 1
+// CHECK2-NEXT:    [[TMP71:%.*]] = zext i32 [[ADD17]] to i64
+// CHECK2-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB4]], i64 -1, i64 [[TMP71]])
+// CHECK2-NEXT:    [[TMP72:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB4]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l73.region_id, i32 3, i8** [[TMP65]], i8** [[TMP66]], i64* [[TMP67]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.8, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK2-NEXT:    [[TMP73:%.*]] = icmp ne i32 [[TMP72]], 0
+// CHECK2-NEXT:    br i1 [[TMP73]], label [[OMP_OFFLOAD_FAILED18:%.*]], label [[OMP_OFFLOAD_CONT19:%.*]]
+// CHECK2:       omp_offload.failed18:
+// CHECK2-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l73(i32 [[TMP38]], i32* [[TMP39]], i32* [[TMP40]]) #[[ATTR2]]
+// CHECK2-NEXT:    br label [[OMP_OFFLOAD_CONT19]]
+// CHECK2:       omp_offload.cont19:
+// CHECK2-NEXT:    [[TMP74:%.*]] = load i32, i32* [[SIZE_ADDR]], align 4
+// CHECK2-NEXT:    store i32 [[TMP74]], i32* [[SIZE_CASTED20]], align 4
+// CHECK2-NEXT:    [[TMP75:%.*]] = load i32, i32* [[SIZE_CASTED20]], align 4
+// CHECK2-NEXT:    [[ARRAYIDX21:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[A]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP77:%.*]] = bitcast i8** [[TMP76]] to i32*
+// CHECK2-NEXT:    store i32 [[TMP75]], i32* [[TMP77]], align 4
+// CHECK2-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP79:%.*]] = bitcast i8** [[TMP78]] to i32*
+// CHECK2-NEXT:    store i32 [[TMP75]], i32* [[TMP79]], align 4
+// CHECK2-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i32 0, i32 0
+// CHECK2-NEXT:    store i8* null, i8** [[TMP80]], align 4
+// CHECK2-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 1
+// CHECK2-NEXT:    [[TMP82:%.*]] = bitcast i8** [[TMP81]] to [10 x i32]**
+// CHECK2-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[TMP82]], align 4
+// CHECK2-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 1
+// CHECK2-NEXT:    [[TMP84:%.*]] = bitcast i8** [[TMP83]] to i32**
+// CHECK2-NEXT:    store i32* [[ARRAYIDX21]], i32** [[TMP84]], align 4
+// CHECK2-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i32 0, i32 1
+// CHECK2-NEXT:    store i8* null, i8** [[TMP85]], align 4
+// CHECK2-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP88:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB4]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l78.region_id, i32 2, i8** [[TMP86]], i8** [[TMP87]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.11, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.12, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
+// CHECK2-NEXT:    [[TMP89:%.*]] = icmp ne i32 [[TMP88]], 0
+// CHECK2-NEXT:    br i1 [[TMP89]], label [[OMP_OFFLOAD_FAILED25:%.*]], label [[OMP_OFFLOAD_CONT26:%.*]]
+// CHECK2:       omp_offload.failed25:
+// CHECK2-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l78(i32 [[TMP75]], [10 x i32]* [[A]]) #[[ATTR2]]
+// CHECK2-NEXT:    br label [[OMP_OFFLOAD_CONT26]]
+// CHECK2:       omp_offload.cont26:
+// CHECK2-NEXT:    [[TMP90:%.*]] = load i32, i32* [[SIZE_ADDR]], align 4
+// CHECK2-NEXT:    store i32 [[TMP90]], i32* [[SIZE_CASTED27]], align 4
+// CHECK2-NEXT:    [[TMP91:%.*]] = load i32, i32* [[SIZE_CASTED27]], align 4
+// CHECK2-NEXT:    [[ARRAYIDX28:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[A]], i32 0, i32 3
+// CHECK2-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP93:%.*]] = bitcast i8** [[TMP92]] to i32*
+// CHECK2-NEXT:    store i32 [[TMP91]], i32* [[TMP93]], align 4
+// CHECK2-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP95:%.*]] = bitcast i8** [[TMP94]] to i32*
+// CHECK2-NEXT:    store i32 [[TMP91]], i32* [[TMP95]], align 4
+// CHECK2-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i32 0, i32 0
+// CHECK2-NEXT:    store i8* null, i8** [[TMP96]], align 4
+// CHECK2-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 1
+// CHECK2-NEXT:    [[TMP98:%.*]] = bitcast i8** [[TMP97]] to [10 x i32]**
+// CHECK2-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[TMP98]], align 4
+// CHECK2-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 1
+// CHECK2-NEXT:    [[TMP100:%.*]] = bitcast i8** [[TMP99]] to i32**
+// CHECK2-NEXT:    store i32* [[ARRAYIDX28]], i32** [[TMP100]], align 4
+// CHECK2-NEXT:    [[TMP101:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i32 0, i32 1
+// CHECK2-NEXT:    store i8* null, i8** [[TMP101]], align 4
+// CHECK2-NEXT:    [[TMP102:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP103:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP104:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB4]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l81.region_id, i32 2, i8** [[TMP102]], i8** [[TMP103]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.15, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.16, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
+// CHECK2-NEXT:    [[TMP105:%.*]] = icmp ne i32 [[TMP104]], 0
+// CHECK2-NEXT:    br i1 [[TMP105]], label [[OMP_OFFLOAD_FAILED32:%.*]], label [[OMP_OFFLOAD_CONT33:%.*]]
+// CHECK2:       omp_offload.failed32:
+// CHECK2-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l81(i32 [[TMP91]], [10 x i32]* [[A]]) #[[ATTR2]]
+// CHECK2-NEXT:    br label [[OMP_OFFLOAD_CONT33]]
+// CHECK2:       omp_offload.cont33:
+// CHECK2-NEXT:    ret void
+//
+//
+// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l69
+// CHECK2-SAME: (i32 noundef [[SIZE:%.*]], i32* noundef [[OUTPUT:%.*]], i32* noundef [[INPUT:%.*]]) #[[ATTR1:[0-9]+]] {
+// CHECK2-NEXT:  entry:
+// CHECK2-NEXT:    [[SIZE_ADDR:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[OUTPUT_ADDR:%.*]] = alloca i32*, align 4
+// CHECK2-NEXT:    [[INPUT_ADDR:%.*]] = alloca i32*, align 4
+// CHECK2-NEXT:    [[SIZE_CASTED:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    store i32 [[SIZE]], i32* [[SIZE_ADDR]], align 4
+// CHECK2-NEXT:    store i32* [[OUTPUT]], i32** [[OUTPUT_ADDR]], align 4
+// CHECK2-NEXT:    store i32* [[INPUT]], i32** [[INPUT_ADDR]], align 4
+// CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[SIZE_ADDR]], align 4
+// CHECK2-NEXT:    store i32 [[TMP0]], i32* [[SIZE_CASTED]], align 4
+// CHECK2-NEXT:    [[TMP1:%.*]] = load i32, i32* [[SIZE_CASTED]], align 4
+// CHECK2-NEXT:    [[TMP2:%.*]] = load i32*, i32** [[OUTPUT_ADDR]], align 4
+// CHECK2-NEXT:    [[TMP3:%.*]] = load i32*, i32** [[INPUT_ADDR]], align 4
+// CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB4]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32 [[TMP1]], i32* [[TMP2]], i32* [[TMP3]])
+// CHECK2-NEXT:    ret void
+//
+//
+// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined.
+// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[SIZE:%.*]], i32* noundef [[OUTPUT:%.*]], i32* noundef [[INPUT:%.*]]) #[[ATTR1]] {
+// CHECK2-NEXT:  entry:
+// CHECK2-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
+// CHECK2-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4
+// CHECK2-NEXT:    [[SIZE_ADDR:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[OUTPUT_ADDR:%.*]] = alloca i32*, align 4
+// CHECK2-NEXT:    [[INPUT_ADDR:%.*]] = alloca i32*, align 4
+// CHECK2-NEXT:    [[OUTPUT1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[TMP:%.*]] = alloca i32*, align 4
+// CHECK2-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[_TMP2:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[I:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[I5:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[SIZE_CASTED:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 4
+// CHECK2-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK2-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
+// CHECK2-NEXT:    store i32 [[SIZE]], i32* [[SIZE_ADDR]], align 4
+// CHECK2-NEXT:    store i32* [[OUTPUT]], i32** [[OUTPUT_ADDR]], align 4
+// CHECK2-NEXT:    store i32* [[INPUT]], i32** [[INPUT_ADDR]], align 4
+// CHECK2-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[OUTPUT_ADDR]], align 4
+// CHECK2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i32 0
+// CHECK2-NEXT:    store i32 0, i32* [[OUTPUT1]], align 4
+// CHECK2-NEXT:    [[TMP1:%.*]] = load i32*, i32** [[OUTPUT_ADDR]], align 4
+// CHECK2-NEXT:    [[TMP2:%.*]] = ptrtoint i32* [[TMP1]] to i64
+// CHECK2-NEXT:    [[TMP3:%.*]] = ptrtoint i32* [[ARRAYIDX]] to i64
+// CHECK2-NEXT:    [[TMP4:%.*]] = sub i64 [[TMP2]], [[TMP3]]
+// CHECK2-NEXT:    [[TMP5:%.*]] = sdiv exact i64 [[TMP4]], ptrtoint (i32* getelementptr (i32, i32* null, i32 1) to i64)
+// CHECK2-NEXT:    [[TMP6:%.*]] = getelementptr i32, i32* [[OUTPUT1]], i64 [[TMP5]]
+// CHECK2-NEXT:    store i32* [[TMP6]], i32** [[TMP]], align 4
+// CHECK2-NEXT:    [[TMP7:%.*]] = load i32, i32* [[SIZE_ADDR]], align 4
+// CHECK2-NEXT:    store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0
+// CHECK2-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
+// CHECK2-NEXT:    [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1
+// CHECK2-NEXT:    store i32 [[SUB4]], i32* [[DOTCAPTURE_EXPR_3]], align 4
+// CHECK2-NEXT:    store i32 0, i32* [[I]], align 4
+// CHECK2-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT:    [[CMP:%.*]] = icmp slt i32 0, [[TMP9]]
+// CHECK2-NEXT:    br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
+// CHECK2:       omp.precond.then:
+// CHECK2-NEXT:    store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
+// CHECK2-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4
+// CHECK2-NEXT:    store i32 [[TMP10]], i32* [[DOTOMP_COMB_UB]], align 4
+// CHECK2-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE]], align 4
+// CHECK2-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
+// CHECK2-NEXT:    [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK2-NEXT:    [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4
+// CHECK2-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK2-NEXT:    [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
+// CHECK2-NEXT:    [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4
+// CHECK2-NEXT:    [[CMP6:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]]
+// CHECK2-NEXT:    br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// CHECK2:       cond.true:
+// CHECK2-NEXT:    [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4
+// CHECK2-NEXT:    br label [[COND_END:%.*]]
+// CHECK2:       cond.false:
+// CHECK2-NEXT:    [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
+// CHECK2-NEXT:    br label [[COND_END]]
+// CHECK2:       cond.end:
+// CHECK2-NEXT:    [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ]
+// CHECK2-NEXT:    store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4
+// CHECK2-NEXT:    [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
+// CHECK2-NEXT:    store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4
+// CHECK2-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
+// CHECK2:       omp.inner.for.cond:
+// CHECK2-NEXT:    [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK2-NEXT:    [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
+// CHECK2-NEXT:    [[CMP7:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]]
+// CHECK2-NEXT:    br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// CHECK2:       omp.inner.for.body:
+// CHECK2-NEXT:    [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
+// CHECK2-NEXT:    [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
+// CHECK2-NEXT:    [[TMP22:%.*]] = load i32, i32* [[SIZE_ADDR]], align 4
+// CHECK2-NEXT:    store i32 [[TMP22]], i32* [[SIZE_CASTED]], align 4
+// CHECK2-NEXT:    [[TMP23:%.*]] = load i32, i32* [[SIZE_CASTED]], align 4
+// CHECK2-NEXT:    [[TMP24:%.*]] = load i32*, i32** [[TMP]], align 4
+// CHECK2-NEXT:    [[TMP25:%.*]] = load i32*, i32** [[INPUT_ADDR]], align 4
+// CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP20]], i32 [[TMP21]], i32 [[TMP23]], i32* [[TMP24]], i32* [[TMP25]])
+// CHECK2-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
+// CHECK2:       omp.inner.for.inc:
+// CHECK2-NEXT:    [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK2-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
+// CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP26]], [[TMP27]]
+// CHECK2-NEXT:    store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
+// CHECK2-NEXT:    br label [[OMP_INNER_FOR_COND]]
+// CHECK2:       omp.inner.for.end:
+// CHECK2-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
+// CHECK2:       omp.loop.exit:
+// CHECK2-NEXT:    [[TMP28:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK2-NEXT:    [[TMP29:%.*]] = load i32, i32* [[TMP28]], align 4
+// CHECK2-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP29]])
+// CHECK2-NEXT:    br label [[OMP_PRECOND_END]]
+// CHECK2:       omp.precond.end:
+// CHECK2-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP31:%.*]] = bitcast i32* [[OUTPUT1]] to i8*
+// CHECK2-NEXT:    store i8* [[TMP31]], i8** [[TMP30]], align 4
+// CHECK2-NEXT:    [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK2-NEXT:    [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4
+// CHECK2-NEXT:    [[TMP34:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8*
+// CHECK2-NEXT:    [[TMP35:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP33]], i32 1, i32 4, i8* [[TMP34]], void (i8*, i8*)* @.omp.reduction.reduction_func.2, [8 x i32]* @.gomp_critical_user_.reduction.var)
+// CHECK2-NEXT:    switch i32 [[TMP35]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// CHECK2-NEXT:    i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
+// CHECK2-NEXT:    i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
+// CHECK2-NEXT:    ]
+// CHECK2:       .omp.reduction.case1:
+// CHECK2-NEXT:    [[TMP36:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+// CHECK2-NEXT:    [[TMP37:%.*]] = load i32, i32* [[OUTPUT1]], align 4
+// CHECK2-NEXT:    [[ADD8:%.*]] = add nsw i32 [[TMP36]], [[TMP37]]
+// CHECK2-NEXT:    store i32 [[ADD8]], i32* [[ARRAYIDX]], align 4
+// CHECK2-NEXT:    call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP33]], [8 x i32]* @.gomp_critical_user_.reduction.var)
+// CHECK2-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
+// CHECK2:       .omp.reduction.case2:
+// CHECK2-NEXT:    [[TMP38:%.*]] = load i32, i32* [[OUTPUT1]], align 4
+// CHECK2-NEXT:    [[TMP39:%.*]] = atomicrmw add i32* [[ARRAYIDX]], i32 [[TMP38]] monotonic, align 4
+// CHECK2-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
+// CHECK2:       .omp.reduction.default:
+// CHECK2-NEXT:    ret void
+//
+//
+// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..1
+// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[SIZE:%.*]], i32* noundef [[OUTPUT:%.*]], i32* noundef [[INPUT:%.*]]) #[[ATTR1]] {
+// CHECK2-NEXT:  entry:
+// CHECK2-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
+// CHECK2-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4
+// CHECK2-NEXT:    [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[SIZE_ADDR:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[OUTPUT_ADDR:%.*]] = alloca i32*, align 4
+// CHECK2-NEXT:    [[INPUT_ADDR:%.*]] = alloca i32*, align 4
+// CHECK2-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[I:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_LB:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_UB:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[OUTPUT3:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[_TMP4:%.*]] = alloca i32*, align 4
+// CHECK2-NEXT:    [[I5:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 4
+// CHECK2-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK2-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
+// CHECK2-NEXT:    store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4
+// CHECK2-NEXT:    store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4
+// CHECK2-NEXT:    store i32 [[SIZE]], i32* [[SIZE_ADDR]], align 4
+// CHECK2-NEXT:    store i32* [[OUTPUT]], i32** [[OUTPUT_ADDR]], align 4
+// CHECK2-NEXT:    store i32* [[INPUT]], i32** [[INPUT_ADDR]], align 4
+// CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[SIZE_ADDR]], align 4
+// CHECK2-NEXT:    store i32 [[TMP0]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP1]], 0
+// CHECK2-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
+// CHECK2-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
+// CHECK2-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT:    store i32 0, i32* [[I]], align 4
+// CHECK2-NEXT:    [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT:    [[CMP:%.*]] = icmp slt i32 0, [[TMP2]]
+// CHECK2-NEXT:    br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
+// CHECK2:       omp.precond.then:
+// CHECK2-NEXT:    store i32 0, i32* [[DOTOMP_LB]], align 4
+// CHECK2-NEXT:    [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT:    store i32 [[TMP3]], i32* [[DOTOMP_UB]], align 4
+// CHECK2-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4
+// CHECK2-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4
+// CHECK2-NEXT:    store i32 [[TMP4]], i32* [[DOTOMP_LB]], align 4
+// CHECK2-NEXT:    store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4
+// CHECK2-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE]], align 4
+// CHECK2-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
+// CHECK2-NEXT:    [[TMP6:%.*]] = load i32*, i32** [[OUTPUT_ADDR]], align 4
+// CHECK2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP6]], i32 0
+// CHECK2-NEXT:    store i32 0, i32* [[OUTPUT3]], align 4
+// CHECK2-NEXT:    [[TMP7:%.*]] = load i32*, i32** [[OUTPUT_ADDR]], align 4
+// CHECK2-NEXT:    [[TMP8:%.*]] = ptrtoint i32* [[TMP7]] to i64
+// CHECK2-NEXT:    [[TMP9:%.*]] = ptrtoint i32* [[ARRAYIDX]] to i64
+// CHECK2-NEXT:    [[TMP10:%.*]] = sub i64 [[TMP8]], [[TMP9]]
+// CHECK2-NEXT:    [[TMP11:%.*]] = sdiv exact i64 [[TMP10]], ptrtoint (i32* getelementptr (i32, i32* null, i32 1) to i64)
+// CHECK2-NEXT:    [[TMP12:%.*]] = getelementptr i32, i32* [[OUTPUT3]], i64 [[TMP11]]
+// CHECK2-NEXT:    store i32* [[TMP12]], i32** [[_TMP4]], align 4
+// CHECK2-NEXT:    [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK2-NEXT:    [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4
+// CHECK2-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP14]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK2-NEXT:    [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
+// CHECK2-NEXT:    [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT:    [[CMP6:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]]
+// CHECK2-NEXT:    br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// CHECK2:       cond.true:
+// CHECK2-NEXT:    [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT:    br label [[COND_END:%.*]]
+// CHECK2:       cond.false:
+// CHECK2-NEXT:    [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
+// CHECK2-NEXT:    br label [[COND_END]]
+// CHECK2:       cond.end:
+// CHECK2-NEXT:    [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ]
+// CHECK2-NEXT:    store i32 [[COND]], i32* [[DOTOMP_UB]], align 4
+// CHECK2-NEXT:    [[TMP19:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
+// CHECK2-NEXT:    store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4
+// CHECK2-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
+// CHECK2:       omp.inner.for.cond:
+// CHECK2-NEXT:    [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK2-NEXT:    [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
+// CHECK2-NEXT:    [[CMP7:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]]
+// CHECK2-NEXT:    br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// CHECK2:       omp.inner.for.body:
+// CHECK2-NEXT:    [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK2-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1
+// CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 0, [[MUL]]
+// CHECK2-NEXT:    store i32 [[ADD]], i32* [[I5]], align 4
+// CHECK2-NEXT:    [[TMP23:%.*]] = load i32*, i32** [[INPUT_ADDR]], align 4
+// CHECK2-NEXT:    [[TMP24:%.*]] = load i32, i32* [[I5]], align 4
+// CHECK2-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[TMP23]], i32 [[TMP24]]
+// CHECK2-NEXT:    [[TMP25:%.*]] = load i32, i32* [[ARRAYIDX8]], align 4
+// CHECK2-NEXT:    [[TMP26:%.*]] = load i32*, i32** [[_TMP4]], align 4
+// CHECK2-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, i32* [[TMP26]], i32 0
+// CHECK2-NEXT:    [[TMP27:%.*]] = load i32, i32* [[ARRAYIDX9]], align 4
+// CHECK2-NEXT:    [[ADD10:%.*]] = add nsw i32 [[TMP27]], [[TMP25]]
+// CHECK2-NEXT:    store i32 [[ADD10]], i32* [[ARRAYIDX9]], align 4
+// CHECK2-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
+// CHECK2:       omp.body.continue:
+// CHECK2-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
+// CHECK2:       omp.inner.for.inc:
+// CHECK2-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK2-NEXT:    [[ADD11:%.*]] = add nsw i32 [[TMP28]], 1
+// CHECK2-NEXT:    store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4
+// CHECK2-NEXT:    br label [[OMP_INNER_FOR_COND]]
+// CHECK2:       omp.inner.for.end:
+// CHECK2-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
+// CHECK2:       omp.loop.exit:
+// CHECK2-NEXT:    [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK2-NEXT:    [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4
+// CHECK2-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP30]])
+// CHECK2-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP32:%.*]] = bitcast i32* [[OUTPUT3]] to i8*
+// CHECK2-NEXT:    store i8* [[TMP32]], i8** [[TMP31]], align 4
+// CHECK2-NEXT:    [[TMP33:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK2-NEXT:    [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4
+// CHECK2-NEXT:    [[TMP35:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8*
+// CHECK2-NEXT:    [[TMP36:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP34]], i32 1, i32 4, i8* [[TMP35]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var)
+// CHECK2-NEXT:    switch i32 [[TMP36]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// CHECK2-NEXT:    i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
+// CHECK2-NEXT:    i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
+// CHECK2-NEXT:    ]
+// CHECK2:       .omp.reduction.case1:
+// CHECK2-NEXT:    [[TMP37:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+// CHECK2-NEXT:    [[TMP38:%.*]] = load i32, i32* [[OUTPUT3]], align 4
+// CHECK2-NEXT:    [[ADD12:%.*]] = add nsw i32 [[TMP37]], [[TMP38]]
+// CHECK2-NEXT:    store i32 [[ADD12]], i32* [[ARRAYIDX]], align 4
+// CHECK2-NEXT:    call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP34]], [8 x i32]* @.gomp_critical_user_.reduction.var)
+// CHECK2-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
+// CHECK2:       .omp.reduction.case2:
+// CHECK2-NEXT:    [[TMP39:%.*]] = load i32, i32* [[OUTPUT3]], align 4
+// CHECK2-NEXT:    [[TMP40:%.*]] = atomicrmw add i32* [[ARRAYIDX]], i32 [[TMP39]] monotonic, align 4
+// CHECK2-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
+// CHECK2:       .omp.reduction.default:
+// CHECK2-NEXT:    br label [[OMP_PRECOND_END]]
+// CHECK2:       omp.precond.end:
+// CHECK2-NEXT:    ret void
+//
+//
+// CHECK2-LABEL: define {{[^@]+}}@.omp.reduction.reduction_func
+// CHECK2-SAME: (i8* noundef [[TMP0:%.*]], i8* noundef [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] {
+// CHECK2-NEXT:  entry:
+// CHECK2-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 4
+// CHECK2-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 4
+// CHECK2-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 4
+// CHECK2-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 4
+// CHECK2-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 4
+// CHECK2-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [1 x i8*]*
+// CHECK2-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 4
+// CHECK2-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]*
+// CHECK2-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 4
+// CHECK2-NEXT:    [[TMP8:%.*]] = bitcast i8* [[TMP7]] to i32*
+// CHECK2-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP3]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 4
+// CHECK2-NEXT:    [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i32*
+// CHECK2-NEXT:    [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4
+// CHECK2-NEXT:    [[TMP13:%.*]] = load i32, i32* [[TMP8]], align 4
+// CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
+// CHECK2-NEXT:    store i32 [[ADD]], i32* [[TMP11]], align 4
+// CHECK2-NEXT:    ret void
+//
+//
+// CHECK2-LABEL: define {{[^@]+}}@.omp.reduction.reduction_func.2
+// CHECK2-SAME: (i8* noundef [[TMP0:%.*]], i8* noundef [[TMP1:%.*]]) #[[ATTR3]] {
+// CHECK2-NEXT:  entry:
+// CHECK2-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 4
+// CHECK2-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 4
+// CHECK2-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 4
+// CHECK2-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 4
+// CHECK2-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 4
+// CHECK2-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [1 x i8*]*
+// CHECK2-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 4
+// CHECK2-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]*
+// CHECK2-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 4
+// CHECK2-NEXT:    [[TMP8:%.*]] = bitcast i8* [[TMP7]] to i32*
+// CHECK2-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP3]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 4
+// CHECK2-NEXT:    [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i32*
+// CHECK2-NEXT:    [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4
+// CHECK2-NEXT:    [[TMP13:%.*]] = load i32, i32* [[TMP8]], align 4
+// CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
+// CHECK2-NEXT:    store i32 [[ADD]], i32* [[TMP11]], align 4
+// CHECK2-NEXT:    ret void
+//
+//
+// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l73
+// CHECK2-SAME: (i32 noundef [[SIZE:%.*]], i32* noundef [[OUTPUT:%.*]], i32* noundef [[INPUT:%.*]]) #[[ATTR1]] {
+// CHECK2-NEXT:  entry:
+// CHECK2-NEXT:    [[SIZE_ADDR:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[OUTPUT_ADDR:%.*]] = alloca i32*, align 4
+// CHECK2-NEXT:    [[INPUT_ADDR:%.*]] = alloca i32*, align 4
+// CHECK2-NEXT:    [[SIZE_CASTED:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    store i32 [[SIZE]], i32* [[SIZE_ADDR]], align 4
+// CHECK2-NEXT:    store i32* [[OUTPUT]], i32** [[OUTPUT_ADDR]], align 4
+// CHECK2-NEXT:    store i32* [[INPUT]], i32** [[INPUT_ADDR]], align 4
+// CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[SIZE_ADDR]], align 4
+// CHECK2-NEXT:    store i32 [[TMP0]], i32* [[SIZE_CASTED]], align 4
+// CHECK2-NEXT:    [[TMP1:%.*]] = load i32, i32* [[SIZE_CASTED]], align 4
+// CHECK2-NEXT:    [[TMP2:%.*]] = load i32*, i32** [[OUTPUT_ADDR]], align 4
+// CHECK2-NEXT:    [[TMP3:%.*]] = load i32*, i32** [[INPUT_ADDR]], align 4
+// CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB4]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP1]], i32* [[TMP2]], i32* [[TMP3]])
+// CHECK2-NEXT:    ret void
+//
+//
+// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..3
+// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[SIZE:%.*]], i32* noundef [[OUTPUT:%.*]], i32* noundef [[INPUT:%.*]]) #[[ATTR1]] {
+// CHECK2-NEXT:  entry:
+// CHECK2-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
+// CHECK2-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4
+// CHECK2-NEXT:    [[SIZE_ADDR:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[OUTPUT_ADDR:%.*]] = alloca i32*, align 4
+// CHECK2-NEXT:    [[INPUT_ADDR:%.*]] = alloca i32*, align 4
+// CHECK2-NEXT:    [[OUTPUT2:%.*]] = alloca [3 x i32], align 4
+// CHECK2-NEXT:    [[TMP:%.*]] = alloca i32*, align 4
+// CHECK2-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[_TMP3:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[I:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[I6:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[SIZE_CASTED:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 4
+// CHECK2-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK2-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
+// CHECK2-NEXT:    store i32 [[SIZE]], i32* [[SIZE_ADDR]], align 4
+// CHECK2-NEXT:    store i32* [[OUTPUT]], i32** [[OUTPUT_ADDR]], align 4
+// CHECK2-NEXT:    store i32* [[INPUT]], i32** [[INPUT_ADDR]], align 4
+// CHECK2-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[OUTPUT_ADDR]], align 4
+// CHECK2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i32 0
+// CHECK2-NEXT:    [[TMP1:%.*]] = load i32*, i32** [[OUTPUT_ADDR]], align 4
+// CHECK2-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 2
+// CHECK2-NEXT:    [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [3 x i32], [3 x i32]* [[OUTPUT2]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP2:%.*]] = getelementptr i32, i32* [[ARRAY_BEGIN]], i32 3
+// CHECK2-NEXT:    [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i32* [[ARRAY_BEGIN]], [[TMP2]]
+// CHECK2-NEXT:    br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]]
+// CHECK2:       omp.arrayinit.body:
+// CHECK2-NEXT:    [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i32* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ]
+// CHECK2-NEXT:    store i32 0, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
+// CHECK2-NEXT:    [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
+// CHECK2-NEXT:    [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP2]]
+// CHECK2-NEXT:    br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]]
+// CHECK2:       omp.arrayinit.done:
+// CHECK2-NEXT:    [[TMP3:%.*]] = load i32*, i32** [[OUTPUT_ADDR]], align 4
+// CHECK2-NEXT:    [[TMP4:%.*]] = ptrtoint i32* [[TMP3]] to i64
+// CHECK2-NEXT:    [[TMP5:%.*]] = ptrtoint i32* [[ARRAYIDX]] to i64
+// CHECK2-NEXT:    [[TMP6:%.*]] = sub i64 [[TMP4]], [[TMP5]]
+// CHECK2-NEXT:    [[TMP7:%.*]] = sdiv exact i64 [[TMP6]], ptrtoint (i32* getelementptr (i32, i32* null, i32 1) to i64)
+// CHECK2-NEXT:    [[TMP8:%.*]] = bitcast [3 x i32]* [[OUTPUT2]] to i32*
+// CHECK2-NEXT:    [[TMP9:%.*]] = getelementptr i32, i32* [[TMP8]], i64 [[TMP7]]
+// CHECK2-NEXT:    store i32* [[TMP9]], i32** [[TMP]], align 4
+// CHECK2-NEXT:    [[RHS_BEGIN:%.*]] = bitcast [3 x i32]* [[OUTPUT2]] to i32*
+// CHECK2-NEXT:    [[TMP10:%.*]] = load i32, i32* [[SIZE_ADDR]], align 4
+// CHECK2-NEXT:    store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT:    [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP11]], 0
+// CHECK2-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
+// CHECK2-NEXT:    [[SUB5:%.*]] = sub nsw i32 [[DIV]], 1
+// CHECK2-NEXT:    store i32 [[SUB5]], i32* [[DOTCAPTURE_EXPR_4]], align 4
+// CHECK2-NEXT:    store i32 0, i32* [[I]], align 4
+// CHECK2-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT:    [[CMP:%.*]] = icmp slt i32 0, [[TMP12]]
+// CHECK2-NEXT:    br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
+// CHECK2:       omp.precond.then:
+// CHECK2-NEXT:    store i32 0, i32* [[DOTOMP_COMB_LB]], align 4
+// CHECK2-NEXT:    [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4
+// CHECK2-NEXT:    store i32 [[TMP13]], i32* [[DOTOMP_COMB_UB]], align 4
+// CHECK2-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE]], align 4
+// CHECK2-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
+// CHECK2-NEXT:    [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK2-NEXT:    [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4
+// CHECK2-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP15]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK2-NEXT:    [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
+// CHECK2-NEXT:    [[TMP17:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4
+// CHECK2-NEXT:    [[CMP7:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]]
+// CHECK2-NEXT:    br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// CHECK2:       cond.true:
+// CHECK2-NEXT:    [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4
+// CHECK2-NEXT:    br label [[COND_END:%.*]]
+// CHECK2:       cond.false:
+// CHECK2-NEXT:    [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
+// CHECK2-NEXT:    br label [[COND_END]]
+// CHECK2:       cond.end:
+// CHECK2-NEXT:    [[COND:%.*]] = phi i32 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ]
+// CHECK2-NEXT:    store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4
+// CHECK2-NEXT:    [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
+// CHECK2-NEXT:    store i32 [[TMP20]], i32* [[DOTOMP_IV]], align 4
+// CHECK2-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
+// CHECK2:       omp.inner.for.cond:
+// CHECK2-NEXT:    [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK2-NEXT:    [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
+// CHECK2-NEXT:    [[CMP8:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]]
+// CHECK2-NEXT:    br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// CHECK2:       omp.inner.for.body:
+// CHECK2-NEXT:    [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
+// CHECK2-NEXT:    [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
+// CHECK2-NEXT:    [[TMP25:%.*]] = load i32, i32* [[SIZE_ADDR]], align 4
+// CHECK2-NEXT:    store i32 [[TMP25]], i32* [[SIZE_CASTED]], align 4
+// CHECK2-NEXT:    [[TMP26:%.*]] = load i32, i32* [[SIZE_CASTED]], align 4
+// CHECK2-NEXT:    [[TMP27:%.*]] = load i32*, i32** [[TMP]], align 4
+// CHECK2-NEXT:    [[TMP28:%.*]] = load i32*, i32** [[INPUT_ADDR]], align 4
+// CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32 [[TMP23]], i32 [[TMP24]], i32 [[TMP26]], i32* [[TMP27]], i32* [[TMP28]])
+// CHECK2-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
+// CHECK2:       omp.inner.for.inc:
+// CHECK2-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK2-NEXT:    [[TMP30:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4
+// CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]]
+// CHECK2-NEXT:    store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
+// CHECK2-NEXT:    br label [[OMP_INNER_FOR_COND]]
+// CHECK2:       omp.inner.for.end:
+// CHECK2-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
+// CHECK2:       omp.loop.exit:
+// CHECK2-NEXT:    [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK2-NEXT:    [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4
+// CHECK2-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP32]])
+// CHECK2-NEXT:    br label [[OMP_PRECOND_END]]
+// CHECK2:       omp.precond.end:
+// CHECK2-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP34:%.*]] = bitcast i32* [[RHS_BEGIN]] to i8*
+// CHECK2-NEXT:    store i8* [[TMP34]], i8** [[TMP33]], align 4
+// CHECK2-NEXT:    [[TMP35:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK2-NEXT:    [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4
+// CHECK2-NEXT:    [[TMP37:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8*
+// CHECK2-NEXT:    [[TMP38:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP36]], i32 1, i32 4, i8* [[TMP37]], void (i8*, i8*)* @.omp.reduction.reduction_func.6, [8 x i32]* @.gomp_critical_user_.reduction.var)
+// CHECK2-NEXT:    switch i32 [[TMP38]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// CHECK2-NEXT:    i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
+// CHECK2-NEXT:    i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
+// CHECK2-NEXT:    ]
+// CHECK2:       .omp.reduction.case1:
+// CHECK2-NEXT:    [[TMP39:%.*]] = getelementptr i32, i32* [[ARRAYIDX]], i32 3
+// CHECK2-NEXT:    [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i32* [[ARRAYIDX]], [[TMP39]]
+// CHECK2-NEXT:    br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE13:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
+// CHECK2:       omp.arraycpy.body:
+// CHECK2-NEXT:    [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi i32* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
+// CHECK2-NEXT:    [[OMP_ARRAYCPY_DESTELEMENTPAST9:%.*]] = phi i32* [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT11:%.*]], [[OMP_ARRAYCPY_BODY]] ]
+// CHECK2-NEXT:    [[TMP40:%.*]] = load i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST9]], align 4
+// CHECK2-NEXT:    [[TMP41:%.*]] = load i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4
+// CHECK2-NEXT:    [[ADD10:%.*]] = add nsw i32 [[TMP40]], [[TMP41]]
+// CHECK2-NEXT:    store i32 [[ADD10]], i32* [[OMP_ARRAYCPY_DESTELEMENTPAST9]], align 4
+// CHECK2-NEXT:    [[OMP_ARRAYCPY_DEST_ELEMENT11]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST9]], i32 1
+// CHECK2-NEXT:    [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
+// CHECK2-NEXT:    [[OMP_ARRAYCPY_DONE12:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT11]], [[TMP39]]
+// CHECK2-NEXT:    br i1 [[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_BODY]]
+// CHECK2:       omp.arraycpy.done13:
+// CHECK2-NEXT:    call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP36]], [8 x i32]* @.gomp_critical_user_.reduction.var)
+// CHECK2-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
+// CHECK2:       .omp.reduction.case2:
+// CHECK2-NEXT:    [[TMP42:%.*]] = getelementptr i32, i32* [[ARRAYIDX]], i32 3
+// CHECK2-NEXT:    [[OMP_ARRAYCPY_ISEMPTY14:%.*]] = icmp eq i32* [[ARRAYIDX]], [[TMP42]]
+// CHECK2-NEXT:    br i1 [[OMP_ARRAYCPY_ISEMPTY14]], label [[OMP_ARRAYCPY_DONE21:%.*]], label [[OMP_ARRAYCPY_BODY15:%.*]]
+// CHECK2:       omp.arraycpy.body15:
+// CHECK2-NEXT:    [[OMP_ARRAYCPY_SRCELEMENTPAST16:%.*]] = phi i32* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT19:%.*]], [[OMP_ARRAYCPY_BODY15]] ]
+// CHECK2-NEXT:    [[OMP_ARRAYCPY_DESTELEMENTPAST17:%.*]] = phi i32* [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT18:%.*]], [[OMP_ARRAYCPY_BODY15]] ]
+// CHECK2-NEXT:    [[TMP43:%.*]] = load i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST16]], align 4
+// CHECK2-NEXT:    [[TMP44:%.*]] = atomicrmw add i32* [[OMP_ARRAYCPY_DESTELEMENTPAST17]], i32 [[TMP43]] monotonic, align 4
+// CHECK2-NEXT:    [[OMP_ARRAYCPY_DEST_ELEMENT18]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST17]], i32 1
+// CHECK2-NEXT:    [[OMP_ARRAYCPY_SRC_ELEMENT19]] = getelementptr i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST16]], i32 1
+// CHECK2-NEXT:    [[OMP_ARRAYCPY_DONE20:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT18]], [[TMP42]]
+// CHECK2-NEXT:    br i1 [[OMP_ARRAYCPY_DONE20]], label [[OMP_ARRAYCPY_DONE21]], label [[OMP_ARRAYCPY_BODY15]]
+// CHECK2:       omp.arraycpy.done21:
+// CHECK2-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
+// CHECK2:       .omp.reduction.default:
+// CHECK2-NEXT:    ret void
+//
+//
+// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..4
+// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[SIZE:%.*]], i32* noundef [[OUTPUT:%.*]], i32* noundef [[INPUT:%.*]]) #[[ATTR1]] {
+// CHECK2-NEXT:  entry:
+// CHECK2-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
+// CHECK2-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4
+// CHECK2-NEXT:    [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[SIZE_ADDR:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[OUTPUT_ADDR:%.*]] = alloca i32*, align 4
+// CHECK2-NEXT:    [[INPUT_ADDR:%.*]] = alloca i32*, align 4
+// CHECK2-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[I:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_LB:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_UB:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[OUTPUT4:%.*]] = alloca [3 x i32], align 4
+// CHECK2-NEXT:    [[_TMP5:%.*]] = alloca i32*, align 4
+// CHECK2-NEXT:    [[I6:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 4
+// CHECK2-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK2-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
+// CHECK2-NEXT:    store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4
+// CHECK2-NEXT:    store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4
+// CHECK2-NEXT:    store i32 [[SIZE]], i32* [[SIZE_ADDR]], align 4
+// CHECK2-NEXT:    store i32* [[OUTPUT]], i32** [[OUTPUT_ADDR]], align 4
+// CHECK2-NEXT:    store i32* [[INPUT]], i32** [[INPUT_ADDR]], align 4
+// CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[SIZE_ADDR]], align 4
+// CHECK2-NEXT:    store i32 [[TMP0]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP1]], 0
+// CHECK2-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
+// CHECK2-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
+// CHECK2-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT:    store i32 0, i32* [[I]], align 4
+// CHECK2-NEXT:    [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT:    [[CMP:%.*]] = icmp slt i32 0, [[TMP2]]
+// CHECK2-NEXT:    br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
+// CHECK2:       omp.precond.then:
+// CHECK2-NEXT:    store i32 0, i32* [[DOTOMP_LB]], align 4
+// CHECK2-NEXT:    [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT:    store i32 [[TMP3]], i32* [[DOTOMP_UB]], align 4
+// CHECK2-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4
+// CHECK2-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4
+// CHECK2-NEXT:    store i32 [[TMP4]], i32* [[DOTOMP_LB]], align 4
+// CHECK2-NEXT:    store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4
+// CHECK2-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE]], align 4
+// CHECK2-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
+// CHECK2-NEXT:    [[TMP6:%.*]] = load i32*, i32** [[OUTPUT_ADDR]], align 4
+// CHECK2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP6]], i32 0
+// CHECK2-NEXT:    [[TMP7:%.*]] = load i32*, i32** [[OUTPUT_ADDR]], align 4
+// CHECK2-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32* [[TMP7]], i32 2
+// CHECK2-NEXT:    [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [3 x i32], [3 x i32]* [[OUTPUT4]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP8:%.*]] = getelementptr i32, i32* [[ARRAY_BEGIN]], i32 3
+// CHECK2-NEXT:    [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i32* [[ARRAY_BEGIN]], [[TMP8]]
+// CHECK2-NEXT:    br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]]
+// CHECK2:       omp.arrayinit.body:
+// CHECK2-NEXT:    [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i32* [ [[ARRAY_BEGIN]], [[OMP_PRECOND_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ]
+// CHECK2-NEXT:    store i32 0, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
+// CHECK2-NEXT:    [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
+// CHECK2-NEXT:    [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]]
+// CHECK2-NEXT:    br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]]
+// CHECK2:       omp.arrayinit.done:
+// CHECK2-NEXT:    [[TMP9:%.*]] = load i32*, i32** [[OUTPUT_ADDR]], align 4
+// CHECK2-NEXT:    [[TMP10:%.*]] = ptrtoint i32* [[TMP9]] to i64
+// CHECK2-NEXT:    [[TMP11:%.*]] = ptrtoint i32* [[ARRAYIDX]] to i64
+// CHECK2-NEXT:    [[TMP12:%.*]] = sub i64 [[TMP10]], [[TMP11]]
+// CHECK2-NEXT:    [[TMP13:%.*]] = sdiv exact i64 [[TMP12]], ptrtoint (i32* getelementptr (i32, i32* null, i32 1) to i64)
+// CHECK2-NEXT:    [[TMP14:%.*]] = bitcast [3 x i32]* [[OUTPUT4]] to i32*
+// CHECK2-NEXT:    [[TMP15:%.*]] = getelementptr i32, i32* [[TMP14]], i64 [[TMP13]]
+// CHECK2-NEXT:    store i32* [[TMP15]], i32** [[_TMP5]], align 4
+// CHECK2-NEXT:    [[RHS_BEGIN:%.*]] = bitcast [3 x i32]* [[OUTPUT4]] to i32*
+// CHECK2-NEXT:    [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK2-NEXT:    [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4
+// CHECK2-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP17]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK2-NEXT:    [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
+// CHECK2-NEXT:    [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT:    [[CMP7:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]]
+// CHECK2-NEXT:    br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// CHECK2:       cond.true:
+// CHECK2-NEXT:    [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT:    br label [[COND_END:%.*]]
+// CHECK2:       cond.false:
+// CHECK2-NEXT:    [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
+// CHECK2-NEXT:    br label [[COND_END]]
+// CHECK2:       cond.end:
+// CHECK2-NEXT:    [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ]
+// CHECK2-NEXT:    store i32 [[COND]], i32* [[DOTOMP_UB]], align 4
+// CHECK2-NEXT:    [[TMP22:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
+// CHECK2-NEXT:    store i32 [[TMP22]], i32* [[DOTOMP_IV]], align 4
+// CHECK2-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
+// CHECK2:       omp.inner.for.cond:
+// CHECK2-NEXT:    [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK2-NEXT:    [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
+// CHECK2-NEXT:    [[CMP8:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]]
+// CHECK2-NEXT:    br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// CHECK2:       omp.inner.for.body:
+// CHECK2-NEXT:    [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK2-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1
+// CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 0, [[MUL]]
+// CHECK2-NEXT:    store i32 [[ADD]], i32* [[I6]], align 4
+// CHECK2-NEXT:    [[TMP26:%.*]] = load i32*, i32** [[INPUT_ADDR]], align 4
+// CHECK2-NEXT:    [[TMP27:%.*]] = load i32, i32* [[I6]], align 4
+// CHECK2-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, i32* [[TMP26]], i32 [[TMP27]]
+// CHECK2-NEXT:    [[TMP28:%.*]] = load i32, i32* [[ARRAYIDX9]], align 4
+// CHECK2-NEXT:    [[TMP29:%.*]] = load i32*, i32** [[_TMP5]], align 4
+// CHECK2-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, i32* [[TMP29]], i32 0
+// CHECK2-NEXT:    [[TMP30:%.*]] = load i32, i32* [[ARRAYIDX10]], align 4
+// CHECK2-NEXT:    [[ADD11:%.*]] = add nsw i32 [[TMP30]], [[TMP28]]
+// CHECK2-NEXT:    store i32 [[ADD11]], i32* [[ARRAYIDX10]], align 4
+// CHECK2-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
+// CHECK2:       omp.body.continue:
+// CHECK2-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
+// CHECK2:       omp.inner.for.inc:
+// CHECK2-NEXT:    [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// CHECK2-NEXT:    [[ADD12:%.*]] = add nsw i32 [[TMP31]], 1
+// CHECK2-NEXT:    store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4
+// CHECK2-NEXT:    br label [[OMP_INNER_FOR_COND]]
+// CHECK2:       omp.inner.for.end:
+// CHECK2-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
+// CHECK2:       omp.loop.exit:
+// CHECK2-NEXT:    [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK2-NEXT:    [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4
+// CHECK2-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP33]])
+// CHECK2-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP35:%.*]] = bitcast i32* [[RHS_BEGIN]] to i8*
+// CHECK2-NEXT:    store i8* [[TMP35]], i8** [[TMP34]], align 4
+// CHECK2-NEXT:    [[TMP36:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK2-NEXT:    [[TMP37:%.*]] = load i32, i32* [[TMP36]], align 4
+// CHECK2-NEXT:    [[TMP38:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8*
+// CHECK2-NEXT:    [[TMP39:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP37]], i32 1, i32 4, i8* [[TMP38]], void (i8*, i8*)* @.omp.reduction.reduction_func.5, [8 x i32]* @.gomp_critical_user_.reduction.var)
+// CHECK2-NEXT:    switch i32 [[TMP39]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// CHECK2-NEXT:    i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
+// CHECK2-NEXT:    i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
+// CHECK2-NEXT:    ]
+// CHECK2:       .omp.reduction.case1:
+// CHECK2-NEXT:    [[TMP40:%.*]] = getelementptr i32, i32* [[ARRAYIDX]], i32 3
+// CHECK2-NEXT:    [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i32* [[ARRAYIDX]], [[TMP40]]
+// CHECK2-NEXT:    br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE17:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
+// CHECK2:       omp.arraycpy.body:
+// CHECK2-NEXT:    [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi i32* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
+// CHECK2-NEXT:    [[OMP_ARRAYCPY_DESTELEMENTPAST13:%.*]] = phi i32* [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT15:%.*]], [[OMP_ARRAYCPY_BODY]] ]
+// CHECK2-NEXT:    [[TMP41:%.*]] = load i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST13]], align 4
+// CHECK2-NEXT:    [[TMP42:%.*]] = load i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4
+// CHECK2-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP41]], [[TMP42]]
+// CHECK2-NEXT:    store i32 [[ADD14]], i32* [[OMP_ARRAYCPY_DESTELEMENTPAST13]], align 4
+// CHECK2-NEXT:    [[OMP_ARRAYCPY_DEST_ELEMENT15]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST13]], i32 1
+// CHECK2-NEXT:    [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
+// CHECK2-NEXT:    [[OMP_ARRAYCPY_DONE16:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT15]], [[TMP40]]
+// CHECK2-NEXT:    br i1 [[OMP_ARRAYCPY_DONE16]], label [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_BODY]]
+// CHECK2:       omp.arraycpy.done17:
+// CHECK2-NEXT:    call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP37]], [8 x i32]* @.gomp_critical_user_.reduction.var)
+// CHECK2-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
+// CHECK2:       .omp.reduction.case2:
+// CHECK2-NEXT:    [[TMP43:%.*]] = getelementptr i32, i32* [[ARRAYIDX]], i32 3
+// CHECK2-NEXT:    [[OMP_ARRAYCPY_ISEMPTY18:%.*]] = icmp eq i32* [[ARRAYIDX]], [[TMP43]]
+// CHECK2-NEXT:    br i1 [[OMP_ARRAYCPY_ISEMPTY18]], label [[OMP_ARRAYCPY_DONE25:%.*]], label [[OMP_ARRAYCPY_BODY19:%.*]]
+// CHECK2:       omp.arraycpy.body19:
+// CHECK2-NEXT:    [[OMP_ARRAYCPY_SRCELEMENTPAST20:%.*]] = phi i32* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT23:%.*]], [[OMP_ARRAYCPY_BODY19]] ]
+// CHECK2-NEXT:    [[OMP_ARRAYCPY_DESTELEMENTPAST21:%.*]] = phi i32* [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT22:%.*]], [[OMP_ARRAYCPY_BODY19]] ]
+// CHECK2-NEXT:    [[TMP44:%.*]] = load i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST20]], align 4
+// CHECK2-NEXT:    [[TMP45:%.*]] = atomicrmw add i32* [[OMP_ARRAYCPY_DESTELEMENTPAST21]], i32 [[TMP44]] monotonic, align 4
+// CHECK2-NEXT:    [[OMP_ARRAYCPY_DEST_ELEMENT22]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST21]], i32 1
+// CHECK2-NEXT:    [[OMP_ARRAYCPY_SRC_ELEMENT23]] = getelementptr i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST20]], i32 1
+// CHECK2-NEXT:    [[OMP_ARRAYCPY_DONE24:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT22]], [[TMP43]]
+// CHECK2-NEXT:    br i1 [[OMP_ARRAYCPY_DONE24]], label [[OMP_ARRAYCPY_DONE25]], label [[OMP_ARRAYCPY_BODY19]]
+// CHECK2:       omp.arraycpy.done25:
+// CHECK2-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
+// CHECK2:       .omp.reduction.default:
+// CHECK2-NEXT:    br label [[OMP_PRECOND_END]]
+// CHECK2:       omp.precond.end:
+// CHECK2-NEXT:    ret void
+//
+//
+// CHECK2-LABEL: define {{[^@]+}}@.omp.reduction.reduction_func.5
+// CHECK2-SAME: (i8* noundef [[TMP0:%.*]], i8* noundef [[TMP1:%.*]]) #[[ATTR3]] {
+// CHECK2-NEXT:  entry:
+// CHECK2-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 4
+// CHECK2-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 4
+// CHECK2-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 4
+// CHECK2-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 4
+// CHECK2-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 4
+// CHECK2-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [1 x i8*]*
+// CHECK2-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 4
+// CHECK2-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]*
+// CHECK2-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 4
+// CHECK2-NEXT:    [[TMP8:%.*]] = bitcast i8* [[TMP7]] to i32*
+// CHECK2-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP3]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 4
+// CHECK2-NEXT:    [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i32*
+// CHECK2-NEXT:    [[TMP12:%.*]] = getelementptr i32, i32* [[TMP11]], i32 3
+// CHECK2-NEXT:    [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i32* [[TMP11]], [[TMP12]]
+// CHECK2-NEXT:    br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
+// CHECK2:       omp.arraycpy.body:
+// CHECK2-NEXT:    [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi i32* [ [[TMP8]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
+// CHECK2-NEXT:    [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i32* [ [[TMP11]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
+// CHECK2-NEXT:    [[TMP13:%.*]] = load i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
+// CHECK2-NEXT:    [[TMP14:%.*]] = load i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4
+// CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]]
+// CHECK2-NEXT:    store i32 [[ADD]], i32* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
+// CHECK2-NEXT:    [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
+// CHECK2-NEXT:    [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
+// CHECK2-NEXT:    [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP12]]
+// CHECK2-NEXT:    br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]]
+// CHECK2:       omp.arraycpy.done2:
+// CHECK2-NEXT:    ret void
+//
+//
+// CHECK2-LABEL: define {{[^@]+}}@.omp.reduction.reduction_func.6
+// CHECK2-SAME: (i8* noundef [[TMP0:%.*]], i8* noundef [[TMP1:%.*]]) #[[ATTR3]] {
+// CHECK2-NEXT:  entry:
+// CHECK2-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 4
+// CHECK2-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 4
+// CHECK2-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 4
+// CHECK2-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 4
+// CHECK2-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 4
+// CHECK2-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [1 x i8*]*
+// CHECK2-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 4
+// CHECK2-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]*
+// CHECK2-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 4
+// CHECK2-NEXT:    [[TMP8:%.*]] = bitcast i8* [[TMP7]] to i32*
+// CHECK2-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP3]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 4
+// CHECK2-NEXT:    [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i32*
+// CHECK2-NEXT:    [[TMP12:%.*]] = getelementptr i32, i32* [[TMP11]], i32 3
+// CHECK2-NEXT:    [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i32* [[TMP11]], [[TMP12]]
+// CHECK2-NEXT:    br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
+// CHECK2:       omp.arraycpy.body:
+// CHECK2-NEXT:    [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi i32* [ [[TMP8]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
+// CHECK2-NEXT:    [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i32* [ [[TMP11]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
+// CHECK2-NEXT:    [[TMP13:%.*]] = load i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
+// CHECK2-NEXT:    [[TMP14:%.*]] = load i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4
+// CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]]
+// CHECK2-NEXT:    store i32 [[ADD]], i32* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
+// CHECK2-NEXT:    [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
+// CHECK2-NEXT:    [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
+// CHECK2-NEXT:    [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP12]]
+// CHECK2-NEXT:    br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]]
+// CHECK2:       omp.arraycpy.done2:
+// CHECK2-NEXT:    ret void
+//
+//
+// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l78
+// CHECK2-SAME: (i32 noundef [[SIZE:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR6:[0-9]+]] {
+// CHECK2-NEXT:  entry:
+// CHECK2-NEXT:    [[SIZE_ADDR:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4
+// CHECK2-NEXT:    [[SIZE_CASTED:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    store i32 [[SIZE]], i32* [[SIZE_ADDR]], align 4
+// CHECK2-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4
+// CHECK2-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4
+// CHECK2-NEXT:    [[TMP1:%.*]] = load i32, i32* [[SIZE_ADDR]], align 4
+// CHECK2-NEXT:    store i32 [[TMP1]], i32* [[SIZE_CASTED]], align 4
+// CHECK2-NEXT:    [[TMP2:%.*]] = load i32, i32* [[SIZE_CASTED]], align 4
+// CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, [10 x i32]*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i32 [[TMP2]], [10 x i32]* [[TMP0]])
+// CHECK2-NEXT:    ret void
+//
+//
+// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..9
+// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[SIZE:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR1]] {
+// CHECK2-NEXT:  entry:
+// CHECK2-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
+// CHECK2-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4
+// CHECK2-NEXT:    [[SIZE_ADDR:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4
+// CHECK2-NEXT:    [[A2:%.*]] = alloca [2 x i32], align 4
+// CHECK2-NEXT:    [[I:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 4
+// CHECK2-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK2-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
+// CHECK2-NEXT:    store i32 [[SIZE]], i32* [[SIZE_ADDR]], align 4
+// CHECK2-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4
+// CHECK2-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4
+// CHECK2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 0
+// CHECK2-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 1
+// CHECK2-NEXT:    [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[A2]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP1:%.*]] = getelementptr i32, i32* [[ARRAY_BEGIN]], i32 2
+// CHECK2-NEXT:    [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i32* [[ARRAY_BEGIN]], [[TMP1]]
+// CHECK2-NEXT:    br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]]
+// CHECK2:       omp.arrayinit.body:
+// CHECK2-NEXT:    [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i32* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ]
+// CHECK2-NEXT:    store i32 0, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
+// CHECK2-NEXT:    [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
+// CHECK2-NEXT:    [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP1]]
+// CHECK2-NEXT:    br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]]
+// CHECK2:       omp.arrayinit.done:
+// CHECK2-NEXT:    [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i32*
+// CHECK2-NEXT:    [[TMP3:%.*]] = ptrtoint i32* [[TMP2]] to i64
+// CHECK2-NEXT:    [[TMP4:%.*]] = ptrtoint i32* [[ARRAYIDX]] to i64
+// CHECK2-NEXT:    [[TMP5:%.*]] = sub i64 [[TMP3]], [[TMP4]]
+// CHECK2-NEXT:    [[TMP6:%.*]] = sdiv exact i64 [[TMP5]], ptrtoint (i32* getelementptr (i32, i32* null, i32 1) to i64)
+// CHECK2-NEXT:    [[TMP7:%.*]] = bitcast [2 x i32]* [[A2]] to i32*
+// CHECK2-NEXT:    [[TMP8:%.*]] = getelementptr i32, i32* [[TMP7]], i64 [[TMP6]]
+// CHECK2-NEXT:    [[TMP9:%.*]] = bitcast i32* [[TMP8]] to [10 x i32]*
+// CHECK2-NEXT:    [[RHS_BEGIN:%.*]] = bitcast [2 x i32]* [[A2]] to i32*
+// CHECK2-NEXT:    store i32 0, i32* [[I]], align 4
+// CHECK2-NEXT:    br label [[FOR_COND:%.*]]
+// CHECK2:       for.cond:
+// CHECK2-NEXT:    [[TMP10:%.*]] = load i32, i32* [[I]], align 4
+// CHECK2-NEXT:    [[TMP11:%.*]] = load i32, i32* [[SIZE_ADDR]], align 4
+// CHECK2-NEXT:    [[CMP:%.*]] = icmp slt i32 [[TMP10]], [[TMP11]]
+// CHECK2-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
+// CHECK2:       for.body:
+// CHECK2-NEXT:    br label [[FOR_INC:%.*]]
+// CHECK2:       for.inc:
+// CHECK2-NEXT:    [[TMP12:%.*]] = load i32, i32* [[I]], align 4
+// CHECK2-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP12]], 1
+// CHECK2-NEXT:    store i32 [[INC]], i32* [[I]], align 4
+// CHECK2-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]]
+// CHECK2:       for.end:
+// CHECK2-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP14:%.*]] = bitcast i32* [[RHS_BEGIN]] to i8*
+// CHECK2-NEXT:    store i8* [[TMP14]], i8** [[TMP13]], align 4
+// CHECK2-NEXT:    [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK2-NEXT:    [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4
+// CHECK2-NEXT:    [[TMP17:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8*
+// CHECK2-NEXT:    [[TMP18:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP16]], i32 1, i32 4, i8* [[TMP17]], void (i8*, i8*)* @.omp.reduction.reduction_func.10, [8 x i32]* @.gomp_critical_user_.reduction.var)
+// CHECK2-NEXT:    switch i32 [[TMP18]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// CHECK2-NEXT:    i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
+// CHECK2-NEXT:    i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
+// CHECK2-NEXT:    ]
+// CHECK2:       .omp.reduction.case1:
+// CHECK2-NEXT:    [[TMP19:%.*]] = getelementptr i32, i32* [[ARRAYIDX]], i32 2
+// CHECK2-NEXT:    [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i32* [[ARRAYIDX]], [[TMP19]]
+// CHECK2-NEXT:    br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
+// CHECK2:       omp.arraycpy.body:
+// CHECK2-NEXT:    [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi i32* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
+// CHECK2-NEXT:    [[OMP_ARRAYCPY_DESTELEMENTPAST3:%.*]] = phi i32* [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT4:%.*]], [[OMP_ARRAYCPY_BODY]] ]
+// CHECK2-NEXT:    [[TMP20:%.*]] = load i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4
+// CHECK2-NEXT:    [[TMP21:%.*]] = load i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4
+// CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
+// CHECK2-NEXT:    store i32 [[ADD]], i32* [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4
+// CHECK2-NEXT:    [[OMP_ARRAYCPY_DEST_ELEMENT4]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST3]], i32 1
+// CHECK2-NEXT:    [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
+// CHECK2-NEXT:    [[OMP_ARRAYCPY_DONE5:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT4]], [[TMP19]]
+// CHECK2-NEXT:    br i1 [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]]
+// CHECK2:       omp.arraycpy.done6:
+// CHECK2-NEXT:    call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP16]], [8 x i32]* @.gomp_critical_user_.reduction.var)
+// CHECK2-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
+// CHECK2:       .omp.reduction.case2:
+// CHECK2-NEXT:    [[TMP22:%.*]] = getelementptr i32, i32* [[ARRAYIDX]], i32 2
+// CHECK2-NEXT:    [[OMP_ARRAYCPY_ISEMPTY7:%.*]] = icmp eq i32* [[ARRAYIDX]], [[TMP22]]
+// CHECK2-NEXT:    br i1 [[OMP_ARRAYCPY_ISEMPTY7]], label [[OMP_ARRAYCPY_DONE14:%.*]], label [[OMP_ARRAYCPY_BODY8:%.*]]
+// CHECK2:       omp.arraycpy.body8:
+// CHECK2-NEXT:    [[OMP_ARRAYCPY_SRCELEMENTPAST9:%.*]] = phi i32* [ [[RHS_BEGIN]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT12:%.*]], [[OMP_ARRAYCPY_BODY8]] ]
+// CHECK2-NEXT:    [[OMP_ARRAYCPY_DESTELEMENTPAST10:%.*]] = phi i32* [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT11:%.*]], [[OMP_ARRAYCPY_BODY8]] ]
+// CHECK2-NEXT:    [[TMP23:%.*]] = load i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST9]], align 4
+// CHECK2-NEXT:    [[TMP24:%.*]] = atomicrmw add i32* [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 [[TMP23]] monotonic, align 4
+// CHECK2-NEXT:    [[OMP_ARRAYCPY_DEST_ELEMENT11]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 1
+// CHECK2-NEXT:    [[OMP_ARRAYCPY_SRC_ELEMENT12]] = getelementptr i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST9]], i32 1
+// CHECK2-NEXT:    [[OMP_ARRAYCPY_DONE13:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT11]], [[TMP22]]
+// CHECK2-NEXT:    br i1 [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_BODY8]]
+// CHECK2:       omp.arraycpy.done14:
+// CHECK2-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
+// CHECK2:       .omp.reduction.default:
+// CHECK2-NEXT:    ret void
+//
+//
+// CHECK2-LABEL: define {{[^@]+}}@.omp.reduction.reduction_func.10
+// CHECK2-SAME: (i8* noundef [[TMP0:%.*]], i8* noundef [[TMP1:%.*]]) #[[ATTR3]] {
+// CHECK2-NEXT:  entry:
+// CHECK2-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 4
+// CHECK2-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 4
+// CHECK2-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 4
+// CHECK2-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 4
+// CHECK2-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 4
+// CHECK2-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [1 x i8*]*
+// CHECK2-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 4
+// CHECK2-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]*
+// CHECK2-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 4
+// CHECK2-NEXT:    [[TMP8:%.*]] = bitcast i8* [[TMP7]] to i32*
+// CHECK2-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP3]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 4
+// CHECK2-NEXT:    [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i32*
+// CHECK2-NEXT:    [[TMP12:%.*]] = getelementptr i32, i32* [[TMP11]], i32 2
+// CHECK2-NEXT:    [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i32* [[TMP11]], [[TMP12]]
+// CHECK2-NEXT:    br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
+// CHECK2:       omp.arraycpy.body:
+// CHECK2-NEXT:    [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi i32* [ [[TMP8]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
+// CHECK2-NEXT:    [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i32* [ [[TMP11]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
+// CHECK2-NEXT:    [[TMP13:%.*]] = load i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
+// CHECK2-NEXT:    [[TMP14:%.*]] = load i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4
+// CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]]
+// CHECK2-NEXT:    store i32 [[ADD]], i32* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
+// CHECK2-NEXT:    [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, i32* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
+// CHECK2-NEXT:    [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, i32* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
+// CHECK2-NEXT:    [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i32* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP12]]
+// CHECK2-NEXT:    br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]]
+// CHECK2:       omp.arraycpy.done2:
+// CHECK2-NEXT:    ret void
+//
+//
+// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l81
+// CHECK2-SAME: (i32 noundef [[SIZE:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR6]] {
+// CHECK2-NEXT:  entry:
+// CHECK2-NEXT:    [[SIZE_ADDR:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4
+// CHECK2-NEXT:    [[SIZE_CASTED:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    store i32 [[SIZE]], i32* [[SIZE_ADDR]], align 4
+// CHECK2-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4
+// CHECK2-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4
+// CHECK2-NEXT:    [[TMP1:%.*]] = load i32, i32* [[SIZE_ADDR]], align 4
+// CHECK2-NEXT:    store i32 [[TMP1]], i32* [[SIZE_CASTED]], align 4
+// CHECK2-NEXT:    [[TMP2:%.*]] = load i32, i32* [[SIZE_CASTED]], align 4
+// CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, [10 x i32]*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i32 [[TMP2]], [10 x i32]* [[TMP0]])
+// CHECK2-NEXT:    ret void
+//
+//
+// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..13
+// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[SIZE:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR1]] {
+// CHECK2-NEXT:  entry:
+// CHECK2-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
+// CHECK2-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4
+// CHECK2-NEXT:    [[SIZE_ADDR:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4
+// CHECK2-NEXT:    [[A1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[I:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 4
+// CHECK2-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK2-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
+// CHECK2-NEXT:    store i32 [[SIZE]], i32* [[SIZE_ADDR]], align 4
+// CHECK2-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4
+// CHECK2-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4
+// CHECK2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 3
+// CHECK2-NEXT:    store i32 0, i32* [[A1]], align 4
+// CHECK2-NEXT:    [[TMP1:%.*]] = bitcast [10 x i32]* [[TMP0]] to i32*
+// CHECK2-NEXT:    [[TMP2:%.*]] = ptrtoint i32* [[TMP1]] to i64
+// CHECK2-NEXT:    [[TMP3:%.*]] = ptrtoint i32* [[ARRAYIDX]] to i64
+// CHECK2-NEXT:    [[TMP4:%.*]] = sub i64 [[TMP2]], [[TMP3]]
+// CHECK2-NEXT:    [[TMP5:%.*]] = sdiv exact i64 [[TMP4]], ptrtoint (i32* getelementptr (i32, i32* null, i32 1) to i64)
+// CHECK2-NEXT:    [[TMP6:%.*]] = getelementptr i32, i32* [[A1]], i64 [[TMP5]]
+// CHECK2-NEXT:    [[TMP7:%.*]] = bitcast i32* [[TMP6]] to [10 x i32]*
+// CHECK2-NEXT:    store i32 0, i32* [[I]], align 4
+// CHECK2-NEXT:    br label [[FOR_COND:%.*]]
+// CHECK2:       for.cond:
+// CHECK2-NEXT:    [[TMP8:%.*]] = load i32, i32* [[I]], align 4
+// CHECK2-NEXT:    [[TMP9:%.*]] = load i32, i32* [[SIZE_ADDR]], align 4
+// CHECK2-NEXT:    [[CMP:%.*]] = icmp slt i32 [[TMP8]], [[TMP9]]
+// CHECK2-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
+// CHECK2:       for.body:
+// CHECK2-NEXT:    br label [[FOR_INC:%.*]]
+// CHECK2:       for.inc:
+// CHECK2-NEXT:    [[TMP10:%.*]] = load i32, i32* [[I]], align 4
+// CHECK2-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP10]], 1
+// CHECK2-NEXT:    store i32 [[INC]], i32* [[I]], align 4
+// CHECK2-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]]
+// CHECK2:       for.end:
+// CHECK2-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP12:%.*]] = bitcast i32* [[A1]] to i8*
+// CHECK2-NEXT:    store i8* [[TMP12]], i8** [[TMP11]], align 4
+// CHECK2-NEXT:    [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK2-NEXT:    [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4
+// CHECK2-NEXT:    [[TMP15:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8*
+// CHECK2-NEXT:    [[TMP16:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP14]], i32 1, i32 4, i8* [[TMP15]], void (i8*, i8*)* @.omp.reduction.reduction_func.14, [8 x i32]* @.gomp_critical_user_.reduction.var)
+// CHECK2-NEXT:    switch i32 [[TMP16]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// CHECK2-NEXT:    i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
+// CHECK2-NEXT:    i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
+// CHECK2-NEXT:    ]
+// CHECK2:       .omp.reduction.case1:
+// CHECK2-NEXT:    [[TMP17:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+// CHECK2-NEXT:    [[TMP18:%.*]] = load i32, i32* [[A1]], align 4
+// CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]]
+// CHECK2-NEXT:    store i32 [[ADD]], i32* [[ARRAYIDX]], align 4
+// CHECK2-NEXT:    call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB3]], i32 [[TMP14]], [8 x i32]* @.gomp_critical_user_.reduction.var)
+// CHECK2-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
+// CHECK2:       .omp.reduction.case2:
+// CHECK2-NEXT:    [[TMP19:%.*]] = load i32, i32* [[A1]], align 4
+// CHECK2-NEXT:    [[TMP20:%.*]] = atomicrmw add i32* [[ARRAYIDX]], i32 [[TMP19]] monotonic, align 4
+// CHECK2-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
+// CHECK2:       .omp.reduction.default:
+// CHECK2-NEXT:    ret void
+//
+//
+// CHECK2-LABEL: define {{[^@]+}}@.omp.reduction.reduction_func.14
+// CHECK2-SAME: (i8* noundef [[TMP0:%.*]], i8* noundef [[TMP1:%.*]]) #[[ATTR3]] {
+// CHECK2-NEXT:  entry:
+// CHECK2-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 4
+// CHECK2-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 4
+// CHECK2-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 4
+// CHECK2-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 4
+// CHECK2-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 4
+// CHECK2-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [1 x i8*]*
+// CHECK2-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 4
+// CHECK2-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]*
+// CHECK2-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 4
+// CHECK2-NEXT:    [[TMP8:%.*]] = bitcast i8* [[TMP7]] to i32*
+// CHECK2-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP3]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 4
+// CHECK2-NEXT:    [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i32*
+// CHECK2-NEXT:    [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4
+// CHECK2-NEXT:    [[TMP13:%.*]] = load i32, i32* [[TMP8]], align 4
+// CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
+// CHECK2-NEXT:    store i32 [[ADD]], i32* [[TMP11]], align 4
+// CHECK2-NEXT:    ret void
+//
+//
+// CHECK2-LABEL: define {{[^@]+}}@main
+// CHECK2-SAME: () #[[ATTR7:[0-9]+]] {
+// CHECK2-NEXT:  entry:
+// CHECK2-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[SIZE:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    [[ARRAY:%.*]] = alloca i32*, align 4
+// CHECK2-NEXT:    [[RESULT:%.*]] = alloca i32, align 4
+// CHECK2-NEXT:    store i32 0, i32* [[RETVAL]], align 4
+// CHECK2-NEXT:    store i32 100, i32* [[SIZE]], align 4
+// CHECK2-NEXT:    [[CALL:%.*]] = call noalias noundef nonnull i8* @_Znaj(i32 noundef 400) #[[ATTR10:[0-9]+]]
+// CHECK2-NEXT:    [[TMP0:%.*]] = bitcast i8* [[CALL]] to i32*
+// CHECK2-NEXT:    store i32* [[TMP0]], i32** [[ARRAY]], align 4
+// CHECK2-NEXT:    store i32 0, i32* [[RESULT]], align 4
+// CHECK2-NEXT:    [[TMP1:%.*]] = load i32*, i32** [[ARRAY]], align 4
+// CHECK2-NEXT:    call void @_Z3sumPiiS_(i32* noundef [[TMP1]], i32 noundef 100, i32* noundef [[RESULT]])
+// CHECK2-NEXT:    ret i32 0
+//
+//
+// CHECK2-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
+// CHECK2-SAME: () #[[ATTR9:[0-9]+]] {
+// CHECK2-NEXT:  entry:
+// CHECK2-NEXT:    call void @__tgt_register_requires(i64 1)
+// CHECK2-NEXT:    ret void
+//

diff  --git a/clang/test/OpenMP/target_codegen.cpp b/clang/test/OpenMP/target_codegen.cpp
index 0d7526bfaea7a..6b6f5fbdb4604 100644
--- a/clang/test/OpenMP/target_codegen.cpp
+++ b/clang/test/OpenMP/target_codegen.cpp
@@ -76,11 +76,13 @@
 // CHECK-DAG: [[MAPT2:@.+]] = private unnamed_addr constant [1 x i64] [i64 800]
 // CHECK-DAG: [[SIZET3:@.+]] = private unnamed_addr constant [2 x i64] [i64 4, i64 2]
 // CHECK-DAG: [[MAPT3:@.+]] = private unnamed_addr constant [2 x i64] [i64 800, i64 800]
+// CHECK-DAG: [[SIZET4:@.+]] = private unnamed_addr constant [9 x i64] [i64 4, i64 40, i64 {{4|8}}, i64 0, i64 400, i64 {{4|8}}, i64 {{4|8}}, i64 0, i64 {{12|16}}]
 // CHECK-DAG: [[MAPT4:@.+]] = private unnamed_addr constant [9 x i64] [i64 800, i64 547, i64 800, i64 547, i64 547, i64 800, i64 800, i64 547, i64 547]
 // CHECK-DAG: [[SIZET5:@.+]] = private unnamed_addr constant [3 x i64] [i64 4, i64 2, i64 40]
 // CHECK-DAG: [[MAPT5:@.+]] = private unnamed_addr constant [3 x i64] [i64 800, i64 800, i64 547]
 // CHECK-DAG: [[SIZET6:@.+]] = private unnamed_addr constant [4 x i64] [i64 4, i64 2, i64 1, i64 40]
 // CHECK-DAG: [[MAPT6:@.+]] = private unnamed_addr constant [4 x i64] [i64 800, i64 800, i64 800, i64 547]
+// CHECK-DAG: [[SIZET7:@.+]] = private unnamed_addr constant [5 x i64] [i64 8, i64 4, i64 {{4|8}}, i64 {{4|8}}, i64 0]
 // CHECK-DAG: [[MAPT7:@.+]] = private unnamed_addr constant [5 x i64] [i64 547, i64 800, i64 800, i64 800, i64 547]
 // CHECK-DAG: [[SIZET9:@.+]] = private unnamed_addr constant [1 x i64] [i64 12]
 // CHECK-DAG: [[MAPT10:@.+]] = private unnamed_addr constant [1 x i64] [i64 35]
@@ -272,37 +274,30 @@ int foo(int n) {
   // CHECK-32:    [[CNSZSIZE:%.+]] = mul nuw i32 [[CNELEMSIZE2]], 8
   // CHECK-32:    [[CNSIZE:%.+]] = sext i32 [[CNSZSIZE]] to i64
 
-  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_mapper(%struct.ident_t* @{{.+}}, i64 -1, i8* @{{[^,]+}}, i32 9, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* [[SR:%[^,]+]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* [[MAPT4]], i32 0, i32 0), i8** null, i8** null)
+  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_mapper(%struct.ident_t* @{{.+}}, i64 -1, i8* @{{[^,]+}}, i32 9, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* [[SZ4:%[^,]+]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* [[MAPT4]], i32 0, i32 0), i8** null, i8** null)
   // CHECK-DAG:   [[BPR]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP:%[^,]+]], i32 0, i32 0
   // CHECK-DAG:   [[PR]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P:%[^,]+]], i32 0, i32 0
-  // CHECK-DAG:   [[SR]] = getelementptr inbounds [9 x i64], [9 x i64]* [[S:%[^,]+]], i32 0, i32 0
+  // CHECK-DAG:   [[SZ4]] = getelementptr inbounds [9 x i64], [9 x i64]* [[PSZ:%[^,]+]], i32 0, i32 0
 
-  // CHECK-DAG:   [[SADDR0:%.+]] = getelementptr inbounds [9 x i64], [9 x i64]* [[S]], i32 0, i32 [[IDX0:0]]
-  // CHECK-DAG:   [[BPADDR0:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP]], i32 0, i32 [[IDX0]]
+  // CHECK-DAG:   [[BPADDR0:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP]], i32 0, i32 [[IDX0:0]]
   // CHECK-DAG:   [[PADDR0:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P]], i32 0, i32 [[IDX0]]
-  // CHECK-DAG:   [[SADDR1:%.+]] = getelementptr inbounds [9 x i64], [9 x i64]* [[S]], i32 0, i32 [[IDX1:1]]
-  // CHECK-DAG:   [[BPADDR1:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP]], i32 0, i32 [[IDX1]]
+  // CHECK-DAG:   [[BPADDR1:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP]], i32 0, i32 [[IDX1:1]]
   // CHECK-DAG:   [[PADDR1:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P]], i32 0, i32 [[IDX1]]
-  // CHECK-DAG:   [[SADDR2:%.+]] = getelementptr inbounds [9 x i64], [9 x i64]* [[S]], i32 0, i32 [[IDX2:2]]
-  // CHECK-DAG:   [[BPADDR2:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP]], i32 0, i32 [[IDX2]]
+  // CHECK-DAG:   [[BPADDR2:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP]], i32 0, i32 [[IDX2:2]]
   // CHECK-DAG:   [[PADDR2:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P]], i32 0, i32 [[IDX2]]
-  // CHECK-DAG:   [[SADDR3:%.+]] = getelementptr inbounds [9 x i64], [9 x i64]* [[S]], i32 0, i32 [[IDX3:3]]
-  // CHECK-DAG:   [[BPADDR3:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP]], i32 0, i32 [[IDX3]]
+  // CHECK-DAG:   [[BPADDR3:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP]], i32 0, i32 [[IDX3:3]]
   // CHECK-DAG:   [[PADDR3:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P]], i32 0, i32 [[IDX3]]
-  // CHECK-DAG:   [[SADDR4:%.+]] = getelementptr inbounds [9 x i64], [9 x i64]* [[S]], i32 0, i32 [[IDX4:4]]
-  // CHECK-DAG:   [[BPADDR4:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP]], i32 0, i32 [[IDX4]]
+  // CHECK-DAG:   [[PSZ3:%.+]] = getelementptr inbounds [9 x i64], [9 x i64]* [[PSZ]], i32 0, i32 [[IDX3]]
+  // CHECK-DAG:   [[BPADDR4:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP]], i32 0, i32 [[IDX4:4]]
   // CHECK-DAG:   [[PADDR4:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P]], i32 0, i32 [[IDX4]]
-  // CHECK-DAG:   [[SADDR5:%.+]] = getelementptr inbounds [9 x i64], [9 x i64]* [[S]], i32 0, i32 [[IDX5:5]]
-  // CHECK-DAG:   [[BPADDR5:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP]], i32 0, i32 [[IDX5]]
+  // CHECK-DAG:   [[BPADDR5:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP]], i32 0, i32 [[IDX5:5]]
   // CHECK-DAG:   [[PADDR5:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P]], i32 0, i32 [[IDX5]]
-  // CHECK-DAG:   [[SADDR6:%.+]] = getelementptr inbounds [9 x i64], [9 x i64]* [[S]], i32 0, i32 [[IDX6:6]]
-  // CHECK-DAG:   [[BPADDR6:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP]], i32 0, i32 [[IDX6]]
+  // CHECK-DAG:   [[BPADDR6:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP]], i32 0, i32 [[IDX6:6]]
   // CHECK-DAG:   [[PADDR6:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P]], i32 0, i32 [[IDX6]]
-  // CHECK-DAG:   [[SADDR7:%.+]] = getelementptr inbounds [9 x i64], [9 x i64]* [[S]], i32 0, i32 [[IDX7:7]]
-  // CHECK-DAG:   [[BPADDR7:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP]], i32 0, i32 [[IDX7]]
+  // CHECK-DAG:   [[BPADDR7:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP]], i32 0, i32 [[IDX7:7]]
   // CHECK-DAG:   [[PADDR7:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P]], i32 0, i32 [[IDX7]]
-  // CHECK-DAG:   [[SADDR8:%.+]] = getelementptr inbounds [9 x i64], [9 x i64]* [[S]], i32 0, i32 [[IDX8:8]]
-  // CHECK-DAG:   [[BPADDR8:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP]], i32 0, i32 [[IDX8]]
+  // CHECK-DAG:   [[PSZ7:%.+]] = getelementptr inbounds [9 x i64], [9 x i64]* [[PSZ]], i32 0, i32 [[IDX7]]
+  // CHECK-DAG:   [[BPADDR8:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP]], i32 0, i32 [[IDX8:8]]
   // CHECK-DAG:   [[PADDR8:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P]], i32 0, i32 [[IDX8]]
 
   // The names below are not necessarily consistent with the names used for the
@@ -311,55 +306,48 @@ int foo(int n) {
   // CHECK-DAG:   [[CPADDR2:%.+]] = bitcast i8** [[PADDR2]] to i[[SZ]]*
   // CHECK-DAG:   store i[[SZ]] [[VLA0]], i[[SZ]]* [[CBPADDR2]]
   // CHECK-DAG:   store i[[SZ]] [[VLA0]], i[[SZ]]* [[CPADDR2]]
-  // CHECK-DAG:   store i64 {{4|8}}, i64* [[SADDR2]]
 
   // CHECK-DAG:   [[CBPADDR6:%.+]] = bitcast i8** [[BPADDR6]] to i[[SZ]]*
   // CHECK-DAG:   [[CPADDR6:%.+]] = bitcast i8** [[PADDR6]] to i[[SZ]]*
   // CHECK-DAG:   store i[[SZ]] [[VLA1]], i[[SZ]]* [[CBPADDR6]]
   // CHECK-DAG:   store i[[SZ]] [[VLA1]], i[[SZ]]* [[CPADDR6]]
-  // CHECK-DAG:   store i64 {{4|8}}, i64* [[SADDR6]]
 
   // CHECK-DAG:   [[CBPADDR5:%.+]] = bitcast i8** [[BPADDR5]] to i[[SZ]]*
   // CHECK-DAG:   [[CPADDR5:%.+]] = bitcast i8** [[PADDR5]] to i[[SZ]]*
   // CHECK-DAG:   store i[[SZ]] 5, i[[SZ]]* [[CBPADDR5]]
   // CHECK-DAG:   store i[[SZ]] 5, i[[SZ]]* [[CPADDR5]]
-  // CHECK-DAG:   store i64 {{4|8}}, i64* [[SADDR5]]
 
   // CHECK-DAG:   [[CBPADDR0:%.+]] = bitcast i8** [[BPADDR0]] to i[[SZ]]*
   // CHECK-DAG:   [[CPADDR0:%.+]] = bitcast i8** [[PADDR0]] to i[[SZ]]*
   // CHECK-DAG:   store i[[SZ]] [[A_CVAL]], i[[SZ]]* [[CBPADDR0]]
   // CHECK-DAG:   store i[[SZ]] [[A_CVAL]], i[[SZ]]* [[CPADDR0]]
-  // CHECK-DAG:   store i64 4, i64* [[SADDR0]]
 
   // CHECK-DAG:   [[CBPADDR1:%.+]] = bitcast i8** [[BPADDR1]] to [10 x float]**
   // CHECK-DAG:   [[CPADDR1:%.+]] = bitcast i8** [[PADDR1]] to [10 x float]**
   // CHECK-DAG:   store [10 x float]* %{{.+}}, [10 x float]** [[CBPADDR1]]
   // CHECK-DAG:   store [10 x float]* %{{.+}}, [10 x float]** [[CPADDR1]]
-  // CHECK-DAG:   store i64 40, i64* [[SADDR1]]
 
   // CHECK-DAG:   [[CBPADDR3:%.+]] = bitcast i8** [[BPADDR3]] to float**
   // CHECK-DAG:   [[CPADDR3:%.+]] = bitcast i8** [[PADDR3]] to float**
   // CHECK-DAG:   store float* %{{.+}}, float** [[CBPADDR3]]
   // CHECK-DAG:   store float* %{{.+}}, float** [[CPADDR3]]
-  // CHECK-DAG:   store i64 [[BNSIZE]], i64* [[SADDR3]]
+  // CHECK-DAG:   store i64 [[BNSIZE]], i64* [[PSZ3]]
 
   // CHECK-DAG:   [[CBPADDR4:%.+]] = bitcast i8** [[BPADDR4]] to [5 x [10 x double]]**
   // CHECK-DAG:   [[CPADDR4:%.+]] = bitcast i8** [[PADDR4]] to [5 x [10 x double]]**
   // CHECK-DAG:   store [5 x [10 x double]]* %{{.+}}, [5 x [10 x double]]** [[CBPADDR4]]
   // CHECK-DAG:   store [5 x [10 x double]]* %{{.+}}, [5 x [10 x double]]** [[CPADDR4]]
-  // CHECK-DAG:   store i64 400, i64* [[SADDR4]]
 
   // CHECK-DAG:   [[CBPADDR7:%.+]] = bitcast i8** [[BPADDR7]] to double**
   // CHECK-DAG:   [[CPADDR7:%.+]] = bitcast i8** [[PADDR7]] to double**
   // CHECK-DAG:   store double* %{{.+}}, double** [[CBPADDR7]]
   // CHECK-DAG:   store double* %{{.+}}, double** [[CPADDR7]]
-  // CHECK-DAG:   store i64 [[CNSIZE]], i64* [[SADDR7]]
+  // CHECK-DAG:   store i64 [[CNSIZE]], i64* [[PSZ7]]
 
   // CHECK-DAG:   [[CBPADDR8:%.+]] = bitcast i8** [[BPADDR8]] to [[TT]]**
   // CHECK-DAG:   [[CPADDR8:%.+]] = bitcast i8** [[PADDR8]] to [[TT]]**
   // CHECK-DAG:   store [[TT]]* %{{.+}}, [[TT]]** [[CBPADDR8]]
   // CHECK-DAG:   store [[TT]]* %{{.+}}, [[TT]]** [[CPADDR8]]
-  // CHECK-DAG:   store i64 {{12|16}}, i64* [[SADDR8]]
 
   // CHECK:       [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
   // CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]]
@@ -580,25 +568,21 @@ int bar(int n){
 // CHECK-32:    [[CSZSIZE:%.+]] = mul nuw i32 [[CELEMSIZE2]], 2
 // CHECK-32:    [[CSIZE:%.+]] = sext i32 [[CSZSIZE]] to i64
 
-// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_mapper(%struct.ident_t* @{{.+}}, i64 -1, i8* @{{[^,]+}}, i32 5, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* [[SR:%[^,]+]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* [[MAPT7]], i32 0, i32 0), i8** null, i8** null)
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_mapper(%struct.ident_t* @{{.+}}, i64 -1, i8* @{{[^,]+}}, i32 5, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* [[SZ7:%[^,]+]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* [[MAPT7]], i32 0, i32 0), i8** null, i8** null)
 // CHECK-DAG:   [[BPR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP:%.+]], i32 0, i32 0
 // CHECK-DAG:   [[PR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P:%.+]], i32 0, i32 0
-// CHECK-DAG:   [[SR]] = getelementptr inbounds [5 x i64], [5 x i64]* [[S:%.+]], i32 0, i32 0
-// CHECK-DAG:   [[SADDR0:%.+]] = getelementptr inbounds [5 x i64], [5 x i64]* [[S]], i32 0, i32 [[IDX0:0]]
-// CHECK-DAG:   [[BPADDR0:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP]], i32 0, i32 [[IDX0]]
+// CHECK-DAG:   [[SZ7]] = getelementptr inbounds [5 x i64], [5 x i64]* [[PSZ:%.+]], i32 0, i32 0
+// CHECK-DAG:   [[BPADDR0:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP]], i32 0, i32 [[IDX0:0]]
 // CHECK-DAG:   [[PADDR0:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P]], i32 0, i32 [[IDX0]]
-// CHECK-DAG:   [[SADDR1:%.+]] = getelementptr inbounds [5 x i64], [5 x i64]* [[S]], i32 0, i32 [[IDX1:1]]
-// CHECK-DAG:   [[BPADDR1:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP]], i32 0, i32 [[IDX1]]
+// CHECK-DAG:   [[BPADDR1:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP]], i32 0, i32 [[IDX1:1]]
 // CHECK-DAG:   [[PADDR1:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P]], i32 0, i32 [[IDX1]]
-// CHECK-DAG:   [[SADDR2:%.+]] = getelementptr inbounds [5 x i64], [5 x i64]* [[S]], i32 0, i32 [[IDX2:2]]
-// CHECK-DAG:   [[BPADDR2:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP]], i32 0, i32 [[IDX2]]
+// CHECK-DAG:   [[BPADDR2:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP]], i32 0, i32 [[IDX2:2]]
 // CHECK-DAG:   [[PADDR2:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P]], i32 0, i32 [[IDX2]]
-// CHECK-DAG:   [[SADDR3:%.+]] = getelementptr inbounds [5 x i64], [5 x i64]* [[S]], i32 0, i32 [[IDX3:3]]
-// CHECK-DAG:   [[BPADDR3:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP]], i32 0, i32 [[IDX3]]
+// CHECK-DAG:   [[BPADDR3:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP]], i32 0, i32 [[IDX3:3]]
 // CHECK-DAG:   [[PADDR3:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P]], i32 0, i32 [[IDX3]]
-// CHECK-DAG:   [[SADDR4:%.+]] = getelementptr inbounds [5 x i64], [5 x i64]* [[S]], i32 0, i32 [[IDX4:4]]
-// CHECK-DAG:   [[BPADDR4:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP]], i32 0, i32 [[IDX4]]
+// CHECK-DAG:   [[BPADDR4:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP]], i32 0, i32 [[IDX4:4]]
 // CHECK-DAG:   [[PADDR4:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P]], i32 0, i32 [[IDX4]]
+// CHECK-DAG:   [[PSZ4:%.+]] = getelementptr inbounds [5 x i64], [5 x i64]* [[PSZ:%.+]], i32 0, i32 [[IDX4]]
 
 // The names below are not necessarily consistent with the names used for the
 // addresses above as some are repeated.
@@ -606,31 +590,27 @@ int bar(int n){
 // CHECK-DAG:   [[CPADDR4:%.+]] = bitcast i8** [[PADDR4]] to i16**
 // CHECK-DAG:   store i16* %{{.+}}, i16** [[CBPADDR4]]
 // CHECK-DAG:   store i16* %{{.+}}, i16** [[CPADDR4]]
-// CHECK-DAG:   store i64 [[CSIZE]], i64* [[SADDR4]]
+// CHECK-DAG:   store i64 [[CSIZE]], i64* [[PSZ4]]
 
 // CHECK-DAG:   [[CBPADDR3:%.+]] = bitcast i8** [[BPADDR3]] to i[[SZ]]*
 // CHECK-DAG:   [[CPADDR3:%.+]] = bitcast i8** [[PADDR3]] to i[[SZ]]*
 // CHECK-DAG:   store i[[SZ]] [[VLA0]], i[[SZ]]* [[CBPADDR3]]
 // CHECK-DAG:   store i[[SZ]] [[VLA0]], i[[SZ]]* [[CPADDR3]]
-// CHECK-DAG:   store i64 {{4|8}}, i64* [[SADDR3]]
 
 // CHECK-DAG:   [[CBPADDR2:%.+]] = bitcast i8** [[BPADDR2]] to i[[SZ]]*
 // CHECK-DAG:   [[CPADDR2:%.+]] = bitcast i8** [[PADDR2]] to i[[SZ]]*
 // CHECK-DAG:   store i[[SZ]] 2, i[[SZ]]* [[CBPADDR2]]
 // CHECK-DAG:   store i[[SZ]] 2, i[[SZ]]* [[CPADDR2]]
-// CHECK-DAG:   store i64 {{4|8}}, i64* [[SADDR2]]
 
 // CHECK-DAG:   [[CBPADDR1:%.+]] = bitcast i8** [[BPADDR1]] to i[[SZ]]*
 // CHECK-DAG:   [[CPADDR1:%.+]] = bitcast i8** [[PADDR1]] to i[[SZ]]*
 // CHECK-DAG:   store i[[SZ]] [[B_CVAL]], i[[SZ]]* [[CBPADDR1]]
 // CHECK-DAG:   store i[[SZ]] [[B_CVAL]], i[[SZ]]* [[CPADDR1]]
-// CHECK-DAG:   store i64 4, i64* [[SADDR1]]
 
 // CHECK-DAG:   [[CBPADDR0:%.+]] = bitcast i8** [[BPADDR0]] to [[S1]]**
 // CHECK-DAG:   [[CPADDR0:%.+]] = bitcast i8** [[PADDR0]] to double**
 // CHECK-DAG:   store [[S1]]* [[THIS:%.+]], [[S1]]** [[CBPADDR0]]
 // CHECK-DAG:   store double* [[A:%.+]], double** [[CPADDR0]]
-// CHECK-DAG:   store i64 8, i64* [[SADDR0]]
 
 // CHECK:       [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
 // CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]]

diff  --git a/clang/test/OpenMP/target_data_codegen.cpp b/clang/test/OpenMP/target_data_codegen.cpp
index 2666a736dc583..10c2b2c02ad9a 100644
--- a/clang/test/OpenMP/target_data_codegen.cpp
+++ b/clang/test/OpenMP/target_data_codegen.cpp
@@ -37,7 +37,7 @@ double gc[100];
 
 // CK1: [[MTYPE03:@.+]] = {{.+}}constant [1 x i64] [i64 5]
 
-// CK1: [[SIZE04:@.+]] = {{.+}}constant [2 x i64] [i64 sdiv exact (i64 sub (i64 ptrtoint (double** getelementptr (double*, double** getelementptr inbounds (%struct.ST, %struct.ST* @gb, i32 0, i32 1), i32 1) to i64), i64 ptrtoint (double** getelementptr inbounds (%struct.ST, %struct.ST* @gb, i32 0, i32 1) to i64)), i64 ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64)), i64 24]
+// CK1: [[SIZE04:@.+]] = {{.+}}constant [2 x i64] [i64 0, i64 24]
 // CK1: [[MTYPE04:@.+]] = {{.+}}constant [2 x i64] [i64 0, i64 281474976710673]
 
 // CK1: [[MTYPE05:@.+]] = {{.+}}constant [1 x i64] [i64 1025]
@@ -140,16 +140,19 @@ void foo(int arg) {
   {++arg;}
 
   // Region 04
-  // CK1-DAG: call void @__tgt_target_data_begin_mapper(%struct.ident_t* @{{.+}}, i64 -1, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE04]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE04]]{{.+}}, i8** null, i8** null)
+  // CK1-DAG: call void @__tgt_target_data_begin_mapper(%struct.ident_t* @{{.+}}, i64 -1, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%[^,]+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE04]]{{.+}}, i8** null, i8** null)
   // CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
+  // CK1-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[PSZ:%[^,]+]]
 
   // CK1-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0
   // CK1-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0
+  // CK1-DAG: [[PS0:%.+]] = getelementptr inbounds {{.+}}[[PSZ]], i{{.+}} 0, i{{.+}} 0
   // CK1-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to [[ST]]**
   // CK1-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to double***
   // CK1-DAG: store [[ST]]* @gb, [[ST]]** [[CBP0]]
   // CK1-DAG: store double** getelementptr inbounds ([[ST]], [[ST]]* @gb, i32 0, i32 1), double*** [[CP0]]
+  // CK1-DAG: store i64 sdiv exact (i64 sub (i64 ptrtoint (double** getelementptr (double*, double** getelementptr inbounds (%struct.ST, %struct.ST* @gb, i32 0, i32 1), i32 1) to i64), i64 ptrtoint (double** getelementptr inbounds (%struct.ST, %struct.ST* @gb, i32 0, i32 1) to i64)), i64 ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64)), i64* [[PS0]],
 
   // CK1-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1
   // CK1-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1
@@ -162,9 +165,10 @@ void foo(int arg) {
 
   // CK1: %{{.+}} = add nsw i32 %{{[^,]+}}, 1
 
-  // CK1-DAG: call void @__tgt_target_data_end_mapper(%struct.ident_t* @{{.+}}, i64 -1, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE04]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE04]]{{.+}}, i8** null, i8** null)
+  // CK1-DAG: call void @__tgt_target_data_end_mapper(%struct.ident_t* @{{.+}}, i64 -1, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%[^,]+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE04]]{{.+}}, i8** null, i8** null)
   // CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP]]
   // CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P]]
+  // CK1-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[PSZ]]
   #pragma omp target data map(to: gb.b[:3])
   {++arg;}
 
@@ -357,6 +361,7 @@ struct ST {
   }
 };
 
+// CK2: [[SIZE00:@.+]] = {{.+}}constant [2 x i64] [i64 0, i64 24]
 // CK2: [[MTYPE00:@.+]] = {{.+}}constant [2 x i64] [i64 0, i64 281474976710677]
 
 // CK2-LABEL: _Z3bari
@@ -368,31 +373,29 @@ int bar(int arg){
 // Region 00
 // CK2: br i1 %{{[^,]+}}, label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]]
 // CK2: [[IFTHEN]]
-// CK2-DAG: call void @__tgt_target_data_begin_mapper(%struct.ident_t* @{{.+}}, i64 [[DEV:%[^,]+]], i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[sz:64|32]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE00]]{{.+}}, i8** null, i8** null)
+// CK2-DAG: call void @__tgt_target_data_begin_mapper(%struct.ident_t* @{{.+}}, i64 [[DEV:%[^,]+]], i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%[^,]+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE00]]{{.+}}, i8** null, i8** null)
 // CK2-DAG: [[DEV]] = sext i32 [[DEVi32:%[^,]+]] to i64
 // CK2-DAG: [[DEVi32]] = load i32, i32* %{{[^,]+}},
 // CK2-DAG: [[GEPBP]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP:%[^,]+]]
 // CK2-DAG: [[GEPP]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P:%[^,]+]]
-// CK2-DAG: [[GEPS]] = getelementptr inbounds [2 x i[[sz]]], [2 x i[[sz]]]* [[S:%[^,]+]]
+// CK2-DAG: [[GEPS]] = getelementptr inbounds [2 x i64], [2 x i64]* [[PS:%[^,]+]]
 
 // CK2-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0
 // CK2-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0
-// CK2-DAG: [[S0:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 0
+// CK2-DAG: [[PS0:%.+]] = getelementptr inbounds [2 x i64], [2 x i64]* [[PS]], i{{.+}} 0, i{{.+}} 0
 // CK2-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to [[ST]]**
 // CK2-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to double***
 // CK2-DAG: store [[ST]]* [[VAR0:%.+]], [[ST]]** [[CBP0]]
 // CK2-DAG: store double** [[SEC0:%.+]], double*** [[CP0]]
-// CK2-DAG: store i[[sz]] {{%.+}}, i[[sz]]* [[S0]]
+// CK2-DAG: store i64 {{%.+}}, i64* [[PS0]],
 // CK2-DAG: [[SEC0]] = getelementptr inbounds {{.*}}[[ST]]* [[VAR0]], i32 0, i32 1
 
 // CK2-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1
 // CK2-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1
-// CK2-DAG: [[S1:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 1
 // CK2-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to double***
 // CK2-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to double**
 // CK2-DAG: store double** [[SEC0]], double*** [[CBP1]]
 // CK2-DAG: store double* [[SEC1:%.+]], double** [[CP1]]
-// CK2-DAG: store i[[sz]] 24, i[[sz]]* [[S1]]
 // CK2-DAG: [[SEC1]] = getelementptr inbounds {{.*}}double* [[SEC11:%[^,]+]], i{{.+}} 1
 // CK2-DAG: [[SEC11]] = load double*, double** [[SEC111:%[^,]+]],
 // CK2-DAG: [[SEC111]] = getelementptr inbounds {{.*}}[[ST]]* [[VAR0]], i32 0, i32 1
@@ -406,12 +409,12 @@ int bar(int arg){
 // CK2: br i1 %{{[^,]+}}, label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]]
 
 // CK2: [[IFTHEN]]
-// CK2-DAG: call void @__tgt_target_data_end_mapper(%struct.ident_t* @{{.+}}, i64 [[DEV:%[^,]+]], i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[sz]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE00]]{{.+}}, i8** null, i8** null)
+// CK2-DAG: call void @__tgt_target_data_end_mapper(%struct.ident_t* @{{.+}}, i64 [[DEV:%[^,]+]], i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%[^,]+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE00]]{{.+}}, i8** null, i8** null)
 // CK2-DAG: [[DEV]] = sext i32 [[DEVi32:%[^,]+]] to i64
 // CK2-DAG: [[DEVi32]] = load i32, i32* %{{[^,]+}},
 // CK2-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP]]
 // CK2-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P]]
-// CK2-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S]]
+// CK2-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[PS]]
 // CK2: br label %[[IFEND:[^,]+]]
 // CK2: [[IFELSE]]
 // CK2: br label %[[IFEND]]
@@ -475,6 +478,7 @@ struct STT {
   }
 };
 
+// CK4: [[SIZE00:@.+]] = {{.+}}constant [2 x i64] [i64 0, i64 24]
 // CK4: [[MTYPE00:@.+]] = {{.+}}constant [2 x i64] [i64 0, i64 281474976711701]
 
 // CK4-LABEL: _Z3bari
@@ -486,31 +490,29 @@ int bar(int arg){
 // Region 00
 // CK4: br i1 %{{[^,]+}}, label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]]
 // CK4: [[IFTHEN]]
-// CK4-DAG: call void @__tgt_target_data_begin_mapper(%struct.ident_t* @{{.+}}, i64 [[DEV:%[^,]+]], i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[sz:64|32]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE00]]{{.+}}, i8** null, i8** null)
+// CK4-DAG: call void @__tgt_target_data_begin_mapper(%struct.ident_t* @{{.+}}, i64 [[DEV:%[^,]+]], i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE00]]{{.+}}, i8** null, i8** null)
 // CK4-DAG: [[DEV]] = sext i32 [[DEVi32:%[^,]+]] to i64
 // CK4-DAG: [[DEVi32]] = load i32, i32* %{{[^,]+}},
 // CK4-DAG: [[GEPBP]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP:%[^,]+]]
 // CK4-DAG: [[GEPP]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P:%[^,]+]]
-// CK4-DAG: [[GEPS]] = getelementptr inbounds [2 x i[[sz]]], [2 x i[[sz]]]* [[S:%[^,]+]]
+// CK4-DAG: [[GEPS]] = getelementptr inbounds [2 x i64], [2 x i64]* [[PS:%[^,]+]]
 
 // CK4-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0
 // CK4-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0
-// CK4-DAG: [[S0:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 0
+// CK4-DAG: [[PS0:%.+]] = getelementptr inbounds {{.+}}[[PS]], i{{.+}} 0, i{{.+}} 0
 // CK4-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to [[STT]]**
 // CK4-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to double***
 // CK4-DAG: store [[STT]]* [[VAR0:%.+]], [[STT]]** [[CBP0]]
 // CK4-DAG: store double** [[SEC0:%.+]], double*** [[CP0]]
-// CK4-DAG: store i[[sz]] {{%.+}}, i[[sz]]* [[S0]]
+// CK4-DAG: store i64 {{%.+}}, i64* [[PS0]],
 // CK4-DAG: [[SEC0]] = getelementptr inbounds {{.*}}[[STT]]* [[VAR0]], i32 0, i32 1
 
 // CK4-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1
 // CK4-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1
-// CK4-DAG: [[S1:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 1
 // CK4-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to double***
 // CK4-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to double**
 // CK4-DAG: store double** [[SEC0]], double*** [[CBP1]]
 // CK4-DAG: store double* [[SEC1:%.+]], double** [[CP1]]
-// CK4-DAG: store i[[sz]] 24, i[[sz]]* [[S1]]
 // CK4-DAG: [[SEC1]] = getelementptr inbounds {{.*}}double* [[SEC11:%[^,]+]], i{{.+}} 1
 // CK4-DAG: [[SEC11]] = load double*, double** [[SEC111:%[^,]+]],
 // CK4-DAG: [[SEC111]] = getelementptr inbounds {{.*}}[[STT]]* [[VAR0]], i32 0, i32 1
@@ -524,12 +526,12 @@ int bar(int arg){
 // CK4: br i1 %{{[^,]+}}, label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]]
 
 // CK4: [[IFTHEN]]
-// CK4-DAG: call void @__tgt_target_data_end_mapper(%struct.ident_t* @{{.+}}, i64 [[DEV:%[^,]+]], i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[sz]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE00]]{{.+}}, i8** null, i8** null)
+// CK4-DAG: call void @__tgt_target_data_end_mapper(%struct.ident_t* @{{.+}}, i64 [[DEV:%[^,]+]], i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE00]]{{.+}}, i8** null, i8** null)
 // CK4-DAG: [[DEV]] = sext i32 [[DEVi32:%[^,]+]] to i64
 // CK4-DAG: [[DEVi32]] = load i32, i32* %{{[^,]+}},
 // CK4-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP]]
 // CK4-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P]]
-// CK4-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S]]
+// CK4-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[PS]]
 // CK4: br label %[[IFEND:[^,]+]]
 // CK4: [[IFELSE]]
 // CK4: br label %[[IFEND]]
@@ -641,7 +643,7 @@ void test_present_modifier(int arg) {
 
   // Make sure the struct picks up present even if another element of the struct
   // doesn't have present.
-
+  // CK8: private unnamed_addr constant [11 x i64] [i64 0, i64 {{4|8}}, i64 {{4|8}}, i64 4, i64 4, i64 4, i64 0, i64 4, i64 {{4|8}}, i64 {{4|8}}, i64 4]
   // CK8: private unnamed_addr constant [11 x i64]
 
 // ps1

diff  --git a/clang/test/OpenMP/target_data_map_codegen_hold.cpp b/clang/test/OpenMP/target_data_map_codegen_hold.cpp
index d036f784c66ec..3c1f2e97ac3c9 100644
--- a/clang/test/OpenMP/target_data_map_codegen_hold.cpp
+++ b/clang/test/OpenMP/target_data_map_codegen_hold.cpp
@@ -1,4 +1,4 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals --global-value-regex ".offload_maptypes.*" ".offload_sizes.*" --global-hex-value-regex ".offload_maptypes.*"
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals --prefix-filecheck-ir-name _ --global-value-regex ".offload_maptypes.*" ".offload_sizes.*" --global-hex-value-regex ".offload_maptypes.*"
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
@@ -59,7 +59,8 @@ struct S2 {
 // CHECK-PPC64LE: @.offload_maptypes.2 = private unnamed_addr constant [1 x i64] [i64 [[#0x2405]]]
 // CHECK-PPC64LE: @.offload_sizes.3 = private unnamed_addr constant [1 x i64] [i64 4]
 // CHECK-PPC64LE: @.offload_maptypes.4 = private unnamed_addr constant [1 x i64] [i64 [[#0x2003]]]
-// CHECK-PPC64LE: @.offload_maptypes.5 = private unnamed_addr constant [11 x i64] [i64 [[#0x2000]], i64 [[#0x1000000002003]], i64 [[#0x1000000002010]], i64 [[#0x2010]], i64 [[#0x2013]], i64 [[#0x3]], i64 [[#0x2000]], i64 [[#0x7000000002003]], i64 [[#0x7000000002010]], i64 [[#0x2010]], i64 [[#0x2013]]]
+// CHECK-PPC64LE: @.offload_sizes.5 = private unnamed_addr constant [11 x i64] [i64 0, i64 4, i64 8, i64 8, i64 4, i64 4, i64 0, i64 4, i64 8, i64 8, i64 4]
+// CHECK-PPC64LE: @.offload_maptypes.6 = private unnamed_addr constant [11 x i64] [i64 [[#0x2000]], i64 [[#0x1000000002003]], i64 [[#0x1000000002010]], i64 [[#0x2010]], i64 [[#0x2013]], i64 [[#0x3]], i64 [[#0x2000]], i64 [[#0x7000000002003]], i64 [[#0x7000000002010]], i64 [[#0x2010]], i64 [[#0x2013]]]
 //.
 // CHECK-I386: @.offload_sizes = private unnamed_addr constant [1 x i64] [i64 20]
 // CHECK-I386: @.offload_maptypes = private unnamed_addr constant [1 x i64] [i64 [[#0x2001]]]
@@ -67,7 +68,8 @@ struct S2 {
 // CHECK-I386: @.offload_maptypes.2 = private unnamed_addr constant [1 x i64] [i64 [[#0x2405]]]
 // CHECK-I386: @.offload_sizes.3 = private unnamed_addr constant [1 x i64] [i64 4]
 // CHECK-I386: @.offload_maptypes.4 = private unnamed_addr constant [1 x i64] [i64 [[#0x2003]]]
-// CHECK-I386: @.offload_maptypes.5 = private unnamed_addr constant [11 x i64] [i64 [[#0x2000]], i64 [[#0x1000000002003]], i64 [[#0x1000000002010]], i64 [[#0x2010]], i64 [[#0x2013]], i64 [[#0x3]], i64 [[#0x2000]], i64 [[#0x7000000002003]], i64 [[#0x7000000002010]], i64 [[#0x2010]], i64 [[#0x2013]]]
+// CHECK-I386: @.offload_sizes.5 = private unnamed_addr constant [11 x i64] [i64 0, i64 4, i64 4, i64 4, i64 4, i64 4, i64 0, i64 4, i64 4, i64 4, i64 4]
+// CHECK-I386: @.offload_maptypes.6 = private unnamed_addr constant [11 x i64] [i64 [[#0x2000]], i64 [[#0x1000000002003]], i64 [[#0x1000000002010]], i64 [[#0x2010]], i64 [[#0x2013]], i64 [[#0x3]], i64 [[#0x2000]], i64 [[#0x7000000002003]], i64 [[#0x7000000002010]], i64 [[#0x2010]], i64 [[#0x2013]]]
 //.
 // CHECK-PPC64LE-LABEL: @_Z3fooi(
 // CHECK-PPC64LE-NEXT:  entry:
@@ -202,24 +204,24 @@ struct S2 {
 // CHECK-PPC64LE-NEXT:    [[TMP67:%.*]] = ptrtoint i8* [[TMP64]] to i64
 // CHECK-PPC64LE-NEXT:    [[TMP68:%.*]] = sub i64 [[TMP66]], [[TMP67]]
 // CHECK-PPC64LE-NEXT:    [[TMP69:%.*]] = sdiv exact i64 [[TMP68]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64)
-// CHECK-PPC64LE-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 0
-// CHECK-PPC64LE-NEXT:    [[TMP71:%.*]] = bitcast i8** [[TMP70]] to %struct.S2**
-// CHECK-PPC64LE-NEXT:    store %struct.S2* [[TMP30]], %struct.S2** [[TMP71]], align 8
-// CHECK-PPC64LE-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 0
-// CHECK-PPC64LE-NEXT:    [[TMP73:%.*]] = bitcast i8** [[TMP72]] to %struct.S1**
-// CHECK-PPC64LE-NEXT:    store %struct.S1* [[S]], %struct.S1** [[TMP73]], align 8
-// CHECK-PPC64LE-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [11 x i64], [11 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK-PPC64LE-NEXT:    store i64 [[TMP49]], i64* [[TMP74]], align 8
-// CHECK-PPC64LE-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 0
-// CHECK-PPC64LE-NEXT:    store i8* null, i8** [[TMP75]], align 8
-// CHECK-PPC64LE-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 1
-// CHECK-PPC64LE-NEXT:    [[TMP77:%.*]] = bitcast i8** [[TMP76]] to %struct.S2**
-// CHECK-PPC64LE-NEXT:    store %struct.S2* [[TMP30]], %struct.S2** [[TMP77]], align 8
-// CHECK-PPC64LE-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 1
-// CHECK-PPC64LE-NEXT:    [[TMP79:%.*]] = bitcast i8** [[TMP78]] to %struct.S1**
-// CHECK-PPC64LE-NEXT:    store %struct.S1* [[S]], %struct.S1** [[TMP79]], align 8
-// CHECK-PPC64LE-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [11 x i64], [11 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK-PPC64LE-NEXT:    store i64 4, i64* [[TMP80]], align 8
+// CHECK-PPC64LE-NEXT:    [[TMP70:%.*]] = bitcast [11 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK-PPC64LE-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP70]], i8* align 8 bitcast ([11 x i64]* @.offload_sizes.5 to i8*), i64 88, i1 false)
+// CHECK-PPC64LE-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 0
+// CHECK-PPC64LE-NEXT:    [[TMP72:%.*]] = bitcast i8** [[TMP71]] to %struct.S2**
+// CHECK-PPC64LE-NEXT:    store %struct.S2* [[TMP30]], %struct.S2** [[TMP72]], align 8
+// CHECK-PPC64LE-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 0
+// CHECK-PPC64LE-NEXT:    [[TMP74:%.*]] = bitcast i8** [[TMP73]] to %struct.S1**
+// CHECK-PPC64LE-NEXT:    store %struct.S1* [[S]], %struct.S1** [[TMP74]], align 8
+// CHECK-PPC64LE-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [11 x i64], [11 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK-PPC64LE-NEXT:    store i64 [[TMP49]], i64* [[TMP75]], align 8
+// CHECK-PPC64LE-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 0
+// CHECK-PPC64LE-NEXT:    store i8* null, i8** [[TMP76]], align 8
+// CHECK-PPC64LE-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 1
+// CHECK-PPC64LE-NEXT:    [[TMP78:%.*]] = bitcast i8** [[TMP77]] to %struct.S2**
+// CHECK-PPC64LE-NEXT:    store %struct.S2* [[TMP30]], %struct.S2** [[TMP78]], align 8
+// CHECK-PPC64LE-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 1
+// CHECK-PPC64LE-NEXT:    [[TMP80:%.*]] = bitcast i8** [[TMP79]] to %struct.S1**
+// CHECK-PPC64LE-NEXT:    store %struct.S1* [[S]], %struct.S1** [[TMP80]], align 8
 // CHECK-PPC64LE-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 1
 // CHECK-PPC64LE-NEXT:    store i8* null, i8** [[TMP81]], align 8
 // CHECK-PPC64LE-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 2
@@ -228,101 +230,85 @@ struct S2 {
 // CHECK-PPC64LE-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 2
 // CHECK-PPC64LE-NEXT:    [[TMP85:%.*]] = bitcast i8** [[TMP84]] to %struct.S2***
 // CHECK-PPC64LE-NEXT:    store %struct.S2** [[PS10]], %struct.S2*** [[TMP85]], align 8
-// CHECK-PPC64LE-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [11 x i64], [11 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK-PPC64LE-NEXT:    store i64 8, i64* [[TMP86]], align 8
-// CHECK-PPC64LE-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 2
-// CHECK-PPC64LE-NEXT:    store i8* null, i8** [[TMP87]], align 8
-// CHECK-PPC64LE-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 3
-// CHECK-PPC64LE-NEXT:    [[TMP89:%.*]] = bitcast i8** [[TMP88]] to %struct.S2***
-// CHECK-PPC64LE-NEXT:    store %struct.S2** [[PS10]], %struct.S2*** [[TMP89]], align 8
-// CHECK-PPC64LE-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 3
-// CHECK-PPC64LE-NEXT:    [[TMP91:%.*]] = bitcast i8** [[TMP90]] to %struct.S2***
-// CHECK-PPC64LE-NEXT:    store %struct.S2** [[PS13]], %struct.S2*** [[TMP91]], align 8
-// CHECK-PPC64LE-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [11 x i64], [11 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK-PPC64LE-NEXT:    store i64 8, i64* [[TMP92]], align 8
-// CHECK-PPC64LE-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 3
-// CHECK-PPC64LE-NEXT:    store i8* null, i8** [[TMP93]], align 8
-// CHECK-PPC64LE-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 4
-// CHECK-PPC64LE-NEXT:    [[TMP95:%.*]] = bitcast i8** [[TMP94]] to %struct.S2***
-// CHECK-PPC64LE-NEXT:    store %struct.S2** [[PS13]], %struct.S2*** [[TMP95]], align 8
-// CHECK-PPC64LE-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 4
-// CHECK-PPC64LE-NEXT:    [[TMP97:%.*]] = bitcast i8** [[TMP96]] to %struct.S1**
-// CHECK-PPC64LE-NEXT:    store %struct.S1* [[S17]], %struct.S1** [[TMP97]], align 8
-// CHECK-PPC64LE-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [11 x i64], [11 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK-PPC64LE-NEXT:    store i64 4, i64* [[TMP98]], align 8
-// CHECK-PPC64LE-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 4
-// CHECK-PPC64LE-NEXT:    store i8* null, i8** [[TMP99]], align 8
-// CHECK-PPC64LE-NEXT:    [[TMP100:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 5
-// CHECK-PPC64LE-NEXT:    [[TMP101:%.*]] = bitcast i8** [[TMP100]] to i32**
-// CHECK-PPC64LE-NEXT:    store i32* [[ARG_ADDR]], i32** [[TMP101]], align 8
-// CHECK-PPC64LE-NEXT:    [[TMP102:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 5
-// CHECK-PPC64LE-NEXT:    [[TMP103:%.*]] = bitcast i8** [[TMP102]] to i32**
-// CHECK-PPC64LE-NEXT:    store i32* [[ARG_ADDR]], i32** [[TMP103]], align 8
-// CHECK-PPC64LE-NEXT:    [[TMP104:%.*]] = getelementptr inbounds [11 x i64], [11 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 5
-// CHECK-PPC64LE-NEXT:    store i64 4, i64* [[TMP104]], align 8
-// CHECK-PPC64LE-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 5
-// CHECK-PPC64LE-NEXT:    store i8* null, i8** [[TMP105]], align 8
-// CHECK-PPC64LE-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 6
-// CHECK-PPC64LE-NEXT:    [[TMP107:%.*]] = bitcast i8** [[TMP106]] to %struct.S2**
-// CHECK-PPC64LE-NEXT:    store %struct.S2* [[TMP50]], %struct.S2** [[TMP107]], align 8
-// CHECK-PPC64LE-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 6
-// CHECK-PPC64LE-NEXT:    [[TMP109:%.*]] = bitcast i8** [[TMP108]] to %struct.S1**
-// CHECK-PPC64LE-NEXT:    store %struct.S1* [[S18]], %struct.S1** [[TMP109]], align 8
-// CHECK-PPC64LE-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [11 x i64], [11 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 6
-// CHECK-PPC64LE-NEXT:    store i64 [[TMP69]], i64* [[TMP110]], align 8
-// CHECK-PPC64LE-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 6
-// CHECK-PPC64LE-NEXT:    store i8* null, i8** [[TMP111]], align 8
-// CHECK-PPC64LE-NEXT:    [[TMP112:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 7
-// CHECK-PPC64LE-NEXT:    [[TMP113:%.*]] = bitcast i8** [[TMP112]] to %struct.S2**
-// CHECK-PPC64LE-NEXT:    store %struct.S2* [[TMP50]], %struct.S2** [[TMP113]], align 8
-// CHECK-PPC64LE-NEXT:    [[TMP114:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 7
-// CHECK-PPC64LE-NEXT:    [[TMP115:%.*]] = bitcast i8** [[TMP114]] to %struct.S1**
-// CHECK-PPC64LE-NEXT:    store %struct.S1* [[S18]], %struct.S1** [[TMP115]], align 8
-// CHECK-PPC64LE-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [11 x i64], [11 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
-// CHECK-PPC64LE-NEXT:    store i64 4, i64* [[TMP116]], align 8
-// CHECK-PPC64LE-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 7
+// CHECK-PPC64LE-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 2
+// CHECK-PPC64LE-NEXT:    store i8* null, i8** [[TMP86]], align 8
+// CHECK-PPC64LE-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 3
+// CHECK-PPC64LE-NEXT:    [[TMP88:%.*]] = bitcast i8** [[TMP87]] to %struct.S2***
+// CHECK-PPC64LE-NEXT:    store %struct.S2** [[PS10]], %struct.S2*** [[TMP88]], align 8
+// CHECK-PPC64LE-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 3
+// CHECK-PPC64LE-NEXT:    [[TMP90:%.*]] = bitcast i8** [[TMP89]] to %struct.S2***
+// CHECK-PPC64LE-NEXT:    store %struct.S2** [[PS13]], %struct.S2*** [[TMP90]], align 8
+// CHECK-PPC64LE-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 3
+// CHECK-PPC64LE-NEXT:    store i8* null, i8** [[TMP91]], align 8
+// CHECK-PPC64LE-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 4
+// CHECK-PPC64LE-NEXT:    [[TMP93:%.*]] = bitcast i8** [[TMP92]] to %struct.S2***
+// CHECK-PPC64LE-NEXT:    store %struct.S2** [[PS13]], %struct.S2*** [[TMP93]], align 8
+// CHECK-PPC64LE-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 4
+// CHECK-PPC64LE-NEXT:    [[TMP95:%.*]] = bitcast i8** [[TMP94]] to %struct.S1**
+// CHECK-PPC64LE-NEXT:    store %struct.S1* [[S17]], %struct.S1** [[TMP95]], align 8
+// CHECK-PPC64LE-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 4
+// CHECK-PPC64LE-NEXT:    store i8* null, i8** [[TMP96]], align 8
+// CHECK-PPC64LE-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 5
+// CHECK-PPC64LE-NEXT:    [[TMP98:%.*]] = bitcast i8** [[TMP97]] to i32**
+// CHECK-PPC64LE-NEXT:    store i32* [[ARG_ADDR]], i32** [[TMP98]], align 8
+// CHECK-PPC64LE-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 5
+// CHECK-PPC64LE-NEXT:    [[TMP100:%.*]] = bitcast i8** [[TMP99]] to i32**
+// CHECK-PPC64LE-NEXT:    store i32* [[ARG_ADDR]], i32** [[TMP100]], align 8
+// CHECK-PPC64LE-NEXT:    [[TMP101:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 5
+// CHECK-PPC64LE-NEXT:    store i8* null, i8** [[TMP101]], align 8
+// CHECK-PPC64LE-NEXT:    [[TMP102:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 6
+// CHECK-PPC64LE-NEXT:    [[TMP103:%.*]] = bitcast i8** [[TMP102]] to %struct.S2**
+// CHECK-PPC64LE-NEXT:    store %struct.S2* [[TMP50]], %struct.S2** [[TMP103]], align 8
+// CHECK-PPC64LE-NEXT:    [[TMP104:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 6
+// CHECK-PPC64LE-NEXT:    [[TMP105:%.*]] = bitcast i8** [[TMP104]] to %struct.S1**
+// CHECK-PPC64LE-NEXT:    store %struct.S1* [[S18]], %struct.S1** [[TMP105]], align 8
+// CHECK-PPC64LE-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [11 x i64], [11 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 6
+// CHECK-PPC64LE-NEXT:    store i64 [[TMP69]], i64* [[TMP106]], align 8
+// CHECK-PPC64LE-NEXT:    [[TMP107:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 6
+// CHECK-PPC64LE-NEXT:    store i8* null, i8** [[TMP107]], align 8
+// CHECK-PPC64LE-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 7
+// CHECK-PPC64LE-NEXT:    [[TMP109:%.*]] = bitcast i8** [[TMP108]] to %struct.S2**
+// CHECK-PPC64LE-NEXT:    store %struct.S2* [[TMP50]], %struct.S2** [[TMP109]], align 8
+// CHECK-PPC64LE-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 7
+// CHECK-PPC64LE-NEXT:    [[TMP111:%.*]] = bitcast i8** [[TMP110]] to %struct.S1**
+// CHECK-PPC64LE-NEXT:    store %struct.S1* [[S18]], %struct.S1** [[TMP111]], align 8
+// CHECK-PPC64LE-NEXT:    [[TMP112:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 7
+// CHECK-PPC64LE-NEXT:    store i8* null, i8** [[TMP112]], align 8
+// CHECK-PPC64LE-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 8
+// CHECK-PPC64LE-NEXT:    [[TMP114:%.*]] = bitcast i8** [[TMP113]] to %struct.S2***
+// CHECK-PPC64LE-NEXT:    store %struct.S2** [[PS19]], %struct.S2*** [[TMP114]], align 8
+// CHECK-PPC64LE-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 8
+// CHECK-PPC64LE-NEXT:    [[TMP116:%.*]] = bitcast i8** [[TMP115]] to %struct.S2***
+// CHECK-PPC64LE-NEXT:    store %struct.S2** [[PS21]], %struct.S2*** [[TMP116]], align 8
+// CHECK-PPC64LE-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 8
 // CHECK-PPC64LE-NEXT:    store i8* null, i8** [[TMP117]], align 8
-// CHECK-PPC64LE-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 8
+// CHECK-PPC64LE-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 9
 // CHECK-PPC64LE-NEXT:    [[TMP119:%.*]] = bitcast i8** [[TMP118]] to %struct.S2***
-// CHECK-PPC64LE-NEXT:    store %struct.S2** [[PS19]], %struct.S2*** [[TMP119]], align 8
-// CHECK-PPC64LE-NEXT:    [[TMP120:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 8
+// CHECK-PPC64LE-NEXT:    store %struct.S2** [[PS21]], %struct.S2*** [[TMP119]], align 8
+// CHECK-PPC64LE-NEXT:    [[TMP120:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 9
 // CHECK-PPC64LE-NEXT:    [[TMP121:%.*]] = bitcast i8** [[TMP120]] to %struct.S2***
-// CHECK-PPC64LE-NEXT:    store %struct.S2** [[PS21]], %struct.S2*** [[TMP121]], align 8
-// CHECK-PPC64LE-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [11 x i64], [11 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 8
-// CHECK-PPC64LE-NEXT:    store i64 8, i64* [[TMP122]], align 8
-// CHECK-PPC64LE-NEXT:    [[TMP123:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 8
-// CHECK-PPC64LE-NEXT:    store i8* null, i8** [[TMP123]], align 8
-// CHECK-PPC64LE-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 9
-// CHECK-PPC64LE-NEXT:    [[TMP125:%.*]] = bitcast i8** [[TMP124]] to %struct.S2***
-// CHECK-PPC64LE-NEXT:    store %struct.S2** [[PS21]], %struct.S2*** [[TMP125]], align 8
-// CHECK-PPC64LE-NEXT:    [[TMP126:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 9
-// CHECK-PPC64LE-NEXT:    [[TMP127:%.*]] = bitcast i8** [[TMP126]] to %struct.S2***
-// CHECK-PPC64LE-NEXT:    store %struct.S2** [[PS24]], %struct.S2*** [[TMP127]], align 8
-// CHECK-PPC64LE-NEXT:    [[TMP128:%.*]] = getelementptr inbounds [11 x i64], [11 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 9
-// CHECK-PPC64LE-NEXT:    store i64 8, i64* [[TMP128]], align 8
-// CHECK-PPC64LE-NEXT:    [[TMP129:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 9
-// CHECK-PPC64LE-NEXT:    store i8* null, i8** [[TMP129]], align 8
-// CHECK-PPC64LE-NEXT:    [[TMP130:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 10
-// CHECK-PPC64LE-NEXT:    [[TMP131:%.*]] = bitcast i8** [[TMP130]] to %struct.S2***
-// CHECK-PPC64LE-NEXT:    store %struct.S2** [[PS24]], %struct.S2*** [[TMP131]], align 8
-// CHECK-PPC64LE-NEXT:    [[TMP132:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 10
-// CHECK-PPC64LE-NEXT:    [[TMP133:%.*]] = bitcast i8** [[TMP132]] to %struct.S1**
-// CHECK-PPC64LE-NEXT:    store %struct.S1* [[S28]], %struct.S1** [[TMP133]], align 8
-// CHECK-PPC64LE-NEXT:    [[TMP134:%.*]] = getelementptr inbounds [11 x i64], [11 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 10
-// CHECK-PPC64LE-NEXT:    store i64 4, i64* [[TMP134]], align 8
-// CHECK-PPC64LE-NEXT:    [[TMP135:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 10
-// CHECK-PPC64LE-NEXT:    store i8* null, i8** [[TMP135]], align 8
-// CHECK-PPC64LE-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 0
-// CHECK-PPC64LE-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 0
-// CHECK-PPC64LE-NEXT:    [[TMP138:%.*]] = getelementptr inbounds [11 x i64], [11 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK-PPC64LE-NEXT:    call void @__tgt_target_data_begin_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i32 11, i8** [[TMP136]], i8** [[TMP137]], i64* [[TMP138]], i64* getelementptr inbounds ([11 x i64], [11 x i64]* @.offload_maptypes.5, i32 0, i32 0), i8** null, i8** null)
-// CHECK-PPC64LE-NEXT:    [[TMP139:%.*]] = load i32, i32* [[ARG_ADDR]], align 4
-// CHECK-PPC64LE-NEXT:    [[INC32:%.*]] = add nsw i32 [[TMP139]], 1
+// CHECK-PPC64LE-NEXT:    store %struct.S2** [[PS24]], %struct.S2*** [[TMP121]], align 8
+// CHECK-PPC64LE-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 9
+// CHECK-PPC64LE-NEXT:    store i8* null, i8** [[TMP122]], align 8
+// CHECK-PPC64LE-NEXT:    [[TMP123:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 10
+// CHECK-PPC64LE-NEXT:    [[TMP124:%.*]] = bitcast i8** [[TMP123]] to %struct.S2***
+// CHECK-PPC64LE-NEXT:    store %struct.S2** [[PS24]], %struct.S2*** [[TMP124]], align 8
+// CHECK-PPC64LE-NEXT:    [[TMP125:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 10
+// CHECK-PPC64LE-NEXT:    [[TMP126:%.*]] = bitcast i8** [[TMP125]] to %struct.S1**
+// CHECK-PPC64LE-NEXT:    store %struct.S1* [[S28]], %struct.S1** [[TMP126]], align 8
+// CHECK-PPC64LE-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 10
+// CHECK-PPC64LE-NEXT:    store i8* null, i8** [[TMP127]], align 8
+// CHECK-PPC64LE-NEXT:    [[TMP128:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 0
+// CHECK-PPC64LE-NEXT:    [[TMP129:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 0
+// CHECK-PPC64LE-NEXT:    [[TMP130:%.*]] = getelementptr inbounds [11 x i64], [11 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK-PPC64LE-NEXT:    call void @__tgt_target_data_begin_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i32 11, i8** [[TMP128]], i8** [[TMP129]], i64* [[TMP130]], i64* getelementptr inbounds ([11 x i64], [11 x i64]* @.offload_maptypes.6, i32 0, i32 0), i8** null, i8** null)
+// CHECK-PPC64LE-NEXT:    [[TMP131:%.*]] = load i32, i32* [[ARG_ADDR]], align 4
+// CHECK-PPC64LE-NEXT:    [[INC32:%.*]] = add nsw i32 [[TMP131]], 1
 // CHECK-PPC64LE-NEXT:    store i32 [[INC32]], i32* [[ARG_ADDR]], align 4
-// CHECK-PPC64LE-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 0
-// CHECK-PPC64LE-NEXT:    [[TMP141:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 0
-// CHECK-PPC64LE-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [11 x i64], [11 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK-PPC64LE-NEXT:    call void @__tgt_target_data_end_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i32 11, i8** [[TMP140]], i8** [[TMP141]], i64* [[TMP142]], i64* getelementptr inbounds ([11 x i64], [11 x i64]* @.offload_maptypes.5, i32 0, i32 0), i8** null, i8** null)
+// CHECK-PPC64LE-NEXT:    [[TMP132:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 0
+// CHECK-PPC64LE-NEXT:    [[TMP133:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 0
+// CHECK-PPC64LE-NEXT:    [[TMP134:%.*]] = getelementptr inbounds [11 x i64], [11 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK-PPC64LE-NEXT:    call void @__tgt_target_data_end_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i32 11, i8** [[TMP132]], i8** [[TMP133]], i64* [[TMP134]], i64* getelementptr inbounds ([11 x i64], [11 x i64]* @.offload_maptypes.6, i32 0, i32 0), i8** null, i8** null)
 // CHECK-PPC64LE-NEXT:    ret void
 //
 // CHECK-I386-LABEL: @_Z3fooi(
@@ -458,24 +444,24 @@ struct S2 {
 // CHECK-I386-NEXT:    [[TMP67:%.*]] = ptrtoint i8* [[TMP64]] to i64
 // CHECK-I386-NEXT:    [[TMP68:%.*]] = sub i64 [[TMP66]], [[TMP67]]
 // CHECK-I386-NEXT:    [[TMP69:%.*]] = sdiv exact i64 [[TMP68]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64)
-// CHECK-I386-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 0
-// CHECK-I386-NEXT:    [[TMP71:%.*]] = bitcast i8** [[TMP70]] to %struct.S2**
-// CHECK-I386-NEXT:    store %struct.S2* [[TMP30]], %struct.S2** [[TMP71]], align 4
-// CHECK-I386-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 0
-// CHECK-I386-NEXT:    [[TMP73:%.*]] = bitcast i8** [[TMP72]] to %struct.S1**
-// CHECK-I386-NEXT:    store %struct.S1* [[S]], %struct.S1** [[TMP73]], align 4
-// CHECK-I386-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [11 x i64], [11 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK-I386-NEXT:    store i64 [[TMP49]], i64* [[TMP74]], align 4
-// CHECK-I386-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i32 0, i32 0
-// CHECK-I386-NEXT:    store i8* null, i8** [[TMP75]], align 4
-// CHECK-I386-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 1
-// CHECK-I386-NEXT:    [[TMP77:%.*]] = bitcast i8** [[TMP76]] to %struct.S2**
-// CHECK-I386-NEXT:    store %struct.S2* [[TMP30]], %struct.S2** [[TMP77]], align 4
-// CHECK-I386-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 1
-// CHECK-I386-NEXT:    [[TMP79:%.*]] = bitcast i8** [[TMP78]] to %struct.S1**
-// CHECK-I386-NEXT:    store %struct.S1* [[S]], %struct.S1** [[TMP79]], align 4
-// CHECK-I386-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [11 x i64], [11 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK-I386-NEXT:    store i64 4, i64* [[TMP80]], align 4
+// CHECK-I386-NEXT:    [[TMP70:%.*]] = bitcast [11 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK-I386-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP70]], i8* align 4 bitcast ([11 x i64]* @.offload_sizes.5 to i8*), i32 88, i1 false)
+// CHECK-I386-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 0
+// CHECK-I386-NEXT:    [[TMP72:%.*]] = bitcast i8** [[TMP71]] to %struct.S2**
+// CHECK-I386-NEXT:    store %struct.S2* [[TMP30]], %struct.S2** [[TMP72]], align 4
+// CHECK-I386-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 0
+// CHECK-I386-NEXT:    [[TMP74:%.*]] = bitcast i8** [[TMP73]] to %struct.S1**
+// CHECK-I386-NEXT:    store %struct.S1* [[S]], %struct.S1** [[TMP74]], align 4
+// CHECK-I386-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [11 x i64], [11 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK-I386-NEXT:    store i64 [[TMP49]], i64* [[TMP75]], align 4
+// CHECK-I386-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i32 0, i32 0
+// CHECK-I386-NEXT:    store i8* null, i8** [[TMP76]], align 4
+// CHECK-I386-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 1
+// CHECK-I386-NEXT:    [[TMP78:%.*]] = bitcast i8** [[TMP77]] to %struct.S2**
+// CHECK-I386-NEXT:    store %struct.S2* [[TMP30]], %struct.S2** [[TMP78]], align 4
+// CHECK-I386-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 1
+// CHECK-I386-NEXT:    [[TMP80:%.*]] = bitcast i8** [[TMP79]] to %struct.S1**
+// CHECK-I386-NEXT:    store %struct.S1* [[S]], %struct.S1** [[TMP80]], align 4
 // CHECK-I386-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i32 0, i32 1
 // CHECK-I386-NEXT:    store i8* null, i8** [[TMP81]], align 4
 // CHECK-I386-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 2
@@ -484,101 +470,85 @@ struct S2 {
 // CHECK-I386-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 2
 // CHECK-I386-NEXT:    [[TMP85:%.*]] = bitcast i8** [[TMP84]] to %struct.S2***
 // CHECK-I386-NEXT:    store %struct.S2** [[PS10]], %struct.S2*** [[TMP85]], align 4
-// CHECK-I386-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [11 x i64], [11 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK-I386-NEXT:    store i64 4, i64* [[TMP86]], align 4
-// CHECK-I386-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i32 0, i32 2
-// CHECK-I386-NEXT:    store i8* null, i8** [[TMP87]], align 4
-// CHECK-I386-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 3
-// CHECK-I386-NEXT:    [[TMP89:%.*]] = bitcast i8** [[TMP88]] to %struct.S2***
-// CHECK-I386-NEXT:    store %struct.S2** [[PS10]], %struct.S2*** [[TMP89]], align 4
-// CHECK-I386-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 3
-// CHECK-I386-NEXT:    [[TMP91:%.*]] = bitcast i8** [[TMP90]] to %struct.S2***
-// CHECK-I386-NEXT:    store %struct.S2** [[PS13]], %struct.S2*** [[TMP91]], align 4
-// CHECK-I386-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [11 x i64], [11 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK-I386-NEXT:    store i64 4, i64* [[TMP92]], align 4
-// CHECK-I386-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i32 0, i32 3
-// CHECK-I386-NEXT:    store i8* null, i8** [[TMP93]], align 4
-// CHECK-I386-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 4
-// CHECK-I386-NEXT:    [[TMP95:%.*]] = bitcast i8** [[TMP94]] to %struct.S2***
-// CHECK-I386-NEXT:    store %struct.S2** [[PS13]], %struct.S2*** [[TMP95]], align 4
-// CHECK-I386-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 4
-// CHECK-I386-NEXT:    [[TMP97:%.*]] = bitcast i8** [[TMP96]] to %struct.S1**
-// CHECK-I386-NEXT:    store %struct.S1* [[S17]], %struct.S1** [[TMP97]], align 4
-// CHECK-I386-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [11 x i64], [11 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK-I386-NEXT:    store i64 4, i64* [[TMP98]], align 4
-// CHECK-I386-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i32 0, i32 4
-// CHECK-I386-NEXT:    store i8* null, i8** [[TMP99]], align 4
-// CHECK-I386-NEXT:    [[TMP100:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 5
-// CHECK-I386-NEXT:    [[TMP101:%.*]] = bitcast i8** [[TMP100]] to i32**
-// CHECK-I386-NEXT:    store i32* [[ARG_ADDR]], i32** [[TMP101]], align 4
-// CHECK-I386-NEXT:    [[TMP102:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 5
-// CHECK-I386-NEXT:    [[TMP103:%.*]] = bitcast i8** [[TMP102]] to i32**
-// CHECK-I386-NEXT:    store i32* [[ARG_ADDR]], i32** [[TMP103]], align 4
-// CHECK-I386-NEXT:    [[TMP104:%.*]] = getelementptr inbounds [11 x i64], [11 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 5
-// CHECK-I386-NEXT:    store i64 4, i64* [[TMP104]], align 4
-// CHECK-I386-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i32 0, i32 5
-// CHECK-I386-NEXT:    store i8* null, i8** [[TMP105]], align 4
-// CHECK-I386-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 6
-// CHECK-I386-NEXT:    [[TMP107:%.*]] = bitcast i8** [[TMP106]] to %struct.S2**
-// CHECK-I386-NEXT:    store %struct.S2* [[TMP50]], %struct.S2** [[TMP107]], align 4
-// CHECK-I386-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 6
-// CHECK-I386-NEXT:    [[TMP109:%.*]] = bitcast i8** [[TMP108]] to %struct.S1**
-// CHECK-I386-NEXT:    store %struct.S1* [[S18]], %struct.S1** [[TMP109]], align 4
-// CHECK-I386-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [11 x i64], [11 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 6
-// CHECK-I386-NEXT:    store i64 [[TMP69]], i64* [[TMP110]], align 4
-// CHECK-I386-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i32 0, i32 6
-// CHECK-I386-NEXT:    store i8* null, i8** [[TMP111]], align 4
-// CHECK-I386-NEXT:    [[TMP112:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 7
-// CHECK-I386-NEXT:    [[TMP113:%.*]] = bitcast i8** [[TMP112]] to %struct.S2**
-// CHECK-I386-NEXT:    store %struct.S2* [[TMP50]], %struct.S2** [[TMP113]], align 4
-// CHECK-I386-NEXT:    [[TMP114:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 7
-// CHECK-I386-NEXT:    [[TMP115:%.*]] = bitcast i8** [[TMP114]] to %struct.S1**
-// CHECK-I386-NEXT:    store %struct.S1* [[S18]], %struct.S1** [[TMP115]], align 4
-// CHECK-I386-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [11 x i64], [11 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
-// CHECK-I386-NEXT:    store i64 4, i64* [[TMP116]], align 4
-// CHECK-I386-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i32 0, i32 7
+// CHECK-I386-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i32 0, i32 2
+// CHECK-I386-NEXT:    store i8* null, i8** [[TMP86]], align 4
+// CHECK-I386-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 3
+// CHECK-I386-NEXT:    [[TMP88:%.*]] = bitcast i8** [[TMP87]] to %struct.S2***
+// CHECK-I386-NEXT:    store %struct.S2** [[PS10]], %struct.S2*** [[TMP88]], align 4
+// CHECK-I386-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 3
+// CHECK-I386-NEXT:    [[TMP90:%.*]] = bitcast i8** [[TMP89]] to %struct.S2***
+// CHECK-I386-NEXT:    store %struct.S2** [[PS13]], %struct.S2*** [[TMP90]], align 4
+// CHECK-I386-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i32 0, i32 3
+// CHECK-I386-NEXT:    store i8* null, i8** [[TMP91]], align 4
+// CHECK-I386-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 4
+// CHECK-I386-NEXT:    [[TMP93:%.*]] = bitcast i8** [[TMP92]] to %struct.S2***
+// CHECK-I386-NEXT:    store %struct.S2** [[PS13]], %struct.S2*** [[TMP93]], align 4
+// CHECK-I386-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 4
+// CHECK-I386-NEXT:    [[TMP95:%.*]] = bitcast i8** [[TMP94]] to %struct.S1**
+// CHECK-I386-NEXT:    store %struct.S1* [[S17]], %struct.S1** [[TMP95]], align 4
+// CHECK-I386-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i32 0, i32 4
+// CHECK-I386-NEXT:    store i8* null, i8** [[TMP96]], align 4
+// CHECK-I386-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 5
+// CHECK-I386-NEXT:    [[TMP98:%.*]] = bitcast i8** [[TMP97]] to i32**
+// CHECK-I386-NEXT:    store i32* [[ARG_ADDR]], i32** [[TMP98]], align 4
+// CHECK-I386-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 5
+// CHECK-I386-NEXT:    [[TMP100:%.*]] = bitcast i8** [[TMP99]] to i32**
+// CHECK-I386-NEXT:    store i32* [[ARG_ADDR]], i32** [[TMP100]], align 4
+// CHECK-I386-NEXT:    [[TMP101:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i32 0, i32 5
+// CHECK-I386-NEXT:    store i8* null, i8** [[TMP101]], align 4
+// CHECK-I386-NEXT:    [[TMP102:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 6
+// CHECK-I386-NEXT:    [[TMP103:%.*]] = bitcast i8** [[TMP102]] to %struct.S2**
+// CHECK-I386-NEXT:    store %struct.S2* [[TMP50]], %struct.S2** [[TMP103]], align 4
+// CHECK-I386-NEXT:    [[TMP104:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 6
+// CHECK-I386-NEXT:    [[TMP105:%.*]] = bitcast i8** [[TMP104]] to %struct.S1**
+// CHECK-I386-NEXT:    store %struct.S1* [[S18]], %struct.S1** [[TMP105]], align 4
+// CHECK-I386-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [11 x i64], [11 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 6
+// CHECK-I386-NEXT:    store i64 [[TMP69]], i64* [[TMP106]], align 4
+// CHECK-I386-NEXT:    [[TMP107:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i32 0, i32 6
+// CHECK-I386-NEXT:    store i8* null, i8** [[TMP107]], align 4
+// CHECK-I386-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 7
+// CHECK-I386-NEXT:    [[TMP109:%.*]] = bitcast i8** [[TMP108]] to %struct.S2**
+// CHECK-I386-NEXT:    store %struct.S2* [[TMP50]], %struct.S2** [[TMP109]], align 4
+// CHECK-I386-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 7
+// CHECK-I386-NEXT:    [[TMP111:%.*]] = bitcast i8** [[TMP110]] to %struct.S1**
+// CHECK-I386-NEXT:    store %struct.S1* [[S18]], %struct.S1** [[TMP111]], align 4
+// CHECK-I386-NEXT:    [[TMP112:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i32 0, i32 7
+// CHECK-I386-NEXT:    store i8* null, i8** [[TMP112]], align 4
+// CHECK-I386-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 8
+// CHECK-I386-NEXT:    [[TMP114:%.*]] = bitcast i8** [[TMP113]] to %struct.S2***
+// CHECK-I386-NEXT:    store %struct.S2** [[PS19]], %struct.S2*** [[TMP114]], align 4
+// CHECK-I386-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 8
+// CHECK-I386-NEXT:    [[TMP116:%.*]] = bitcast i8** [[TMP115]] to %struct.S2***
+// CHECK-I386-NEXT:    store %struct.S2** [[PS21]], %struct.S2*** [[TMP116]], align 4
+// CHECK-I386-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i32 0, i32 8
 // CHECK-I386-NEXT:    store i8* null, i8** [[TMP117]], align 4
-// CHECK-I386-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 8
+// CHECK-I386-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 9
 // CHECK-I386-NEXT:    [[TMP119:%.*]] = bitcast i8** [[TMP118]] to %struct.S2***
-// CHECK-I386-NEXT:    store %struct.S2** [[PS19]], %struct.S2*** [[TMP119]], align 4
-// CHECK-I386-NEXT:    [[TMP120:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 8
+// CHECK-I386-NEXT:    store %struct.S2** [[PS21]], %struct.S2*** [[TMP119]], align 4
+// CHECK-I386-NEXT:    [[TMP120:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 9
 // CHECK-I386-NEXT:    [[TMP121:%.*]] = bitcast i8** [[TMP120]] to %struct.S2***
-// CHECK-I386-NEXT:    store %struct.S2** [[PS21]], %struct.S2*** [[TMP121]], align 4
-// CHECK-I386-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [11 x i64], [11 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 8
-// CHECK-I386-NEXT:    store i64 4, i64* [[TMP122]], align 4
-// CHECK-I386-NEXT:    [[TMP123:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i32 0, i32 8
-// CHECK-I386-NEXT:    store i8* null, i8** [[TMP123]], align 4
-// CHECK-I386-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 9
-// CHECK-I386-NEXT:    [[TMP125:%.*]] = bitcast i8** [[TMP124]] to %struct.S2***
-// CHECK-I386-NEXT:    store %struct.S2** [[PS21]], %struct.S2*** [[TMP125]], align 4
-// CHECK-I386-NEXT:    [[TMP126:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 9
-// CHECK-I386-NEXT:    [[TMP127:%.*]] = bitcast i8** [[TMP126]] to %struct.S2***
-// CHECK-I386-NEXT:    store %struct.S2** [[PS24]], %struct.S2*** [[TMP127]], align 4
-// CHECK-I386-NEXT:    [[TMP128:%.*]] = getelementptr inbounds [11 x i64], [11 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 9
-// CHECK-I386-NEXT:    store i64 4, i64* [[TMP128]], align 4
-// CHECK-I386-NEXT:    [[TMP129:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i32 0, i32 9
-// CHECK-I386-NEXT:    store i8* null, i8** [[TMP129]], align 4
-// CHECK-I386-NEXT:    [[TMP130:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 10
-// CHECK-I386-NEXT:    [[TMP131:%.*]] = bitcast i8** [[TMP130]] to %struct.S2***
-// CHECK-I386-NEXT:    store %struct.S2** [[PS24]], %struct.S2*** [[TMP131]], align 4
-// CHECK-I386-NEXT:    [[TMP132:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 10
-// CHECK-I386-NEXT:    [[TMP133:%.*]] = bitcast i8** [[TMP132]] to %struct.S1**
-// CHECK-I386-NEXT:    store %struct.S1* [[S28]], %struct.S1** [[TMP133]], align 4
-// CHECK-I386-NEXT:    [[TMP134:%.*]] = getelementptr inbounds [11 x i64], [11 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 10
-// CHECK-I386-NEXT:    store i64 4, i64* [[TMP134]], align 4
-// CHECK-I386-NEXT:    [[TMP135:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i32 0, i32 10
-// CHECK-I386-NEXT:    store i8* null, i8** [[TMP135]], align 4
-// CHECK-I386-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 0
-// CHECK-I386-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 0
-// CHECK-I386-NEXT:    [[TMP138:%.*]] = getelementptr inbounds [11 x i64], [11 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK-I386-NEXT:    call void @__tgt_target_data_begin_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i32 11, i8** [[TMP136]], i8** [[TMP137]], i64* [[TMP138]], i64* getelementptr inbounds ([11 x i64], [11 x i64]* @.offload_maptypes.5, i32 0, i32 0), i8** null, i8** null)
-// CHECK-I386-NEXT:    [[TMP139:%.*]] = load i32, i32* [[ARG_ADDR]], align 4
-// CHECK-I386-NEXT:    [[INC32:%.*]] = add nsw i32 [[TMP139]], 1
+// CHECK-I386-NEXT:    store %struct.S2** [[PS24]], %struct.S2*** [[TMP121]], align 4
+// CHECK-I386-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i32 0, i32 9
+// CHECK-I386-NEXT:    store i8* null, i8** [[TMP122]], align 4
+// CHECK-I386-NEXT:    [[TMP123:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 10
+// CHECK-I386-NEXT:    [[TMP124:%.*]] = bitcast i8** [[TMP123]] to %struct.S2***
+// CHECK-I386-NEXT:    store %struct.S2** [[PS24]], %struct.S2*** [[TMP124]], align 4
+// CHECK-I386-NEXT:    [[TMP125:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 10
+// CHECK-I386-NEXT:    [[TMP126:%.*]] = bitcast i8** [[TMP125]] to %struct.S1**
+// CHECK-I386-NEXT:    store %struct.S1* [[S28]], %struct.S1** [[TMP126]], align 4
+// CHECK-I386-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i32 0, i32 10
+// CHECK-I386-NEXT:    store i8* null, i8** [[TMP127]], align 4
+// CHECK-I386-NEXT:    [[TMP128:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 0
+// CHECK-I386-NEXT:    [[TMP129:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 0
+// CHECK-I386-NEXT:    [[TMP130:%.*]] = getelementptr inbounds [11 x i64], [11 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK-I386-NEXT:    call void @__tgt_target_data_begin_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i32 11, i8** [[TMP128]], i8** [[TMP129]], i64* [[TMP130]], i64* getelementptr inbounds ([11 x i64], [11 x i64]* @.offload_maptypes.6, i32 0, i32 0), i8** null, i8** null)
+// CHECK-I386-NEXT:    [[TMP131:%.*]] = load i32, i32* [[ARG_ADDR]], align 4
+// CHECK-I386-NEXT:    [[INC32:%.*]] = add nsw i32 [[TMP131]], 1
 // CHECK-I386-NEXT:    store i32 [[INC32]], i32* [[ARG_ADDR]], align 4
-// CHECK-I386-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 0
-// CHECK-I386-NEXT:    [[TMP141:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 0
-// CHECK-I386-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [11 x i64], [11 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK-I386-NEXT:    call void @__tgt_target_data_end_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i32 11, i8** [[TMP140]], i8** [[TMP141]], i64* [[TMP142]], i64* getelementptr inbounds ([11 x i64], [11 x i64]* @.offload_maptypes.5, i32 0, i32 0), i8** null, i8** null)
+// CHECK-I386-NEXT:    [[TMP132:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 0
+// CHECK-I386-NEXT:    [[TMP133:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 0
+// CHECK-I386-NEXT:    [[TMP134:%.*]] = getelementptr inbounds [11 x i64], [11 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK-I386-NEXT:    call void @__tgt_target_data_end_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i32 11, i8** [[TMP132]], i8** [[TMP133]], i64* [[TMP134]], i64* getelementptr inbounds ([11 x i64], [11 x i64]* @.offload_maptypes.6, i32 0, i32 0), i8** null, i8** null)
 // CHECK-I386-NEXT:    ret void
 //
 void foo(int arg) {

diff  --git a/clang/test/OpenMP/target_data_map_pointer_array_subscript_codegen.cpp b/clang/test/OpenMP/target_data_map_pointer_array_subscript_codegen.cpp
index 779d6ee7eb211..7ce469450a7d7 100644
--- a/clang/test/OpenMP/target_data_map_pointer_array_subscript_codegen.cpp
+++ b/clang/test/OpenMP/target_data_map_pointer_array_subscript_codegen.cpp
@@ -35,6 +35,7 @@ MyObject *objects;
 
 // CHECK-DAG: [[SIZES0:@.+]] = private unnamed_addr constant [1 x i64] [i64 {{8|4}}]
 // CHECK-DAG: [[MAPS0:@.+]] = private unnamed_addr constant [1 x i64] [i64 17]
+// CHECK-DAG: [[SIZES1:@.+]] = private unnamed_addr constant [2 x i64] [i64 0, i64 4]
 // CHECK-DAG: [[MAPS1:@.+]] = private unnamed_addr constant [2 x i64] [i64 0, i64 281474976710673]
 // CHECK: @main
 int main(void) {

diff  --git a/clang/test/OpenMP/target_data_use_device_addr_codegen.cpp b/clang/test/OpenMP/target_data_use_device_addr_codegen.cpp
index 305ac361daa3a..9a3f7aa17b166 100644
--- a/clang/test/OpenMP/target_data_use_device_addr_codegen.cpp
+++ b/clang/test/OpenMP/target_data_use_device_addr_codegen.cpp
@@ -14,6 +14,7 @@
 // CHECK-DAG: [[SIZES1:@.+]] = private unnamed_addr constant [5 x i64] zeroinitializer
 // 64 = 0x40 = OMP_MAP_RETURN_PARAM
 // CHECK-DAG: [[MAPTYPES1:@.+]] = private unnamed_addr constant [5 x i64] [i64 64, i64 64, i64 64, i64 64, i64 64]
+// CHECK-DAG: [[SIZES2:@.+]] = private unnamed_addr constant [5 x i64] zeroinitializer
 // 0 = OMP_MAP_NONE
 // 281474976710720 = 0x1000000000040 = OMP_MAP_MEMBER_OF | OMP_MAP_RETURN_PARAM
 // CHECK-DAG: [[MAPTYPES2:@.+]] = private unnamed_addr constant [5 x i64] [i64 0, i64 281474976710720, i64 281474976710720, i64 281474976710720, i64 281474976710720]
@@ -158,32 +159,24 @@ int main() {
 // CHECK: [[PTR1:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[PTRS]], i32 0, i32 1
 // CHECK: [[PTR1_A_ADDR:%.+]] = bitcast i8** [[PTR1]] to i32**
 // CHECK: store i32* [[A_ADDR2]], i32** [[PTR1_A_ADDR]],
-// CHECK: [[SIZE1:%.+]] = getelementptr inbounds [5 x i64], [5 x i64]* [[SIZES]], i32 0, i32 1
-// CHECK: store i64 0, i64* [[SIZE1]],
 // CHECK: [[BPTR2:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BPTRS]], i32 0, i32 2
 // CHECK: [[BPTR2_PTR_ADDR:%.+]] = bitcast i8** [[BPTR2]] to i32***
 // CHECK: store i32** [[PTR_ADDR]], i32*** [[BPTR2_PTR_ADDR]],
 // CHECK: [[PTR2:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[PTRS]], i32 0, i32 2
 // CHECK: [[PTR2_PTR_ADDR:%.+]] = bitcast i8** [[PTR2]] to i32***
 // CHECK: store i32** [[PTR_ADDR]], i32*** [[PTR2_PTR_ADDR]],
-// CHECK: [[SIZE2:%.+]] = getelementptr inbounds [5 x i64], [5 x i64]* [[SIZES]], i32 0, i32 2
-// CHECK: store i64 0, i64* [[SIZE2]],
 // CHECK: [[BPTR3:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BPTRS]], i32 0, i32 3
 // CHECK: [[BPTR3_REF_PTR:%.+]] = bitcast i8** [[BPTR3]] to i32**
 // CHECK: store i32* [[REF_PTR]], i32** [[BPTR3_REF_PTR]],
 // CHECK: [[PTR3:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[PTRS]], i32 0, i32 3
 // CHECK: [[PTR3_REF_PTR:%.+]] = bitcast i8** [[PTR3]] to i32**
 // CHECK: store i32* [[REF_PTR]], i32** [[PTR3_REF_PTR]],
-// CHECK: [[SIZE3:%.+]] = getelementptr inbounds [5 x i64], [5 x i64]* [[SIZES]], i32 0, i32 3
-// CHECK: store i64 0, i64* [[SIZE3]],
 // CHECK: [[BPTR4:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BPTRS]], i32 0, i32 4
 // CHECK: [[BPTR4_ARR_ADDR:%.+]] = bitcast i8** [[BPTR4]] to [4 x i32]**
 // CHECK: store [4 x i32]* [[ARR_ADDR2]], [4 x i32]** [[BPTR4_ARR_ADDR]],
 // CHECK: [[PTR4:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[PTRS]], i32 0, i32 4
 // CHECK: [[PTR4_ARR_ADDR:%.+]] = bitcast i8** [[PTR4]] to [4 x i32]**
 // CHECK: store [4 x i32]* [[ARR_ADDR2]], [4 x i32]** [[PTR4_ARR_ADDR]],
-// CHECK: [[SIZE4:%.+]] = getelementptr inbounds [5 x i64], [5 x i64]* [[SIZES]], i32 0, i32 4
-// CHECK: store i64 0, i64* [[SIZE4]],
 // CHECK: [[BPTR:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BPTRS]], i32 0, i32 0
 // CHECK: [[PTR:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[PTRS]], i32 0, i32 0
 // CHECK: [[SIZE:%.+]] = getelementptr inbounds [5 x i64], [5 x i64]* [[SIZES]], i32 0, i32 0

diff  --git a/clang/test/OpenMP/target_defaultmap_codegen_01.cpp b/clang/test/OpenMP/target_defaultmap_codegen_01.cpp
index 5702196353b49..97df716edc085 100644
--- a/clang/test/OpenMP/target_defaultmap_codegen_01.cpp
+++ b/clang/test/OpenMP/target_defaultmap_codegen_01.cpp
@@ -739,7 +739,7 @@ void explicit_maps_single (){
 
 // CK15-LABEL: @.__omp_offloading_{{.*}}implicit_maps_variable_length_array{{.*}}_l{{[0-9]+}}.region_id = weak{{.*}} constant i8 0
 
-// We don't have a constant map size for VLAs.
+// CK15-DAG: [[SIZES:@.+]] = {{.+}}constant [3 x i64] [i64 {{4|8}}, i64 {{4|8}}, i64 0]
 // Map types:
 //  - OMP_MAP_LITERAL + OMP_MAP_TARGET_PARAM + OMP_MAP_IMPLICIT = 800 (vla size)
 //  - OMP_MAP_LITERAL + OMP_MAP_TARGET_PARAM + OMP_MAP_IMPLICIT = 800 (vla size)
@@ -757,21 +757,17 @@ void implicit_maps_variable_length_array (int a){
 
   // CK15-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
   // CK15-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 0
-  // CK15-DAG: [[S0:%.+]] = getelementptr inbounds {{.+}}[[SS]], i32 0, i32 0
   // CK15-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to i[[sz:64|32]]*
   // CK15-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to i[[sz]]*
   // CK15-DAG: store i[[sz]] 2, i[[sz]]* [[CBP0]]
   // CK15-DAG: store i[[sz]] 2, i[[sz]]* [[CP0]]
-  // CK15-DAG: store i64 {{8|4}}, i64* [[S0]],
 
   // CK15-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 1
   // CK15-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 1
-  // CK15-DAG: [[S1:%.+]] = getelementptr inbounds {{.+}}[[SS]], i32 0, i32 1
   // CK15-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to i[[sz]]*
   // CK15-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to i[[sz]]*
   // CK15-DAG: store i[[sz]] [[VAL:%.+]], i[[sz]]* [[CBP1]]
   // CK15-DAG: store i[[sz]] [[VAL]], i[[sz]]* [[CP1]]
-  // CK15-DAG: store i64 {{8|4}}, i64* [[S1]],
 
   // CK15-DAG: [[BP2:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 2
   // CK15-DAG: [[P2:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 2

diff  --git a/clang/test/OpenMP/target_enter_data_codegen.cpp b/clang/test/OpenMP/target_enter_data_codegen.cpp
index f803f845e9274..39cdaeb165f5c 100644
--- a/clang/test/OpenMP/target_enter_data_codegen.cpp
+++ b/clang/test/OpenMP/target_enter_data_codegen.cpp
@@ -44,7 +44,7 @@ double gc[100];
 
 // CK1: [[MTYPE03:@.+]] = {{.+}}constant [1 x i64] [i64 5]
 
-// CK1: [[SIZE04:@.+]] = {{.+}}constant [2 x i64] [i64 sdiv exact (i64 sub (i64 ptrtoint (double** getelementptr (double*, double** getelementptr inbounds (%struct.ST, %struct.ST* @gb, i32 0, i32 1), i32 1) to i64), i64 ptrtoint (double** getelementptr inbounds (%struct.ST, %struct.ST* @gb, i32 0, i32 1) to i64)), i64 ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64)), i64 24]
+// CK1: [[SIZE04:@.+]] = {{.+}}constant [2 x i64] [i64 0, i64 24]
 // CK1: [[MTYPE04:@.+]] = {{.+}}constant [2 x i64] [i64 0, i64 281474976710673]
 
 // CK1: [[MTYPE05:@.+]] = {{.+}}constant [1 x i64] [i64 1025]
@@ -146,16 +146,19 @@ void foo(int arg) {
   {++arg;}
 
   // Region 04
-  // CK1-DAG: call void @__tgt_target_data_begin_mapper(%struct.ident_t* @{{.+}}, i64 -1, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE04]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE04]]{{.+}}, i8** null)
+  // CK1-DAG: call void @__tgt_target_data_begin_mapper(%struct.ident_t* @{{.+}}, i64 -1, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE04]]{{.+}}, i8** null)
   // CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
+  // CK1-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]]
 
   // CK1-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0
   // CK1-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0
+  // CK1-DAG: [[S0:%.+]] = getelementptr inbounds {{.+}}[[PS]], i{{.+}} 0, i{{.+}} 0
   // CK1-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to [[ST]]**
   // CK1-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to double***
   // CK1-DAG: store [[ST]]* @gb, [[ST]]** [[CBP0]]
   // CK1-DAG: store double** getelementptr inbounds ([[ST]], [[ST]]* @gb, i32 0, i32 1), double*** [[CP0]]
+  // CK1-DAG: store i64 sdiv exact (i64 sub (i64 ptrtoint (double** getelementptr (double*, double** getelementptr inbounds (%struct.ST, %struct.ST* @gb, i32 0, i32 1), i32 1) to i64), i64 ptrtoint (double** getelementptr inbounds (%struct.ST, %struct.ST* @gb, i32 0, i32 1) to i64)), i64 ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64)), i64* [[S0]],
 
 
   // CK1-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1
@@ -355,6 +358,7 @@ struct ST {
   }
 };
 
+// CK2: [[SIZES:@.+]] = {{.+}}constant [2 x i64] [i64 0, i64 24]
 // CK2: [[MTYPE00:@.+]] = {{.+}}constant [2 x i64] [i64 0, i64 281474976710677]
 
 // CK2-LABEL: _Z3bari
@@ -366,21 +370,21 @@ int bar(int arg){
 // Region 00
 // CK2: br i1 %{{[^,]+}}, label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]]
 // CK2: [[IFTHEN]]
-// CK2-DAG: call void @__tgt_target_data_begin_mapper(%struct.ident_t* @{{.+}}, i64 [[DEV:%[^,]+]], i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[sz:64|32]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE00]]{{.+}}, i8** null)
+// CK2-DAG: call void @__tgt_target_data_begin_mapper(%struct.ident_t* @{{.+}}, i64 [[DEV:%[^,]+]], i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE00]]{{.+}}, i8** null)
 // CK2-DAG: [[DEV]] = sext i32 [[DEVi32:%[^,]+]] to i64
 // CK2-DAG: [[DEVi32]] = load i32, i32* %{{[^,]+}},
 // CK2-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
 // CK2-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
-// CK2-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]]
+// CK2-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]]
 
 // CK2-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0
 // CK2-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0
-// CK2-DAG: [[S0:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 0
+// CK2-DAG: [[PS0:%.+]] = getelementptr inbounds {{.+}}[[PS]], i{{.+}} 0, i{{.+}} 0
 // CK2-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to [[ST]]**
 // CK2-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to double***
 // CK2-DAG: store [[ST]]* [[VAR0:%.+]], [[ST]]** [[CBP0]]
 // CK2-DAG: store double** [[SEC0:%.+]], double*** [[CP0]]
-// CK2-DAG: store i64 {{%.+}}, i64* [[S0]]
+// CK2-DAG: store i64 {{%.+}}, i64* [[PS0]],
 // CK2-DAG: [[SEC0]] = getelementptr inbounds {{.*}}[[ST]]* [[VAR0]], i32 0, i32 1
 
 // CK2-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1
@@ -505,6 +509,7 @@ struct STT {
   }
 };
 
+// CK5: [[SIZES:@.+]] = {{.+}}constant [2 x i64] [i64 0, i64 24]
 // CK5: [[MTYPE00:@.+]] = {{.+}}constant [2 x i64] [i64 0, i64 281474976711701]
 
 // CK5-LABEL: _Z3bari
@@ -516,21 +521,21 @@ int bar(int arg){
 // Region 00
 // CK5: br i1 %{{[^,]+}}, label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]]
 // CK5: [[IFTHEN]]
-// CK5-DAG: call void @__tgt_target_data_begin_mapper(%struct.ident_t* @{{.+}}, i64 [[DEV:%[^,]+]], i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[sz:64|32]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE00]]{{.+}}, i8** null)
+// CK5-DAG: call void @__tgt_target_data_begin_mapper(%struct.ident_t* @{{.+}}, i64 [[DEV:%[^,]+]], i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE00]]{{.+}}, i8** null)
 // CK5-DAG: [[DEV]] = sext i32 [[DEVi32:%[^,]+]] to i64
 // CK5-DAG: [[DEVi32]] = load i32, i32* %{{[^,]+}},
 // CK5-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
 // CK5-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
-// CK5-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]]
+// CK5-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]]
 
 // CK5-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0
 // CK5-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0
-// CK5-DAG: [[S0:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 0
+// CK5-DAG: [[PS0:%.+]] = getelementptr inbounds {{.+}}[[PS]], i{{.+}} 0, i{{.+}} 0
 // CK5-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to [[STT]]**
 // CK5-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to double***
 // CK5-DAG: store [[STT]]* [[VAR0:%.+]], [[STT]]** [[CBP0]]
 // CK5-DAG: store double** [[SEC0:%.+]], double*** [[CP0]]
-// CK5-DAG: store i64 {{%.+}}, i64* [[S0]]
+// CK5-DAG: store i64 {{%.+}}, i64* [[PS0]],
 // CK5-DAG: [[SEC0]] = getelementptr inbounds {{.*}}[[STT]]* [[VAR0]], i32 0, i32 1
 
 // CK5-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1

diff  --git a/clang/test/OpenMP/target_enter_data_depend_codegen.cpp b/clang/test/OpenMP/target_enter_data_depend_codegen.cpp
index 758b89b9421cb..0bff2d0a1056a 100644
--- a/clang/test/OpenMP/target_enter_data_depend_codegen.cpp
+++ b/clang/test/OpenMP/target_enter_data_depend_codegen.cpp
@@ -37,7 +37,7 @@ double gc[100];
 
 // CK1: [[MTYPE03:@.+]] = {{.+}}constant [1 x i64] zeroinitializer
 
-// CK1: [[SIZE04:@.+]] = {{.+}}constant [2 x i64] [i64 sdiv exact (i64 sub (i64 ptrtoint (double** getelementptr (double*, double** getelementptr inbounds (%struct.ST, %struct.ST* @gb, i32 0, i32 1), i32 1) to i64), i64 ptrtoint (double** getelementptr inbounds (%struct.ST, %struct.ST* @gb, i32 0, i32 1) to i64)), i64 ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64)), i64 24]
+// CK1: [[SIZE04:@.+]] = {{.+}}constant [2 x i64] [i64 0, i64 24]
 // CK1: [[MTYPE04:@.+]] = {{.+}}constant [2 x i64] [i64 0, i64 281474976710673]
 
 // CK1-LABEL: _Z3fooi
@@ -290,6 +290,8 @@ void foo(int arg) {
   // CK1: [[P0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P:%.+]], i32 0, i32 0
   // CK1: [[P0_BC:%.+]] = bitcast i8** [[P0]] to double***
   // CK1: store double** getelementptr inbounds (%struct.ST, %struct.ST* @gb, i32 0, i32 1), double*** [[P0_BC]],
+  // CK1: [[PS0:%.+]] = getelementptr inbounds [2 x i64], [2 x i64]* [[PS:%.+]], i32 0, i32 0
+  // CK1: store i64 sdiv exact (i64 sub (i64 ptrtoint (double** getelementptr (double*, double** getelementptr inbounds (%struct.ST, %struct.ST* @gb, i32 0, i32 1), i32 1) to i64), i64 ptrtoint (double** getelementptr inbounds (%struct.ST, %struct.ST* @gb, i32 0, i32 1) to i64)), i64 ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64)), i64* [[PS0]],
   // CK1: [[BP1:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP]], i32 0, i32 1
   // CK1: [[BP1_BC:%.+]] = bitcast i8** [[BP1]] to double***
   // CK1: store double** getelementptr inbounds (%struct.ST, %struct.ST* @gb, i32 0, i32 1), double*** [[BP1_BC]],
@@ -298,6 +300,7 @@ void foo(int arg) {
   // CK1: store double* %{{.+}}, double** [[P1_BC]],
   // CK1: [[GEPBP0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP]], i32 0, i32 0
   // CK1: [[GEPP0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P]], i32 0, i32 0
+  // CK1: [[GEPS0:%.+]] = getelementptr inbounds [2 x i64], [2 x i64]* [[PS]], i32 0, i32 0
   // CK1: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* {{.+}}, i32 {{.+}}, i32 1, i[[sz]] {{88|52}}, i[[sz]] 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates{{.+}}*)* [[TASK_ENTRY4:@.+]] to i32 (i32, i8*)*))
   // CK1: [[RES_BC:%.+]] = bitcast i8* [[RES]] to %struct.kmp_task_t_with_privates{{.+}}*
   // CK1: [[TASK_T:%.+]] = getelementptr inbounds %struct.kmp_task_t_with_privates{{.+}}, %struct.kmp_task_t_with_privates{{.+}}* [[RES_BC]], i32 0, i32 0
@@ -312,10 +315,12 @@ void foo(int arg) {
   // CK1-64: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align {{8|4}} [[BC_PRIVS_PTRS]], i8* align {{8|4}} [[BC_PTRS]], i[[sz]] {{16|8}}, i1 false)
   // CK1-64: [[PRIVS_SIZES:%.+]] = getelementptr inbounds %struct..kmp_privates.t{{.+}}, %struct..kmp_privates.t{{.+}}* [[PRIVS]], i32 0, i32 2
   // CK1-64: [[BC_PRIVS_SIZES:%.+]] = bitcast [2 x i[[sz]]]* [[PRIVS_SIZES]] to i8*
-  // CK1-64: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align {{8|4}} [[BC_PRIVS_SIZES]], i8* align {{8|4}} bitcast ([2 x i[[sz]]]* [[SIZE04]] to i8*), i[[sz]] {{16|8}}, i1 false)
+  // CK1-64: [[BC_SIZES:%.+]] = bitcast i64* [[GEPS0]] to i8*
+  // CK1-64: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align {{8|4}} [[BC_PRIVS_SIZES]], i8* align {{8|4}} [[BC_SIZES]], i[[sz]] {{16|8}}, i1 false)
   // CK1-32: [[PRIVS_SIZES:%.+]] = getelementptr inbounds %struct..kmp_privates.t{{.+}}, %struct..kmp_privates.t{{.+}}* [[PRIVS]], i32 0, i32 0
   // CK1-32: [[BC_PRIVS_SIZES:%.+]] = bitcast [2 x i64]* [[PRIVS_SIZES]] to i8*
-  // CK1-32: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align {{8|4}} [[BC_PRIVS_SIZES]], i8* align {{8|4}} bitcast ([2 x i64]* [[SIZE04]] to i8*), i[[sz]] {{16|8}}, i1 false)
+  // CK1-32: [[BC_SIZES:%.+]] = bitcast i64* [[GEPS0]] to i8*
+  // CK1-32: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align {{8|4}} [[BC_PRIVS_SIZES]], i8* align {{8|4}} [[BC_SIZES]], i[[sz]] {{16|8}}, i1 false)
   // CK1-32: [[PRIVS_BASEPTRS:%.+]] = getelementptr inbounds %struct..kmp_privates.t{{.+}}, %struct..kmp_privates.t{{.+}}* [[PRIVS]], i32 0, i32 1
   // CK1-32: [[BC_PRIVS_BASEPTRS:%.+]] = bitcast [2 x i8*]* [[PRIVS_BASEPTRS]] to i8*
   // CK1-32: [[BC_BASEPTRS:%.+]] = bitcast i8** [[GEPBP0]] to i8*

diff  --git a/clang/test/OpenMP/target_exit_data_codegen.cpp b/clang/test/OpenMP/target_exit_data_codegen.cpp
index 2622c5809f492..831cd34bc6a50 100644
--- a/clang/test/OpenMP/target_exit_data_codegen.cpp
+++ b/clang/test/OpenMP/target_exit_data_codegen.cpp
@@ -43,7 +43,7 @@ double gc[100];
 
 // CK1: [[MTYPE03:@.+]] = {{.+}}constant [1 x i64] [i64 6]
 
-// CK1: [[SIZE04:@.+]] = {{.+}}constant [2 x i64] [i64 sdiv exact (i64 sub (i64 ptrtoint (double** getelementptr (double*, double** getelementptr inbounds (%struct.ST, %struct.ST* @gb, i32 0, i32 1), i32 1) to i64), i64 ptrtoint (double** getelementptr inbounds (%struct.ST, %struct.ST* @gb, i32 0, i32 1) to i64)), i64 ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64)), i64 24]
+// CK1: [[SIZE04:@.+]] = {{.+}}constant [2 x i64] [i64 0, i64 24]
 // CK1: [[MTYPE04:@.+]] = {{.+}}constant [2 x i64] [i64 0, i64 281474976710672]
 
 // CK1: [[MTYPE05:@.+]] = {{.+}}constant [1 x i64] [i64 1026]
@@ -145,16 +145,19 @@ void foo(int arg) {
 
   // Region 04
   // CK1-NOT: __tgt_target_data_begin
-  // CK1-DAG: call void @__tgt_target_data_end_mapper(%struct.ident_t* @{{.+}}, i64 -1, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE04]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE04]]{{.+}}, i8** null)
+  // CK1-DAG: call void @__tgt_target_data_end_mapper(%struct.ident_t* @{{.+}}, i64 -1, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE04]]{{.+}}, i8** null)
   // CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
+  // CK1-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]]
 
   // CK1-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0
   // CK1-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0
+  // CK1-DAG: [[PS0:%.+]] = getelementptr inbounds {{.+}}[[PS]], i{{.+}} 0, i{{.+}} 0
   // CK1-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to [[ST]]**
   // CK1-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to double***
   // CK1-DAG: store [[ST]]* @gb, [[ST]]** [[CBP0]]
   // CK1-DAG: store double** getelementptr inbounds ([[ST]], [[ST]]* @gb, i32 0, i32 1), double*** [[CP0]]
+  // CK1-DAG: store i64 sdiv exact (i64 sub (i64 ptrtoint (double** getelementptr (double*, double** getelementptr inbounds (%struct.ST, %struct.ST* @gb, i32 0, i32 1), i32 1) to i64), i64 ptrtoint (double** getelementptr inbounds (%struct.ST, %struct.ST* @gb, i32 0, i32 1) to i64)), i64 ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64)), i64* [[PS0]],
 
 
   // CK1-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1
@@ -266,6 +269,7 @@ struct ST {
   }
 };
 
+// CK2: [[SIZES:@.+]] = {{.+}}constant [2 x i64] [i64 0, i64 24]
 // CK2: [[MTYPE00:@.+]] = {{.+}}constant [2 x i64] [i64 0, i64 281474976710676]
 
 // CK2-LABEL: _Z3bari
@@ -278,21 +282,21 @@ int bar(int arg){
 // CK2-NOT: __tgt_target_data_begin
 // CK2: br i1 %{{[^,]+}}, label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]]
 // CK2: [[IFTHEN]]
-// CK2-DAG: call void @__tgt_target_data_end_mapper(%struct.ident_t* @{{.+}}, i64 [[DEV:%[^,]+]], i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[sz:.+]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE00]]{{.+}}, i8** null)
+// CK2-DAG: call void @__tgt_target_data_end_mapper(%struct.ident_t* @{{.+}}, i64 [[DEV:%[^,]+]], i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE00]]{{.+}}, i8** null)
 // CK2-DAG: [[DEV]] = sext i32 [[DEVi32:%[^,]+]] to i64
 // CK2-DAG: [[DEVi32]] = load i32, i32* %{{[^,]+}},
 // CK2-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
 // CK2-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
-// CK2-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]]
+// CK2-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]]
 
 // CK2-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0
 // CK2-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0
-// CK2-DAG: [[S0:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 0
+// CK2-DAG: [[PS0:%.+]] = getelementptr inbounds {{.+}}[[PS]], i{{.+}} 0, i{{.+}} 0
 // CK2-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to [[ST]]**
 // CK2-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to double***
 // CK2-DAG: store [[ST]]* [[VAR0:%[^,]+]], [[ST]]** [[CBP0]]
 // CK2-DAG: store double** [[SEC0:%[^,]+]], double*** [[CP0]]
-// CK2-DAG: store i64 [[CSVAL0:%[^,]+]], i64* [[S0]]
+// CK2-DAG: store i64 [[CSVAL0:%[^,]+]], i64* [[PS0]],
 // CK2-DAG: [[SEC0]] = getelementptr inbounds {{.*}}[[ST]]* [[VAR0]], i32 0, i32 1
 
 // CK2-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1
@@ -370,6 +374,7 @@ struct STT {
   }
 };
 
+// CK4: [[SIZES:@.+]] = {{.+}}constant [2 x i64] [i64 0, i64 24]
 // CK4: [[MTYPE00:@.+]] = {{.+}}constant [2 x i64] [i64 0, i64 281474976711700]
 
 // CK4-LABEL: _Z3bari
@@ -382,21 +387,21 @@ int bar(int arg){
 // CK4-NOT: __tgt_target_data_begin
 // CK4: br i1 %{{[^,]+}}, label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]]
 // CK4: [[IFTHEN]]
-// CK4-DAG: call void @__tgt_target_data_end_mapper(%struct.ident_t* @{{.+}}, i64 [[DEV:%[^,]+]], i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[sz:.+]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE00]]{{.+}}, i8** null)
+// CK4-DAG: call void @__tgt_target_data_end_mapper(%struct.ident_t* @{{.+}}, i64 [[DEV:%[^,]+]], i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE00]]{{.+}}, i8** null)
 // CK4-DAG: [[DEV]] = sext i32 [[DEVi32:%[^,]+]] to i64
 // CK4-DAG: [[DEVi32]] = load i32, i32* %{{[^,]+}},
 // CK4-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
 // CK4-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
-// CK4-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]]
+// CK4-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]]
 
 // CK4-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0
 // CK4-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0
-// CK4-DAG: [[S0:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 0
+// CK4-DAG: [[PS0:%.+]] = getelementptr inbounds {{.+}}[[PS]], i{{.+}} 0, i{{.+}} 0
 // CK4-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to [[STT]]**
 // CK4-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to double***
 // CK4-DAG: store [[STT]]* [[VAR0:%[^,]+]], [[STT]]** [[CBP0]]
 // CK4-DAG: store double** [[SEC0:%[^,]+]], double*** [[CP0]]
-// CK4-DAG: store i64 [[CSVAL0:%[^,]+]], i64* [[S0]]
+// CK4-DAG: store i64 [[CSVAL0:%[^,]+]], i64* [[PS0]],
 // CK4-DAG: [[SEC0]] = getelementptr inbounds {{.*}}[[STT]]* [[VAR0]], i32 0, i32 1
 
 // CK4-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1

diff  --git a/clang/test/OpenMP/target_exit_data_depend_codegen.cpp b/clang/test/OpenMP/target_exit_data_depend_codegen.cpp
index bc6d8aa49792c..c9d2b63277cee 100644
--- a/clang/test/OpenMP/target_exit_data_depend_codegen.cpp
+++ b/clang/test/OpenMP/target_exit_data_depend_codegen.cpp
@@ -37,7 +37,7 @@ double gc[100];
 
 // CK1: [[MTYPE03:@.+]] = {{.+}}constant [1 x i64] [i64 2]
 
-// CK1: [[SIZE04:@.+]] = {{.+}}constant [2 x i64] [i64 sdiv exact (i64 sub (i64 ptrtoint (double** getelementptr (double*, double** getelementptr inbounds (%struct.ST, %struct.ST* @gb, i32 0, i32 1), i32 1) to i64), i64 ptrtoint (double** getelementptr inbounds (%struct.ST, %struct.ST* @gb, i32 0, i32 1) to i64)), i64 ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64)), i64 24]
+// CK1: [[SIZE04:@.+]] = {{.+}}constant [2 x i64] [i64 0, i64 24]
 // CK1: [[MTYPE04:@.+]] = {{.+}}constant [2 x i64] [i64 0, i64 281474976710674]
 
 // CK1-LABEL: _Z3fooi
@@ -290,6 +290,8 @@ void foo(int arg) {
   // CK1: [[P0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P:%.+]], i32 0, i32 0
   // CK1: [[P0_BC:%.+]] = bitcast i8** [[P0]] to double***
   // CK1: store double** getelementptr inbounds (%struct.ST, %struct.ST* @gb, i32 0, i32 1), double*** [[P0_BC]],
+  // CK1: [[PS0:%.+]] = getelementptr inbounds [2 x i64], [2 x i64]* [[PS:%.+]], i32 0, i32 0
+  // CK1: store i64 sdiv exact (i64 sub (i64 ptrtoint (double** getelementptr (double*, double** getelementptr inbounds (%struct.ST, %struct.ST* @gb, i32 0, i32 1), i32 1) to i64), i64 ptrtoint (double** getelementptr inbounds (%struct.ST, %struct.ST* @gb, i32 0, i32 1) to i64)), i64 ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64)), i64* [[PS0]],
   // CK1: [[BP1:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP]], i32 0, i32 1
   // CK1: [[BP1_BC:%.+]] = bitcast i8** [[BP1]] to double***
   // CK1: store double** getelementptr inbounds (%struct.ST, %struct.ST* @gb, i32 0, i32 1), double*** [[BP1_BC]],
@@ -298,6 +300,7 @@ void foo(int arg) {
   // CK1: store double* %{{.+}}, double** [[P1_BC]],
   // CK1: [[GEPBP0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP]], i32 0, i32 0
   // CK1: [[GEPP0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P]], i32 0, i32 0
+  // CK1: [[GEPS0:%.+]] = getelementptr inbounds [2 x i64], [2 x i64]* [[PS]], i32 0, i32 0
   // CK1: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* {{.+}}, i32 {{.+}}, i32 1, i[[sz]] {{88|52}}, i[[sz]] 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates{{.+}}*)* [[TASK_ENTRY4:@.+]] to i32 (i32, i8*)*))
   // CK1: [[RES_BC:%.+]] = bitcast i8* [[RES]] to %struct.kmp_task_t_with_privates{{.+}}*
   // CK1: [[TASK_T:%.+]] = getelementptr inbounds %struct.kmp_task_t_with_privates{{.+}}, %struct.kmp_task_t_with_privates{{.+}}* [[RES_BC]], i32 0, i32 0
@@ -312,10 +315,12 @@ void foo(int arg) {
   // CK1-64: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align {{8|4}} [[BC_PRIVS_PTRS]], i8* align {{8|4}} [[BC_PTRS]], i[[sz]] {{16|8}}, i1 false)
   // CK1-64: [[PRIVS_SIZES:%.+]] = getelementptr inbounds %struct..kmp_privates.t{{.+}}, %struct..kmp_privates.t{{.+}}* [[PRIVS]], i32 0, i32 2
   // CK1-64: [[BC_PRIVS_SIZES:%.+]] = bitcast [2 x i[[sz]]]* [[PRIVS_SIZES]] to i8*
-  // CK1-64: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align {{8|4}} [[BC_PRIVS_SIZES]], i8* align {{8|4}} bitcast ([2 x i[[sz]]]* [[SIZE04]] to i8*), i[[sz]] {{16|8}}, i1 false)
+  // CK1-64: [[BC_SIZES:%.+]] = bitcast i64* [[GEPS0]] to i8*
+  // CK1-64: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align {{8|4}} [[BC_PRIVS_SIZES]], i8* align {{8|4}} [[BC_SIZES]], i[[sz]] {{16|8}}, i1 false)
   // CK1-32: [[PRIVS_SIZES:%.+]] = getelementptr inbounds %struct..kmp_privates.t{{.+}}, %struct..kmp_privates.t{{.+}}* [[PRIVS]], i32 0, i32 0
   // CK1-32: [[BC_PRIVS_SIZES:%.+]] = bitcast [2 x i64]* [[PRIVS_SIZES]] to i8*
-  // CK1-32: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align {{8|4}} [[BC_PRIVS_SIZES]], i8* align {{8|4}} bitcast ([2 x i64]* [[SIZE04]] to i8*), i[[sz]] {{16|8}}, i1 false)
+  // CK1-32: [[BC_SIZES:%.+]] = bitcast i64* [[GEPS0]] to i8*
+  // CK1-32: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align {{8|4}} [[BC_PRIVS_SIZES]], i8* align {{8|4}} [[BC_SIZES]], i[[sz]] {{16|8}}, i1 false)
   // CK1-32: [[PRIVS_BASEPTRS:%.+]] = getelementptr inbounds %struct..kmp_privates.t{{.+}}, %struct..kmp_privates.t{{.+}}* [[PRIVS]], i32 0, i32 1
   // CK1-32: [[BC_PRIVS_BASEPTRS:%.+]] = bitcast [2 x i8*]* [[PRIVS_BASEPTRS]] to i8*
   // CK1-32: [[BC_BASEPTRS:%.+]] = bitcast i8** [[GEPBP0]] to i8*

diff  --git a/clang/test/OpenMP/target_firstprivate_codegen.cpp b/clang/test/OpenMP/target_firstprivate_codegen.cpp
index 8197d9a0b23be..21287e0d0fdee 100644
--- a/clang/test/OpenMP/target_firstprivate_codegen.cpp
+++ b/clang/test/OpenMP/target_firstprivate_codegen.cpp
@@ -57,9 +57,11 @@ int ga = 5;
 
 // CHECK-DAG:  [[SIZET:@.+]] = private unnamed_addr constant [3 x i{{32|64}}] [i[[SZ:32|64]] 4, i{{64|32}} {{8|4}}, i[[SZ:32|64]] 4]
 // CHECK-DAG:  [[MAPT:@.+]] = private unnamed_addr constant [3 x i64] [i64 288, i64 49, i64 288]
+// CHECK-DAG:  [[SIZET2:@.+]] = private unnamed_addr constant [9 x i64] [i64 2, i64 40, i64 {{4|8}}, i64 0, i64 400, i64 {{4|8}}, i64 {{4|8}}, i64 0, i64 {{12|16}}]
 // CHECK-DAG:  [[MAPT2:@.+]] = private unnamed_addr constant [9 x i64] [i64 288, i64 161, i64 800, i64 161, i64 161, i64 800, i64 800, i64 161, i64 161]
 // CHECK-DAG:  [[SIZET3:@.+]] = private unnamed_addr constant [2 x i{{32|64}}] [i{{32|64}} 0, i{{32|64}} 8]
 // CHECK-DAG:  [[MAPT3:@.+]] = private unnamed_addr constant [2 x i64] [i64 32, i64 161]
+// CHECK-DAG:  [[SIZET4:@.+]] = private unnamed_addr constant [5 x i64] [i64 8, i64 4, i64 {{4|8}}, i64 {{4|8}}, i64 0]
 // CHECK-DAG:  [[MAPT4:@.+]] = private unnamed_addr constant [5 x i64] [i64 547, i64 288, i64 800, i64 800, i64 161]
 // CHECK-DAG:  [[SIZET5:@.+]] = private unnamed_addr constant [3 x i{{32|64}}] [i[[SZ]] 4, i[[SZ]] 1, i[[SZ]] 40]
 // CHECK-DAG:  [[MAPT5:@.+]] = private unnamed_addr constant [3 x i64] [i64 288, i64 288, i64 161]
@@ -184,8 +186,6 @@ int foo(int n, double *ptr) {
   // CHECK:  [[PTR_GEP2_0:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[PTR_ARR2]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
   // CHECK:  [[ACAST_TOPTR:%.+]] = bitcast i8** [[PTR_GEP2_0]] to i{{[0-9]+}}*
   // CHECK:  store i{{[0-9]+}} [[A2CAST_VAL]], i{{[0-9]+}}* [[ACAST_TOPTR]],
-  // CHECK:  [[SIZE_GEPA2:%.+]] = getelementptr inbounds [9 x i{{[0-9]+}}], [9 x i{{[0-9]+}}]* [[SIZET2]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK:  store i{{[0-9]+}} 2, i{{[0-9]+}}* [[SIZE_GEPA2]],
 
   // firstprivate(b): base_ptr = &b[0], ptr = &b[0], size = 40 (sizeof(float)*10)
   // CHECK:  [[BASE_PTR_GEP2_1:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BASE_PTR_ARR2]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
@@ -194,8 +194,6 @@ int foo(int n, double *ptr) {
   // CHECK:  [[PTR_GEP2_1:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[PTR_ARR2]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
   // CHECK:  [[BCAST_TOPTR:%.+]] = bitcast i8** [[PTR_GEP2_1]] to [10 x float]**
   // CHECK:  store [10 x float]* [[B]], [10 x float]** [[BCAST_TOPTR]],
-  // CHECK:  [[SIZE_GEPB:%.+]] = getelementptr inbounds [9 x i{{[0-9]+}}], [9 x i{{[0-9]+}}]* [[SIZET2]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
-  // CHECK:  store i{{[0-9]+}} 40, i{{[0-9]+}}* [[SIZE_GEPB]],
 
   // firstprivate(bn), 2 entries, n and bn: (1) base_ptr = n, ptr = n, size = 8 ; (2) base_ptr = &c[0], ptr = &c[0], size = n*sizeof(float)
   // CHECK:  [[BASE_PTR_GEP2_2:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BASE_PTR_ARR2]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
@@ -206,8 +204,6 @@ int foo(int n, double *ptr) {
   // CHECK:  [[BCAST_TOPTR:%.+]] = bitcast i8** [[PTR_GEP2_2]] to i{{[0-9]+}}*
   // CHECK-64:  store i{{[0-9]+}} [[N_EXT]], i{{[0-9]+}}* [[BCAST_TOPTR]],
   // CHECK-32:  store i{{[0-9]+}} [[N_ADDR_VAL]], i{{[0-9]+}}* [[BCAST_TOPTR]],
-  // CHECK:  [[SIZE_GEPBN_1:%.+]] = getelementptr inbounds [9 x i{{[0-9]+}}], [9 x i{{[0-9]+}}]* [[SIZET2]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
-  // CHECK:  store i{{[0-9]+}} {{[0-9]}}, i{{[0-9]+}}* [[SIZE_GEPBN_1]],
   // CHECK:  [[BASE_PTR_GEP2_3:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BASE_PTR_ARR2]], i{{[0-9]+}} 0, i{{[0-9]+}} 3
   // CHECK:  [[BCAST_TOPTR:%.+]] = bitcast i8** [[BASE_PTR_GEP2_3]] to float**
   // CHECK:  store float* [[BN_VLA]], float** [[BCAST_TOPTR]],
@@ -221,8 +217,6 @@ int foo(int n, double *ptr) {
   // CHECK:  [[PTR_GEP2_4:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[PTR_ARR2]], i{{[0-9]+}} 0, i{{[0-9]+}} 4
   // CHECK:  [[BCAST_TOPTR:%.+]] = bitcast i8** [[PTR_GEP2_4]] to [5 x [10 x double]]**
   // CHECK:  store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[BCAST_TOPTR]],
-  // CHECK:  [[SIZE_GEPC_4:%.+]] = getelementptr inbounds [9 x i{{[0-9]+}}], [9 x i{{[0-9]+}}]* [[SIZET2]], i{{[0-9]+}} 0, i{{[0-9]+}} 4
-  // CHECK:  store i{{[0-9]+}} 400, i{{[0-9]+}}* [[SIZE_GEPC_4]],
 
   // firstprivate(cn), 3 entries, 5, n, cn: (1) base_ptr = 5, ptr = 5, size = 8; (2) (1) base_ptr = n, ptr = n, size = 8; (3) base_ptr = &cn[0], ptr = &cn[0], size = 5*n*sizeof(double)
   // CHECK:  [[BASE_PTR_GEP2_5:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BASE_PTR_ARR2]], i{{[0-9]+}} 0, i{{[0-9]+}} 5
@@ -231,8 +225,6 @@ int foo(int n, double *ptr) {
   // CHECK:  [[PTR_GEP2_5:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[PTR_ARR2]], i{{[0-9]+}} 0, i{{[0-9]+}} 5
   // CHECK:  [[BCAST_TOPTR:%.+]] = bitcast i8** [[PTR_GEP2_5]] to i{{[0-9]+}}*
   // CHECK:  store i{{[0-9]+}} 5, i{{[0-9]+}}* [[BCAST_TOPTR]],
-  // CHECK:  [[SIZE_GEPCN_5:%.+]] = getelementptr inbounds [9 x i{{[0-9]+}}], [9 x i{{[0-9]+}}]* [[SIZET2]], i{{[0-9]+}} 0, i{{[0-9]+}} 5
-  // CHECK:  store i{{[0-9]+}} {{[0-9]}}, i{{[0-9]+}}* [[SIZE_GEPCN_5]],
   // CHECK:  [[BASE_PTR_GEP2_6:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BASE_PTR_ARR2]], i{{[0-9]+}} 0, i{{[0-9]+}} 6
   // CHECK:  [[BCAST_TOPTR:%.+]] = bitcast i8** [[BASE_PTR_GEP2_6]] to i{{[0-9]+}}*
   // CHECK-64:  store i{{[0-9]+}} [[N_EXT2]], i{{[0-9]+}}* [[BCAST_TOPTR]],
@@ -241,8 +233,6 @@ int foo(int n, double *ptr) {
   // CHECK:  [[BCAST_TOPTR:%.+]] = bitcast i8** [[PTR_GEP2_6]] to i{{[0-9]+}}*
   // CHECK-64:  store i{{[0-9]+}} [[N_EXT2]], i{{[0-9]+}}* [[BCAST_TOPTR]],
   // CHECK-32:  store i{{[0-9]+}} [[N_ADDR_VAL2]], i{{[0-9]+}}* [[BCAST_TOPTR]],
-  // CHECK:  [[SIZE_GEPCN_6:%.+]] = getelementptr inbounds [9 x i{{[0-9]+}}], [9 x i{{[0-9]+}}]* [[SIZET2]], i{{[0-9]+}} 0, i{{[0-9]+}} 6
-  // CHECK:  store i{{[0-9]+}} {{[0-9]}}, i{{[0-9]+}}* [[SIZE_GEPCN_6]],
   // CHECK:  [[BASE_PTR_GEP2_7:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BASE_PTR_ARR2]], i{{[0-9]+}} 0, i{{[0-9]+}} 7
   // CHECK:  [[BCAST_TOPTR:%.+]] = bitcast i8** [[BASE_PTR_GEP2_7]] to double**
   // CHECK:  store double* [[CN_VLA]], double** [[BCAST_TOPTR]],
@@ -259,8 +249,6 @@ int foo(int n, double *ptr) {
   // CHECK:  [[PTR_GEP2_8:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[PTR_ARR2]], i{{[0-9]+}} 0, i{{[0-9]+}} 8
   // CHECK:  [[BCAST_TOPTR:%.+]] = bitcast i8** [[PTR_GEP2_8]] to [[TT]]**
   // CHECK:  store [[TT]]* [[D]], [[TT]]** [[BCAST_TOPTR]],
-  // CHECK:  [[SIZE_GEPCN_8:%.+]] = getelementptr inbounds [9 x i{{[0-9]+}}], [9 x i{{[0-9]+}}]* [[SIZET2]], i{{[0-9]+}} 0, i{{[0-9]+}} 8
-  // CHECK:  store i{{[0-9]+}} {{[0-9]+}}, i{{[0-9]+}}* [[SIZE_GEPCN_8]],
 
   // CHECK:  [[BASE_PTR_GEP_ARG2:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BASE_PTR_ARR2]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
   // CHECK:  [[PTR_GEP_ARG2:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[PTR_ARR2]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
@@ -457,8 +445,6 @@ struct S1 {
   // CHECK:  [[P0:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[PTRS4]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
   // CHECK:  [[CP0:%.+]] = bitcast i8** [[P0]] to double**
   // CHECK:  store double* [[A:%.+]], double** [[CP0]],
-  // CHECK:  [[SZ0:%.+]] = getelementptr inbounds [5 x i{{[0-9]+}}], [5 x i{{[0-9]+}}]* [[SIZET4]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-  // CHECK:  store i{{[0-9]+}} 8, i{{[0-9]+}}* [[SZ0]],
 
   // firstprivate(b): base_ptr = b, ptr = b, size = 4 (pass by-value)
   // CHECK:  [[BASE_PTRS_GEP4_1:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BASE_PTRS4]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
@@ -467,8 +453,6 @@ struct S1 {
   // CHECK:  [[PTRS_GEP4_1:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[PTRS4]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
   // CHECK:  [[BCAST_TOPTR:%.+]] = bitcast i8** [[PTRS_GEP4_1]] to i{{[0-9]+}}*
   // CHECK:  store i{{[0-9]+}} [[B_CAST]], i{{[0-9]+}}* [[BCAST_TOPTR]],
-  // CHECK:  [[SIZES_GEP4_1:%.+]] = getelementptr inbounds [5 x i{{[0-9]+}}], [5 x i{{[0-9]+}}]* [[SIZET4]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
-  // CHECK:  store i{{[0-9]+}} 4, i{{[0-9]+}}* [[SIZES_GEP4_1]],
 
   // firstprivate(c), 3 entries: 2, n, c
   // CHECK:  [[BASE_PTRS_GEP4_2:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BASE_PTRS4]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
@@ -477,18 +461,12 @@ struct S1 {
   // CHECK:  [[PTRS_GEP4_2:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[PTRS4]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
   // CHECK:  [[BCAST_TOPTR:%.+]] = bitcast i8** [[PTRS_GEP4_2]] to i{{[0-9]+}}*
   // CHECK:  store i{{[0-9]+}} 2, i{{[0-9]+}}* [[BCAST_TOPTR]],
-  // CHECK:  [[SIZES_GEP4_2:%.+]] = getelementptr inbounds [5 x i{{[0-9]+}}], [5 x i{{[0-9]+}}]* [[SIZET4]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
-  // CHECK-64:  store i{{[0-9]+}} 8, i{{[0-9]+}}* [[SIZES_GEP4_2]],
-  // CHECK-32:  store i{{[0-9]+}} 4, i{{[0-9]+}}* [[SIZES_GEP4_2]],
   // CHECK:  [[BASE_PTRS_GEP4_3:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BASE_PTRS4]], i{{[0-9]+}} 0, i{{[0-9]+}} 3
   // CHECK:  [[BCAST_TOPTR:%.+]] = bitcast i8** [[BASE_PTRS_GEP4_3]] to i{{[0-9]+}}*
   // CHECK:  store i{{[0-9]+}} [[N:%.+]], i{{[0-9]+}}* [[BCAST_TOPTR]],
   // CHECK:  [[PTRS_GEP4_3:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[PTRS4]], i{{[0-9]+}} 0, i{{[0-9]+}} 3
   // CHECK:  [[BCAST_TOPTR:%.+]] = bitcast i8** [[PTRS_GEP4_3]] to i{{[0-9]+}}*
   // CHECK:  store i{{[0-9]+}} [[N]], i{{[0-9]+}}* [[BCAST_TOPTR]],
-  // CHECK:  [[SIZES_GEP4_3:%.+]] = getelementptr inbounds [5 x i{{[0-9]+}}], [5 x i{{[0-9]+}}]* [[SIZET4]], i{{[0-9]+}} 0, i{{[0-9]+}} 3
-  // CHECK-64:  store i{{[0-9]+}} 8, i{{[0-9]+}}* [[SIZES_GEP4_3]],
-  // CHECK-32:  store i{{[0-9]+}} 4, i{{[0-9]+}}* [[SIZES_GEP4_3]],
   // CHECK:  [[BASE_PTRS_GEP4_4:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BASE_PTRS4]], i{{[0-9]+}} 0, i{{[0-9]+}} 4
   // CHECK:  [[BCAST_TOPTR:%.+]] = bitcast i8** [[BASE_PTRS_GEP4_4]] to i{{[0-9]+}}**
   // CHECK:  store i{{[0-9]+}}* [[B:%.+]], i{{[0-9]+}}** [[BCAST_TOPTR]],

diff  --git a/clang/test/OpenMP/target_map_codegen_12.cpp b/clang/test/OpenMP/target_map_codegen_12.cpp
index 598ceef40e6cb..ca39621aa50c3 100644
--- a/clang/test/OpenMP/target_map_codegen_12.cpp
+++ b/clang/test/OpenMP/target_map_codegen_12.cpp
@@ -35,7 +35,7 @@
 
 // CK13-LABEL: @.__omp_offloading_{{.*}}implicit_maps_variable_length_array{{.*}}_l{{[0-9]+}}.region_id = weak constant i8 0
 
-// We don't have a constant map size for VLAs.
+// CK13-DAG: [[SIZES:@.+]] = {{.+}}constant [3 x i64] [i64 {{8|4}}, i64 {{8|4}}, i64 0]
 // Map types:
 //  - OMP_MAP_PRIVATE_VAL + OMP_MAP_TARGET_PARAM + OMP_MAP_IMPLICIT = 800 (vla size)
 //  - OMP_MAP_PRIVATE_VAL + OMP_MAP_TARGET_PARAM + OMP_MAP_IMPLICIT = 800 (vla size)
@@ -53,21 +53,17 @@ void implicit_maps_variable_length_array (int a){
 
   // CK13-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
   // CK13-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 0
-  // CK13-DAG: [[S0:%.+]] = getelementptr inbounds {{.+}}[[SS]], i32 0, i32 0
   // CK13-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to i[[sz:64|32]]*
   // CK13-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to i[[sz]]*
   // CK13-DAG: store i[[sz]] 2, i[[sz]]* [[CBP0]]
   // CK13-DAG: store i[[sz]] 2, i[[sz]]* [[CP0]]
-  // CK13-DAG: store i64 {{8|4}}, i64* [[S0]],
 
   // CK13-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 1
   // CK13-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 1
-  // CK13-DAG: [[S1:%.+]] = getelementptr inbounds {{.+}}[[SS]], i32 0, i32 1
   // CK13-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to i[[sz]]*
   // CK13-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to i[[sz]]*
   // CK13-DAG: store i[[sz]] [[VAL:%.+]], i[[sz]]* [[CBP1]]
   // CK13-DAG: store i[[sz]] [[VAL]], i[[sz]]* [[CP1]]
-  // CK13-DAG: store i64 {{8|4}}, i64* [[S1]],
 
   // CK13-DAG: [[BP2:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 2
   // CK13-DAG: [[P2:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 2

diff  --git a/clang/test/OpenMP/target_map_codegen_13.cpp b/clang/test/OpenMP/target_map_codegen_13.cpp
index 27fe1698db9cc..38fc90a951eaf 100644
--- a/clang/test/OpenMP/target_map_codegen_13.cpp
+++ b/clang/test/OpenMP/target_map_codegen_13.cpp
@@ -38,6 +38,7 @@
 // CK14-LABEL: @.__omp_offloading_{{.*}}foo{{.*}}_l{{[0-9]+}}.region_id = weak constant i8 0
 
 
+// CK14-DAG: [[SIZES:@.+]] = {{.+}}constant [4 x i64] [i64 0, i64 4, i64 8, i64 4]
 // Map types:
 // - OMP_MAP_TARGET_PARAM = 32
 // - OMP_MAP_TO + OMP_MAP_FROM | OMP_MAP_IMPLICIT | OMP_MAP_MEMBER_OF = 281474976711171
@@ -81,30 +82,24 @@ void implicit_maps_class (int a){
 
   // CK14-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 1
   // CK14-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 1
-  // CK14-DAG: [[S1:%.+]] = getelementptr inbounds {{.+}}[[S]], i32 0, i32 1
   // CK14-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to [[ST]]**
   // CK14-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to i32**
   // CK14-DAG: store [[ST]]* [[DECL]], [[ST]]** [[CBP1]]
   // CK14-DAG: store i32* [[A]], i32** [[CP1]]
-  // CK14-DAG: store i64 4, i64* [[S1]]
 
   // CK14-DAG: [[BP2:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 2
   // CK14-DAG: [[P2:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 2
-  // CK14-DAG: [[S2:%.+]] = getelementptr inbounds {{.+}}[[S]], i32 0, i32 2
   // CK14-DAG: [[CBP2:%.+]] = bitcast i8** [[BP2]] to [[ST]]**
   // CK14-DAG: [[CP2:%.+]] = bitcast i8** [[P2]] to double**
   // CK14-DAG: store [[ST]]* [[DECL]], [[ST]]** [[CBP2]]
   // CK14-DAG: store double* %{{.+}}, double** [[CP2]]
-  // CK14-DAG: store i64 8, i64* [[S2]]
 
   // CK14-DAG: [[BP3:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 3
   // CK14-DAG: [[P3:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 3
-  // CK14-DAG: [[S3:%.+]] = getelementptr inbounds {{.+}}[[S]], i32 0, i32 3
   // CK14-DAG: [[CBP3:%.+]] = bitcast i8** [[BP3]] to i[[sz:64|32]]*
   // CK14-DAG: [[CP3:%.+]] = bitcast i8** [[P3]] to i[[sz]]*
   // CK14-DAG: store i[[sz]] [[VAL:%.+]], i[[sz]]* [[CBP3]]
   // CK14-DAG: store i[[sz]] [[VAL]], i[[sz]]* [[CP3]]
-  // CK14-DAG: store i64 4, i64* [[S3]]
   // CK14-DAG: [[VAL]] = load i[[sz]], i[[sz]]* [[ADDR:%.+]],
   // CK14-64-DAG: [[CADDR:%.+]] = bitcast i[[sz]]* [[ADDR]] to i32*
   // CK14-64-DAG: store i32 {{.+}}, i32* [[CADDR]],

diff  --git a/clang/test/OpenMP/target_map_codegen_14.cpp b/clang/test/OpenMP/target_map_codegen_14.cpp
index e08c2361cbd8b..a3dfd091a89f1 100644
--- a/clang/test/OpenMP/target_map_codegen_14.cpp
+++ b/clang/test/OpenMP/target_map_codegen_14.cpp
@@ -34,12 +34,14 @@
 #ifdef CK15
 
 // CK15: [[ST:%.+]] = type { i32, double }
+// CK15: [[SIZES:@.+]] = {{.+}}constant [4 x i64] [i64 0, i64 4, i64 8, i64 4]
 // Map types:
 // - OMP_MAP_TARGET_PARAM = 32
 // - OMP_MAP_TO + OMP_MAP_FROM | OMP_MAP_IMPLICIT | OMP_MAP_MEMBER_OF = 281474976711171
 // - OMP_MAP_PRIVATE_VAL + OMP_MAP_TARGET_PARAM | OMP_MAP_IMPLICIT = 800
 // CK15: [[TYPES:@.+]] = {{.+}}constant [4 x i64] [i64 32, i64 281474976711171, i64 281474976711171, i64 800]
 
+// CK15: [[SIZES2:@.+]] = {{.+}}constant [4 x i64] [i64 0, i64 4, i64 8, i64 4]
 // Map types:
 // - OMP_MAP_TARGET_PARAM = 32
 // - OMP_MAP_TO + OMP_MAP_FROM | OMP_MAP_IMPLICIT | OMP_MAP_MEMBER_OF = 281474976711171
@@ -92,30 +94,24 @@ void implicit_maps_templated_class (int a){
 
   // CK15-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 1
   // CK15-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 1
-  // CK15-DAG: [[S1:%.+]] = getelementptr inbounds {{.+}}[[S]], i32 0, i32 1
   // CK15-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to [[ST]]**
   // CK15-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to i32**
   // CK15-DAG: store [[ST]]* [[DECL]], [[ST]]** [[CBP1]]
   // CK15-DAG: store i32* [[A]], i32** [[CP1]]
-  // CK15-DAG: store i64 4, i64* [[S1]]
 
   // CK15-DAG: [[BP2:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 2
   // CK15-DAG: [[P2:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 2
-  // CK15-DAG: [[S2:%.+]] = getelementptr inbounds {{.+}}[[S]], i32 0, i32 2
   // CK15-DAG: [[CBP2:%.+]] = bitcast i8** [[BP2]] to [[ST]]**
   // CK15-DAG: [[CP2:%.+]] = bitcast i8** [[P2]] to double**
   // CK15-DAG: store [[ST]]* [[DECL]], [[ST]]** [[CBP2]]
   // CK15-DAG: store double* %{{.+}}, double** [[CP2]]
-  // CK15-DAG: store i64 8, i64* [[S2]]
 
   // CK15-DAG: [[BP3:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 3
   // CK15-DAG: [[P3:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 3
-  // CK15-DAG: [[S3:%.+]] = getelementptr inbounds {{.+}}[[S]], i32 0, i32 3
   // CK15-DAG: [[CBP3:%.+]] = bitcast i8** [[BP3]] to i[[sz:64|32]]*
   // CK15-DAG: [[CP3:%.+]] = bitcast i8** [[P3]] to i[[sz]]*
   // CK15-DAG: store i[[sz]] [[VAL:%.+]], i[[sz]]* [[CBP3]]
   // CK15-DAG: store i[[sz]] [[VAL]], i[[sz]]* [[CP3]]
-  // CK15-DAG: store i64 4, i64* [[S3]]
   // CK15-DAG: [[VAL]] = load i[[sz]], i[[sz]]* [[ADDR:%.+]],
   // CK15-64-DAG: [[CADDR:%.+]] = bitcast i[[sz]]* [[ADDR]] to i32*
   // CK15-64-DAG: store i32 {{.+}}, i32* [[CADDR]],
@@ -140,30 +136,24 @@ void implicit_maps_templated_class (int a){
 
   // CK15-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 1
   // CK15-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 1
-  // CK15-DAG: [[S1:%.+]] = getelementptr inbounds {{.+}}[[S]], i32 0, i32 1
   // CK15-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to [[ST]]**
   // CK15-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to i32**
   // CK15-DAG: store [[ST]]* [[DECL]], [[ST]]** [[CBP1]]
   // CK15-DAG: store i32* [[A]], i32** [[CP1]]
-  // CK15-DAG: store i64 4, i64* [[S1]]
 
   // CK15-DAG: [[BP2:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 2
   // CK15-DAG: [[P2:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 2
-  // CK15-DAG: [[S2:%.+]] = getelementptr inbounds {{.+}}[[S]], i32 0, i32 2
   // CK15-DAG: [[CBP2:%.+]] = bitcast i8** [[BP2]] to [[ST]]**
   // CK15-DAG: [[CP2:%.+]] = bitcast i8** [[P2]] to double**
   // CK15-DAG: store [[ST]]* [[DECL]], [[ST]]** [[CBP2]]
   // CK15-DAG: store double* %{{.+}}, double** [[CP2]]
-  // CK15-DAG: store i64 8, i64* [[S2]]
 
   // CK15-DAG: [[BP3:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 3
   // CK15-DAG: [[P3:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 3
-  // CK15-DAG: [[S3:%.+]] = getelementptr inbounds {{.+}}[[S]], i32 0, i32 3
   // CK15-DAG: [[CBP3:%.+]] = bitcast i8** [[BP3]] to i[[sz]]*
   // CK15-DAG: [[CP3:%.+]] = bitcast i8** [[P3]] to i[[sz]]*
   // CK15-DAG: store i[[sz]] [[VAL:%.+]], i[[sz]]* [[CBP3]]
   // CK15-DAG: store i[[sz]] [[VAL]], i[[sz]]* [[CP3]]
-  // CK15-DAG: store i64 4, i64* [[S3]]
   // CK15-DAG: [[VAL]] = load i[[sz]], i[[sz]]* [[ADDR:%.+]],
   // CK15-64-DAG: [[CADDR:%.+]] = bitcast i[[sz]]* [[ADDR]] to i32*
   // CK15-64-DAG: store i32 {{.+}}, i32* [[CADDR]],

diff  --git a/clang/test/OpenMP/target_map_codegen_18.inc b/clang/test/OpenMP/target_map_codegen_18.inc
index 34343dd5f4720..9e9bbed6511f6 100644
--- a/clang/test/OpenMP/target_map_codegen_18.inc
+++ b/clang/test/OpenMP/target_map_codegen_18.inc
@@ -91,6 +91,7 @@
 // CK19-NOUSE: [[MTYPE15:@.+]] = private {{.*}}constant [1 x i64] [i64 2]
 
 // CK19-LABEL: @.__omp_offloading_{{.*}}explicit_maps_single{{.*}}_l{{[0-9]+}}.region_id = weak constant i8 0
+// CK19-USE: [[SIZE16:@.+]] = private {{.*}}constant [2 x i64] [i64 {{8|4}}, i64 0]
 // CK19-USE: [[MTYPE16:@.+]] = private {{.*}}constant [2 x i64] [i64 800, i64 33]
 // CK19-NOUSE: [[MTYPE16:@.+]] = private {{.*}}constant [1 x i64] [i64 1]
 
@@ -107,6 +108,7 @@
 // CK19-NOUSE: [[MTYPE18:@.+]] = private {{.*}}constant [1 x i64] [i64 3]
 
 // CK19-LABEL: @.__omp_offloading_{{.*}}explicit_maps_single{{.*}}_l{{[0-9]+}}.region_id = weak constant i8 0
+// CK19-USE: [[SIZE19:@.+]] = private {{.*}}constant [2 x i64] [i64 {{8|4}}, i64 0]
 // CK19-USE: [[MTYPE19:@.+]] = private {{.*}}constant [2 x i64] [i64 800, i64 32]
 // CK19-NOUSE: [[MTYPE19:@.+]] = private {{.*}}constant [1 x i64] zeroinitializer
 
@@ -117,6 +119,7 @@
 // CK19-NOUSE: [[MTYPE20:@.+]] = private {{.*}}constant [1 x i64] [i64 1]
 
 // CK19-LABEL: @.__omp_offloading_{{.*}}explicit_maps_single{{.*}}_l{{[0-9]+}}.region_id = weak constant i8 0
+// CK19-USE: [[SIZE21:@.+]] = private {{.*}}constant [2 x i64] [i64 {{8|4}}, i64 0]
 // CK19-USE: [[MTYPE21:@.+]] = private {{.*}}constant [2 x i64] [i64 800, i64 35]
 // CK19-NOUSE: [[MTYPE21:@.+]] = private {{.*}}constant [1 x i64] [i64 3]
 
@@ -162,6 +165,7 @@
 // CK19-NOUSE: [[MTYPE29:@.+]] = private {{.*}}constant [3 x i64] [i64 3, i64 16, i64 19]
 
 // CK19-LABEL: @.__omp_offloading_{{.*}}explicit_maps_single{{.*}}_l{{[0-9]+}}.region_id = weak constant i8 0
+// CK19-USE: [[SIZE30:@.+]] = private {{.*}}constant [4 x i64] [i64 {{8|4}}, i64 {{8|4}}, i64 {{8|4}}, i64 0]
 // CK19-USE: [[MTYPE30:@.+]] = private {{.*}}constant [4 x i64] [i64 800, i64 800, i64 800, i64 35]
 // CK19-NOUSE: [[MTYPE30:@.+]] = private {{.*}}constant [1 x i64] [i64 3]
 
@@ -196,18 +200,22 @@
 // CK19-NOUSE: [[MTYPE36:@.+]] = private {{.*}}constant [1 x i64] [i64 3]
 
 // CK19-LABEL: @.__omp_offloading_{{.*}}explicit_maps_single{{.*}}_l{{[0-9]+}}.region_id = weak constant i8 0
+// CK19-USE: [[SIZE37:@.+]] = private {{.*}}constant [3 x i64] [i64 {{8|4}}, i64 {{8|4}}, i64 0]
 // CK19-USE: [[MTYPE37:@.+]] = private {{.*}}constant [3 x i64] [i64 800, i64 800, i64 35]
 // CK19-NOUSE: [[MTYPE37:@.+]] = private {{.*}}constant [1 x i64] [i64 3]
 
 // CK19-LABEL: @.__omp_offloading_{{.*}}explicit_maps_single{{.*}}_l{{[0-9]+}}.region_id = weak constant i8 0
+// CK19-USE: [[SIZE38:@.+]] = private {{.*}}constant [3 x i64] [i64 {{8|4}}, i64 {{8|4}}, i64 0]
 // CK19-USE: [[MTYPE38:@.+]] = private {{.*}}constant [3 x i64] [i64 800, i64 800, i64 35]
 // CK19-NOUSE: [[MTYPE38:@.+]] = private {{.*}}constant [1 x i64] [i64 3]
 
 // CK19-LABEL: @.__omp_offloading_{{.*}}explicit_maps_single{{.*}}_l{{[0-9]+}}.region_id = weak constant i8 0
+// CK19-USE: [[SIZE39:@.+]] = private {{.*}}constant [3 x i64] [i64 {{8|4}}, i64 {{8|4}}, i64 0]
 // CK19-USE: [[MTYPE39:@.+]] = private {{.*}}constant [3 x i64] [i64 800, i64 800, i64 35]
 // CK19-NOUSE: [[MTYPE39:@.+]] = private {{.*}}constant [1 x i64] [i64 3]
 
 // CK19-LABEL: @.__omp_offloading_{{.*}}explicit_maps_single{{.*}}_l{{[0-9]+}}.region_id = weak constant i8 0
+// CK19-USE: [[SIZE40:@.+]] = private {{.*}}constant [3 x i64] [i64 {{8|4}}, i64 {{8|4}}, i64 0]
 // CK19-USE: [[MTYPE40:@.+]] = private {{.*}}constant [3 x i64] [i64 800, i64 800, i64 35]
 // CK19-NOUSE: [[MTYPE40:@.+]] = private {{.*}}constant [1 x i64] [i64 3]
 
@@ -655,12 +663,10 @@ void explicit_maps_single (int ii){
 
   // CK19-USE-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0
   // CK19-USE-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0
-  // CK19-USE-DAG: [[S0:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 0
   // CK19-USE-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to i[[Z:64|32]]*
   // CK19-USE-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to i[[Z]]*
   // CK19-USE-DAG: store i[[Z]] {{%.+}}, i[[Z]]* [[CBP0]]
   // CK19-USE-DAG: store i[[Z]] {{%.+}}, i[[Z]]* [[CP0]]
-  // CK19-USE-DAG: store i{{.+}} {{8|4}}, i{{.+}}* [[S0]]
 
   // CK19-USE-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1
   // CK19-USE-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1
@@ -773,12 +779,10 @@ void explicit_maps_single (int ii){
 
   // CK19-USE-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0
   // CK19-USE-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0
-  // CK19-USE-DAG: [[S0:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 0
   // CK19-USE-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to i[[Z]]*
   // CK19-USE-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to i[[Z]]*
   // CK19-USE-DAG: store i[[Z]] {{%.+}}, i[[Z]]* [[CBP0]]
   // CK19-USE-DAG: store i[[Z]] {{%.+}}, i[[Z]]* [[CP0]]
-  // CK19-USE-DAG: store i{{.+}} {{8|4}}, i{{.+}}* [[S0]]
 
   // CK19-USE-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1
   // CK19-USE-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1
@@ -856,12 +860,10 @@ void explicit_maps_single (int ii){
 
   // CK19-USE-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0
   // CK19-USE-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0
-  // CK19-USE-DAG: [[S0:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 0
   // CK19-USE-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to i[[Z]]*
   // CK19-USE-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to i[[Z]]*
   // CK19-USE-DAG: store i[[Z]] {{%.+}}, i[[Z]]* [[CBP0]]
   // CK19-USE-DAG: store i[[Z]] {{%.+}}, i[[Z]]* [[CP0]]
-  // CK19-USE-DAG: store i{{.+}} {{8|4}}, i{{.+}}* [[S0]]
 
   // CK19-USE-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1
   // CK19-USE-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1
@@ -1157,32 +1159,26 @@ void explicit_maps_single (int ii){
   //
   // CK19-USE-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0
   // CK19-USE-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0
-  // CK19-USE-DAG: [[S0:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 0
   // CK19-USE-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to i[[Z]]*
   // CK19-USE-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to i[[Z]]*
   // CK19-USE-DAG: store i[[Z]] 23, i[[Z]]* [[CBP0]]
   // CK19-USE-DAG: store i[[Z]] 23, i[[Z]]* [[CP0]]
-  // CK19-USE-DAG: store i64 {{8|4}}, i64* [[S0]]
   //
   // CK19-USE-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1
   // CK19-USE-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1
-  // CK19-USE-DAG: [[S1:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 1
   // CK19-USE-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to i[[Z]]*
   // CK19-USE-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to i[[Z]]*
   // CK19-USE-DAG: store i[[Z]] [[VAR1:%.+]], i[[Z]]* [[CBP1]]
   // CK19-USE-DAG: store i[[Z]] [[VAR11:%.+]], i[[Z]]* [[CP1]]
-  // CK19-USE-DAG: store i64 {{8|4}}, i64* [[S1]]
   // CK19-64-USE-DAG: [[VAR1]] = zext i32 %{{[^,]+}} to i64
   // CK19-64-USE-DAG: [[VAR11]] = zext i32 %{{[^,]+}} to i64
   //
   // CK19-USE-DAG: [[BP2:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 2
   // CK19-USE-DAG: [[P2:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 2
-  // CK19-USE-DAG: [[S2:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 2
   // CK19-USE-DAG: [[CBP2:%.+]] = bitcast i8** [[BP2]] to i[[Z]]*
   // CK19-USE-DAG: [[CP2:%.+]] = bitcast i8** [[P2]] to i[[Z]]*
   // CK19-USE-DAG: store i[[Z]] [[VAR2:%.+]], i[[Z]]* [[CBP2]]
   // CK19-USE-DAG: store i[[Z]] [[VAR22:%.+]], i[[Z]]* [[CP2]]
-  // CK19-USE-DAG: store i64 {{8|4}}, i64* [[S2]]
   // CK19-64-USE-DAG: [[VAR2]] = zext i32 %{{[^,]+}} to i64
   // CK19-64-USE-DAG: [[VAR22]] = zext i32 %{{[^,]+}} to i64
   //
@@ -1404,21 +1400,17 @@ void explicit_maps_single (int ii){
   //
   // CK19-USE-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0
   // CK19-USE-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0
-  // CK19-USE-DAG: [[S0:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 0
   // CK19-USE-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to i[[Z]]*
   // CK19-USE-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to i[[Z]]*
   // CK19-USE-DAG: store i[[Z]] 11, i[[Z]]* [[CBP0]]
   // CK19-USE-DAG: store i[[Z]] 11, i[[Z]]* [[CP0]]
-  // CK19-USE-DAG: store i64 {{8|4}}, i64* [[S0]]
   //
   // CK19-USE-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1
   // CK19-USE-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1
-  // CK19-USE-DAG: [[S1:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 1
   // CK19-USE-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to i[[Z]]*
   // CK19-USE-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to i[[Z]]*
   // CK19-USE-DAG: store i[[Z]] [[VAR1:%.+]], i[[Z]]* [[CBP1]]
   // CK19-USE-DAG: store i[[Z]] [[VAR11:%.+]], i[[Z]]* [[CP1]]
-  // CK19-USE-DAG: store i64 {{8|4}}, i64* [[S1]]
   //
   // CK19-USE-DAG: [[BP2:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 2
   // CK19-USE-DAG: [[P2:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 2
@@ -1457,21 +1449,17 @@ void explicit_maps_single (int ii){
   //
   // CK19-USE-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0
   // CK19-USE-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0
-  // CK19-USE-DAG: [[S0:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 0
   // CK19-USE-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to i[[Z]]*
   // CK19-USE-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to i[[Z]]*
   // CK19-USE-DAG: store i[[Z]] 11, i[[Z]]* [[CBP0]]
   // CK19-USE-DAG: store i[[Z]] 11, i[[Z]]* [[CP0]]
-  // CK19-USE-DAG: store i64 {{8|4}}, i64* [[S0]]
   //
   // CK19-USE-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1
   // CK19-USE-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1
-  // CK19-USE-DAG: [[S1:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 1
   // CK19-USE-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to i[[Z]]*
   // CK19-USE-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to i[[Z]]*
   // CK19-USE-DAG: store i[[Z]] [[VAR1:%.+]], i[[Z]]* [[CBP1]]
   // CK19-USE-DAG: store i[[Z]] [[VAR11:%.+]], i[[Z]]* [[CP1]]
-  // CK19-USE-DAG: store i64 {{8|4}}, i64* [[S1]]
   //
   // CK19-USE-DAG: [[BP2:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 2
   // CK19-USE-DAG: [[P2:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 2
@@ -1514,21 +1502,17 @@ void explicit_maps_single (int ii){
   //
   // CK19-USE-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0
   // CK19-USE-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0
-  // CK19-USE-DAG: [[S0:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 0
   // CK19-USE-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to i[[Z]]*
   // CK19-USE-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to i[[Z]]*
   // CK19-USE-DAG: store i[[Z]] 11, i[[Z]]* [[CBP0]]
   // CK19-USE-DAG: store i[[Z]] 11, i[[Z]]* [[CP0]]
-  // CK19-USE-DAG: store i64 {{8|4}}, i64* [[S0]]
   //
   // CK19-USE-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1
   // CK19-USE-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1
-  // CK19-USE-DAG: [[S1:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 1
   // CK19-USE-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to i[[Z]]*
   // CK19-USE-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to i[[Z]]*
   // CK19-USE-DAG: store i[[Z]] [[VAR1:%.+]], i[[Z]]* [[CBP1]]
   // CK19-USE-DAG: store i[[Z]] [[VAR11:%.+]], i[[Z]]* [[CP1]]
-  // CK19-USE-DAG: store i64 {{8|4}}, i64* [[S1]]
   //
   // CK19-USE-DAG: [[BP2:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 2
   // CK19-USE-DAG: [[P2:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 2
@@ -1571,21 +1555,17 @@ void explicit_maps_single (int ii){
   //
   // CK19-USE-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0
   // CK19-USE-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0
-  // CK19-USE-DAG: [[S0:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 0
   // CK19-USE-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to i[[Z]]*
   // CK19-USE-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to i[[Z]]*
   // CK19-USE-DAG: store i[[Z]] 11, i[[Z]]* [[CBP0]]
   // CK19-USE-DAG: store i[[Z]] 11, i[[Z]]* [[CP0]]
-  // CK19-USE-DAG: store i64 {{8|4}}, i64* [[S0]]
   //
   // CK19-USE-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1
   // CK19-USE-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1
-  // CK19-USE-DAG: [[S1:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 1
   // CK19-USE-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to i[[Z]]*
   // CK19-USE-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to i[[Z]]*
   // CK19-USE-DAG: store i[[Z]] [[VAR1:%.+]], i[[Z]]* [[CBP1]]
   // CK19-USE-DAG: store i[[Z]] [[VAR11:%.+]], i[[Z]]* [[CP1]]
-  // CK19-USE-DAG: store i64 {{8|4}}, i64* [[S1]]
   //
   // CK19-USE-DAG: [[BP2:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 2
   // CK19-USE-DAG: [[P2:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 2
@@ -1599,6 +1579,7 @@ void explicit_maps_single (int ii){
   // CK19-USE-DAG: [[SEC22]] = getelementptr {{.+}}[13 x double]* [[VAR2]], i[[Z]] [[SEC222:%[^,]+]]
   // CK19-USE-DAG: [[SEC222]] = mul nsw i[[Z]] 1, %{{[^,]+}}
 
+  // CK19-NOUSE-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]]
   // CK19-NOUSE-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0
   // CK19-NOUSE-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0
   // CK19-NOUSE-DAG: [[S0:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 0

diff  --git a/clang/test/OpenMP/target_map_codegen_20.cpp b/clang/test/OpenMP/target_map_codegen_20.cpp
index 3696cd689cb59..c8ee925f0fa30 100644
--- a/clang/test/OpenMP/target_map_codegen_20.cpp
+++ b/clang/test/OpenMP/target_map_codegen_20.cpp
@@ -74,6 +74,7 @@
 // CK21-NOUSE: [[MTYPE01:@.+]] = private {{.*}}constant [1 x i64] [i64 3]
 
 // CK21-LABEL: @.__omp_offloading_{{.*}}foo{{.*}}_l{{[0-9]+}}.region_id = weak constant i8 0
+// CK21: [[SIZE02:@.+]] = private {{.*}}constant [2 x i64] [i64 0, i64 500]
 // CK21-USE: [[MTYPE02:@.+]] = private {{.*}}constant [2 x i64] [i64 32, i64 281474976710674]
 // CK21-NOUSE: [[MTYPE02:@.+]] = private {{.*}}constant [2 x i64] [i64 0, i64 281474976710674]
 
@@ -88,6 +89,7 @@
 // CK21-NOUSE: [[MTYPE04:@.+]] = private {{.*}}constant [1 x i64] [i64 2]
 
 // CK21-LABEL: @.__omp_offloading_{{.*}}foo{{.*}}_l{{[0-9]+}}.region_id = weak constant i8 0
+// CK21: [[SIZE05:@.+]] = private {{.*}}constant [3 x i64] [i64 0, i64 4, i64 4]
 // CK21-USE: [[MTYPE05:@.+]] = private {{.*}}constant [3 x i64] [i64 32, i64 281474976710659, i64 281474976710659]
 // CK21-NOUSE: [[MTYPE05:@.+]] = private {{.*}}constant [3 x i64] [i64 0, i64 281474976710659, i64 281474976710659]
 
@@ -167,12 +169,10 @@ struct CC {
 
     // CK21-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1
     // CK21-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1
-    // CK21-DAG: [[S1:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 1
     // CK21-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to float***
     // CK21-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to float**
     // CK21-DAG: store float** [[SEC0]], float*** [[CBP1]]
     // CK21-DAG: store float* [[SEC1:%.+]], float** [[CP1]]
-    // CK21-DAG: store i64 {{.+}}, i64* [[S1]]
     // CK21-DAG: [[SEC1]] = getelementptr {{.*}}float* [[RVAR1:%[^,]+]], i{{.+}} 123
     // CK21-DAG: [[RVAR1]] = load float*, float** [[SEC1_:%[^,]+]]
     // CK21-DAG: [[SEC1_]] = getelementptr {{.*}}[[ST]]* [[VAR0]], i{{.+}} 0, i{{.+}} 2
@@ -247,21 +247,17 @@ struct CC {
 
     // CK21-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1
     // CK21-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1
-    // CK21-DAG: [[S1:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 1
     // CK21-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to [[ST]]**
     // CK21-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to i32**
     // CK21-DAG: store [[ST]]* [[VAR0]], [[ST]]** [[CBP1]]
     // CK21-DAG: store i32* [[SEC0]], i32** [[CP1]]
-    // CK21-DAG: store i64 {{.+}}, i64* [[S1]]
 
     // CK21-DAG: [[BP2:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 2
     // CK21-DAG: [[P2:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 2
-    // CK21-DAG: [[S2:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 2
     // CK21-DAG: [[CBP2:%.+]] = bitcast i8** [[BP2]] to [[ST]]**
     // CK21-DAG: [[CP2:%.+]] = bitcast i8** [[P2]] to i32**
     // CK21-DAG: store [[ST]]* [[VAR2:%.+]], [[ST]]** [[CBP2]]
     // CK21-DAG: store i32* [[SEC2:%.+]], i32** [[CP2]]
-    // CK21-DAG: store i64 {{.+}}, i64* [[S2]]
     // CK21-DAG: [[SEC2]] = getelementptr {{.*}}[[ST]]* [[VAR2]], i{{.+}} 0, i{{.+}} 1
 
     // CK21-USE: call void [[CALL05:@.+]]([[ST]]* {{[^,]+}})

diff  --git a/clang/test/OpenMP/target_map_codegen_23.cpp b/clang/test/OpenMP/target_map_codegen_23.cpp
index c15715f2b1b36..d425a2e0ec8b1 100644
--- a/clang/test/OpenMP/target_map_codegen_23.cpp
+++ b/clang/test/OpenMP/target_map_codegen_23.cpp
@@ -73,9 +73,11 @@ struct SC{
 // CK24: [[MTYPE15:@.+]] = private {{.*}}constant [1 x i64] [i64 35]
 
 // CK24-LABEL: @.__omp_offloading_{{.*}}explicit_maps_struct_fields{{.*}}_l{{[0-9]+}}.region_id = weak constant i8 0
+// CK24: [[SIZE16:@.+]] = private {{.*}}constant [2 x i64] [i64 0, i64 20]
 // CK24: [[MTYPE16:@.+]] = private {{.*}}constant [2 x i64] [i64 32, i64 281474976710659]
 
 // CK24-LABEL: @.__omp_offloading_{{.*}}explicit_maps_struct_fields{{.*}}_l{{[0-9]+}}.region_id = weak constant i8 0
+// CK24: [[SIZE17:@.+]] = private {{.*}}constant [2 x i64] [i64 0, i64 {{3560|2880}}]
 // CK24: [[MTYPE17:@.+]] = private {{.*}}constant [2 x i64] [i64 32, i64 281474976710675]
 
 // CK24-LABEL: @.__omp_offloading_{{.*}}explicit_maps_struct_fields{{.*}}_l{{[0-9]+}}.region_id = weak constant i8 0
@@ -83,21 +85,27 @@ struct SC{
 // CK24: [[MTYPE18:@.+]] = private {{.*}}constant [1 x i64] [i64 35]
 
 // CK24-LABEL: @.__omp_offloading_{{.*}}explicit_maps_struct_fields{{.*}}_l{{[0-9]+}}.region_id = weak constant i8 0
+// CK24: [[SIZE19:@.+]] = private {{.*}}constant [3 x i64] [i64 0, i64 {{8|4}}, i64 4]
 // CK24: [[MTYPE19:@.+]] = private {{.*}}constant [3 x i64] [i64 32, i64 281474976710659, i64 281474976710675]
 
 // CK24-LABEL: @.__omp_offloading_{{.*}}explicit_maps_struct_fields{{.*}}_l{{[0-9]+}}.region_id = weak constant i8 0
+// CK24: [[SIZE20:@.+]] = private {{.*}}constant [2 x i64] [i64 0, i64 4]
 // CK24: [[MTYPE20:@.+]] = private {{.*}}constant [2 x i64] [i64 32, i64 281474976710675]
 
 // CK24-LABEL: @.__omp_offloading_{{.*}}explicit_maps_struct_fields{{.*}}_l{{[0-9]+}}.region_id = weak constant i8 0
+// CK24: [[SIZE21:@.+]] = private {{.*}}constant [3 x i64] [i64 0, i64 {{8|4}}, i64 4]
 // CK24: [[MTYPE21:@.+]] = private {{.*}}constant [3 x i64] [i64 32, i64 281474976710659, i64 281474976710675]
 
 // CK24-LABEL: @.__omp_offloading_{{.*}}explicit_maps_struct_fields{{.*}}_l{{[0-9]+}}.region_id = weak constant i8 0
+// CK24: [[SIZE22:@.+]] = private {{.*}}constant [2 x i64] [i64 0, i64 8]
 // CK24: [[MTYPE22:@.+]] = private {{.*}}constant [2 x i64] [i64 32, i64 281474976710659]
 
 // CK24-LABEL: @.__omp_offloading_{{.*}}explicit_maps_struct_fields{{.*}}_l{{[0-9]+}}.region_id = weak constant i8 0
+// CK24: [[SIZE23:@.+]] = private {{.*}}constant [3 x i64] [i64 0, i64 {{8|4}}, i64 8]
 // CK24: [[MTYPE23:@.+]] = private {{.*}}constant [3 x i64] [i64 32, i64 281474976710659, i64 281474976710675]
 
 // CK24-LABEL: @.__omp_offloading_{{.*}}explicit_maps_struct_fields{{.*}}_l{{[0-9]+}}.region_id = weak constant i8 0
+// CK24: [[SIZE24:@.+]] = private {{.*}}constant [4 x i64] [i64 0, i64 {{8|4}}, i64 {{8|4}}, i64 4]
 // CK24: [[MTYPE24:@.+]] = private {{.*}}constant [4 x i64] [i64 32, i64 281474976710672, i64 16, i64 19]
 
 // CK24-LABEL: explicit_maps_struct_fields
@@ -207,12 +215,10 @@ int explicit_maps_struct_fields(int a){
 
 // CK24-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1
 // CK24-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1
-// CK24-DAG: [[S1:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 1
 // CK24-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to [[SC]]**
 // CK24-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to i32**
 // CK24-DAG: store [[SC]]* [[VAR0]], [[SC]]** [[CBP1]]
 // CK24-DAG: store i32* [[SEC0]], i32** [[CP1]]
-// CK24-DAG: store i64 {{.+}}, i64* [[S1]]
 
 // CK24-DAG: [[VAR0]] = load [[SC]]*, [[SC]]** %{{.+}}
 // CK24-DAG: [[VAR00]] = load [[SC]]*, [[SC]]** %{{.+}}
@@ -239,12 +245,10 @@ int explicit_maps_struct_fields(int a){
 
 // CK24-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1
 // CK24-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1
-// CK24-DAG: [[S1:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 1
 // CK24-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to [[SB]]***
 // CK24-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to [[SB]]**
 // CK24-DAG: store [[SB]]** [[SEC0]], [[SB]]*** [[CBP1]]
 // CK24-DAG: store [[SB]]* [[SEC1:%.+]], [[SB]]** [[CP1]]
-// CK24-DAG: store i64 {{.+}}, i64* [[S1]]
 // CK24-DAG: [[SEC1]] = getelementptr {{.*}}[[SB]]* [[SEC11:%[^,]+]], i{{.+}} 0
 // CK24-DAG: [[SEC11]] = load [[SB]]*, [[SB]]** [[SEC111:%[^,]+]],
 // CK24-DAG: [[SEC111]] = getelementptr {{.*}}[[SC]]* [[VAR000:%.+]], i{{.+}} 0, i{{.+}} 2
@@ -300,12 +304,10 @@ int explicit_maps_struct_fields(int a){
 
 // CK24-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1
 // CK24-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1
-// CK24-DAG: [[S1:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 1
 // CK24-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to [[SC]]**
 // CK24-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to [[SA]]***
 // CK24-DAG: store [[SC]]* [[VAR0]], [[SC]]** [[CBP1]]
 // CK24-DAG: store [[SA]]** [[SEC0]], [[SA]]*** [[CP1]]
-// CK24-DAG: store i64 {{.+}}, i64* [[S1]]
 
 // CK24-DAG: [[BP2:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 2
 // CK24-DAG: [[P2:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 2
@@ -380,21 +382,17 @@ int explicit_maps_struct_fields(int a){
 
 // CK24-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1
 // CK24-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1
-// CK24-DAG: [[S1:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 1
 // CK24-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to [[SC]]**
 // CK24-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to [[SA]]***
 // CK24-DAG: store [[SC]]* [[VAR0]], [[SC]]** [[CBP1]]
 // CK24-DAG: store [[SA]]** [[SEC0]], [[SA]]*** [[CP1]]
-// CK24-DAG: store i64 {{.+}}, i64* [[S1]]
 
 // CK24-DAG: [[BP2:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 2
 // CK24-DAG: [[P2:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 2
-// CK24-DAG: [[S2:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 2
 // CK24-DAG: [[CBP2:%.+]] = bitcast i8** [[BP2]] to [[SA]]***
 // CK24-DAG: [[CP2:%.+]] = bitcast i8** [[P2]] to i32**
 // CK24-DAG: store [[SA]]** [[SEC0]], [[SA]]*** [[CBP2]]
 // CK24-DAG: store i32* [[SEC1:%.+]], i32** [[CP2]]
-// CK24-DAG: store i64 {{.+}}, i64* [[S2]]
 // CK24-DAG: [[SEC1]] = getelementptr {{.*}}[[SA]]* [[SEC11:%[^,]+]], i{{.+}} 0
 // CK24-DAG: [[SEC11]] = load [[SA]]*, [[SA]]** [[SEC111:%[^,]+]],
 // CK24-DAG: [[SEC111]] = getelementptr {{.*}}[[SB]]* [[SEC1111:[^,]+]], i{{.+}} 0, i{{.+}} 4
@@ -429,12 +427,10 @@ int explicit_maps_struct_fields(int a){
 
 // CK24-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1
 // CK24-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1
-// CK24-DAG: [[S1:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 1
 // CK24-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to [[SC]]**
 // CK24-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to i32**
 // CK24-DAG: store [[SC]]* [[VAR0]], [[SC]]** [[CBP1]]
 // CK24-DAG: store i32* [[SEC0]], i32** [[CP1]]
-// CK24-DAG: store i64 {{.+}}, i64* [[S1]]
 
 // CK24-DAG: [[VAR0]] = load [[SC]]*, [[SC]]** %{{.+}}
 // CK24-DAG: [[VAR00]] = load [[SC]]*, [[SC]]** %{{.+}}
@@ -462,21 +458,17 @@ int explicit_maps_struct_fields(int a){
 
 // CK24-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1
 // CK24-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1
-// CK24-DAG: [[S1:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 1
 // CK24-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to [[SC]]**
 // CK24-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to [[SA]]***
 // CK24-DAG: store [[SC]]* [[VAR0]], [[SC]]** [[CBP1]]
 // CK24-DAG: store [[SA]]** [[SEC0]], [[SA]]*** [[CP1]]
-// CK24-DAG: store i64 {{.+}}, i64* [[S1]]
 
 // CK24-DAG: [[BP2:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 2
 // CK24-DAG: [[P2:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 2
-// CK24-DAG: [[S2:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 2
 // CK24-DAG: [[CBP2:%.+]] = bitcast i8** [[BP2]] to [[SA]]***
 // CK24-DAG: [[CP2:%.+]] = bitcast i8** [[P2]] to i32**
 // CK24-DAG: store [[SA]]** [[SEC0]], [[SA]]*** [[CBP2]]
 // CK24-DAG: store i32* [[SEC1:%.+]], i32** [[CP2]]
-// CK24-DAG: store i64 {{.+}}, i64* [[S2]]
 // CK24-DAG: [[SEC1]] = getelementptr {{.*}}[10 x i32]* [[SEC11:%[^,]+]], i{{.+}} 0, i{{.+}} 0
 // CK24-DAG: [[SEC11]] = getelementptr {{.*}}[[SA]]* [[SEC111:%[^,]+]], i{{.+}} 0, i{{.+}} 2
 // CK24-DAG: [[SEC111]] = load [[SA]]*, [[SA]]** [[SEC1111:%[^,]+]],
@@ -509,24 +501,20 @@ int explicit_maps_struct_fields(int a){
 
 // CK24-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1
 // CK24-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1
-// CK24-DAG: [[S1:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 1
 // CK24-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to [[SB]]***
 // CK24-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to [[SA]]***
 // CK24-DAG: store [[SB]]** [[SEC0]], [[SB]]*** [[CBP1]]
 // CK24-DAG: store [[SA]]** [[SEC1:%.+]], [[SA]]*** [[CP1]]
-// CK24-DAG: store i64 {{.+}}, i64* [[S1]]
 // CK24-DAG: [[SEC1]] = getelementptr {{.*}}[[SB]]* [[SEC11:%[^,]+]], i{{.+}} 0, i{{.+}} 4
 // CK24-DAG: [[SEC11]] = load [[SB]]*, [[SB]]** [[SEC111:%[^,]+]],
 // CK24-DAG: [[SEC111]] = getelementptr {{.*}}[[SC]]* [[VAR000:%.+]], i{{.+}} 0, i{{.+}} 2
 
 // CK24-DAG: [[BP2:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 2
 // CK24-DAG: [[P2:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 2
-// CK24-DAG: [[S2:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 2
 // CK24-DAG: [[CBP2:%.+]] = bitcast i8** [[BP2]] to [[SA]]***
 // CK24-DAG: [[CP2:%.+]] = bitcast i8** [[P2]] to [[SA]]***
 // CK24-DAG: store [[SA]]** [[SEC1]], [[SA]]*** [[CBP2]]
 // CK24-DAG: store [[SA]]** [[SEC2:%.+]], [[SA]]*** [[CP2]]
-// CK24-DAG: store i64 {{.+}}, i64* [[S2]]
 // CK24-DAG: [[SEC2]] = getelementptr {{.*}}[[SA]]* [[SEC22:%[^,]+]], i{{.+}} 0, i{{.+}} 1
 // CK24-DAG: [[SEC22]] = load [[SA]]*, [[SA]]** [[SEC222:%[^,]+]],
 // CK24-DAG: [[SEC222]] = getelementptr {{.*}}[[SB]]* [[SEC2222:%[^,]+]], i{{.+}} 0, i{{.+}} 4
@@ -535,12 +523,10 @@ int explicit_maps_struct_fields(int a){
 
 // CK24-DAG: [[BP3:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 3
 // CK24-DAG: [[P3:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 3
-// CK24-DAG: [[S3:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 3
 // CK24-DAG: [[CBP3:%.+]] = bitcast i8** [[BP3]] to [[SA]]***
 // CK24-DAG: [[CP3:%.+]] = bitcast i8** [[P3]] to i32**
 // CK24-DAG: store [[SA]]** [[SEC2]], [[SA]]*** [[CBP3]]
 // CK24-DAG: store i32* [[SEC3:%.+]], i32** [[CP3]]
-// CK24-DAG: store i64 {{.+}}, i64* [[S3]]
 // CK24-DAG: [[SEC3]] = getelementptr {{.*}}[[SA]]* [[SEC33:%[^,]+]], i{{.+}} 0, i{{.+}} 0
 // CK24-DAG: [[SEC33]] = load [[SA]]*, [[SA]]** [[SEC333:%[^,]+]],
 // CK24-DAG: [[SEC333]] = getelementptr {{.*}}[[SA]]* [[SEC3333:%[^,]+]], i{{.+}} 0, i{{.+}} 1

diff  --git a/clang/test/OpenMP/target_map_codegen_28.cpp b/clang/test/OpenMP/target_map_codegen_28.cpp
index 9d561eb5a0e53..45cd219f81d8b 100644
--- a/clang/test/OpenMP/target_map_codegen_28.cpp
+++ b/clang/test/OpenMP/target_map_codegen_28.cpp
@@ -37,12 +37,15 @@
 // CK29: [[SSB:%.+]]  = type { [[SSA]]*, [[SSA]]** }
 
 // CK29-LABEL: @.__omp_offloading_{{.*}}foo{{.*}}_l{{[0-9]+}}.region_id = weak constant i8 0
+// CK29: [[SIZE00:@.+]] = private {{.*}}constant [2 x i64] [i64 0, i64 80]
 // CK29: [[MTYPE00:@.+]] = private {{.*}}constant [2 x i64] [i64 32, i64 281474976710675]
 
 // CK29-LABEL: @.__omp_offloading_{{.*}}foo{{.*}}_l{{[0-9]+}}.region_id = weak constant i8 0
+// CK29: [[SIZE01:@.+]] = private {{.*}}constant [3 x i64] [i64 0, i64 {{8|4}}, i64 80]
 // CK29: [[MTYPE01:@.+]] = private {{.*}}constant [3 x i64] [i64 32, i64 281474976710672, i64 19]
 
 // CK29-LABEL: @.__omp_offloading_{{.*}}foo{{.*}}_l{{[0-9]+}}.region_id = weak constant i8 0
+// CK29: [[SIZE02:@.+]] = private {{.*}}constant [2 x i64] [i64 0, i64 80]
 // CK29: [[MTYPE02:@.+]] = private {{.*}}constant [2 x i64] [i64 32, i64 281474976710675]
 
 struct SSA{
@@ -79,12 +82,10 @@ struct SSB{
 
     // CK29-DAG: [[BP2:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1
     // CK29-DAG: [[P2:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1
-    // CK29-DAG: [[S2:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 1
     // CK29-DAG: [[CBP2:%.+]] = bitcast i8** [[BP2]] to double****
     // CK29-DAG: [[CP2:%.+]] = bitcast i8** [[P2]] to double**
     // CK29-DAG: store double*** [[VAR1:%.+]], double**** [[CBP2]]
     // CK29-DAG: store double* [[VAR2:%.+]], double** [[CP2]]
-    // CK29-DAG: store i64 80, i64* [[S2]]
     // CK29-DAG: [[VAR1]] = getelementptr inbounds [[SSA]], [[SSA]]* %{{.+}}, i32 0, i32 1
     // CK29-DAG: [[VAR2]] = getelementptr inbounds double, double* [[VAR22:%.+]], i{{.+}} 0
     // CK29-DAG: [[VAR22]] = load double*, double** %{{.+}},
@@ -114,22 +115,18 @@ struct SSB{
 
     // CK29-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1
     // CK29-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1
-    // CK29-DAG: [[S1:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 1
     // CK29-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to [[SSA]]****
     // CK29-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to double***
     // CK29-DAG: store [[SSA]]*** [[VAR000]], [[SSA]]**** [[CBP1]]
     // CK29-DAG: store double** [[VAR1:%.+]], double*** [[CP1]]
-    // CK29-DAG: store i64 {{8|4}}, i64* [[S1]]
     // CK29-DAG: [[VAR1]] = getelementptr inbounds [[SSA]], [[SSA]]* %{{.+}}, i32 0, i32 0
 
     // CK29-DAG: [[BP2:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 2
     // CK29-DAG: [[P2:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 2
-    // CK29-DAG: [[S2:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 2
     // CK29-DAG: [[CBP2:%.+]] = bitcast i8** [[BP2]] to double***
     // CK29-DAG: [[CP2:%.+]] = bitcast i8** [[P2]] to double**
     // CK29-DAG: store double** [[VAR1]], double*** [[CBP2]]
     // CK29-DAG: store double* [[VAR2:%.+]], double** [[CP2]]
-    // CK29-DAG: store i64 80, i64* [[S2]]
     // CK29-DAG: [[VAR2]] = getelementptr inbounds double, double* [[VAR22:%.+]], i{{.+}} 0
     // CK29-DAG: [[VAR22]] = load double*, double** %{{.+}},
 
@@ -158,12 +155,10 @@ struct SSB{
 
     // CK29-DAG: [[BP2:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1
     // CK29-DAG: [[P2:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1
-    // CK29-DAG: [[S2:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 1
     // CK29-DAG: [[CBP2:%.+]] = bitcast i8** [[BP2]] to double****
     // CK29-DAG: [[CP2:%.+]] = bitcast i8** [[P2]] to double**
     // CK29-DAG: store double*** [[VAR1:%.+]], double**** [[CBP2]]
     // CK29-DAG: store double* [[VAR2:%.+]], double** [[CP2]]
-    // CK29-DAG: store i64 80, i64* [[S2]]
     // CK29-DAG: [[VAR1]] = getelementptr inbounds [[SSA]], [[SSA]]* %{{.+}}, i32 0, i32 1
     // CK29-DAG: [[VAR2]] = getelementptr inbounds double, double* [[VAR22:%.+]], i{{.+}} 0
     // CK29-DAG: [[VAR22]] = load double*, double** %{{.+}},

diff  --git a/clang/test/OpenMP/target_map_codegen_29.cpp b/clang/test/OpenMP/target_map_codegen_29.cpp
index 2be0e2534d6d0..a875f214fbe73 100644
--- a/clang/test/OpenMP/target_map_codegen_29.cpp
+++ b/clang/test/OpenMP/target_map_codegen_29.cpp
@@ -37,6 +37,7 @@
 // CK30-DAG: [[STRUCT:%.+]] = type { [[BASE]], i32*, i32*, i32, i32* }
 
 // CK30-LABEL: @.__omp_offloading_{{.*}}map_with_deep_copy{{.*}}_l{{[0-9]+}}.region_id = weak constant i8 0
+// CK30: [[SIZE00:@.+]] = private unnamed_addr constant [4 x i64] [i64 0, i64 {{56|28}}, i64 4, i64 4]
 // The first element: 0x20 - OMP_MAP_TARGET_PARAM
 // 2: 0x1000000000003 - OMP_MAP_MEMBER_OF(0) | OMP_MAP_TO | OMP_MAP_FROM - copies all the data in structs excluding deep-copied elements (from &s to end of s).
 // 3-4: 0x1000000000013 - OMP_MAP_MEMBER_OF(0) | OMP_MAP_PTR_AND_OBJ | OMP_MAP_TO | OMP_MAP_FROM - deep copy of the pointers + pointee.
@@ -82,8 +83,6 @@ typedef struct StructWithPtrTag : public Base {
 // CK30-DAG: [[PTR:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[PTRS]], i32 0, i32 1
 // CK30-DAG: [[BC:%.+]] = bitcast i8** [[PTR]] to [[STRUCT]]**
 // CK30-DAG: store [[STRUCT]]* [[S]], [[STRUCT]]** [[BC]],
-// CK30-DAG: [[SIZE:%.+]] = getelementptr inbounds [4 x i64], [4 x i64]* [[SIZES]], i32 0, i32 1
-// CK30-DAG: store i64 {{56|28}}, i64* [[SIZE]],
 
 // CK30-DAG: [[BASE_PTR:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[BASES]], i32 0, i32 2
 // CK30-DAG: [[BC:%.+]] = bitcast i8** [[BASE_PTR]] to i32***
@@ -91,8 +90,6 @@ typedef struct StructWithPtrTag : public Base {
 // CK30-DAG: [[PTR:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[PTRS]], i32 0, i32 2
 // CK30-DAG: [[BC:%.+]] = bitcast i8** [[PTR]] to i32**
 // CK30-DAG: store i32* [[S_PTR1_BEGIN:%.+]], i32** [[BC]],
-// CK30-DAG: [[SIZE:%.+]] = getelementptr inbounds [4 x i64], [4 x i64]* [[SIZES]], i32 0, i32 2
-// CK30-DAG: store i64 4, i64* [[SIZE]],
 // CK30-DAG: [[S_PTR1]] = getelementptr inbounds [[STRUCT]], [[STRUCT]]* [[S]], i32 0, i32 4
 // CK30-DAG: [[S_PTR1_BEGIN]] = getelementptr inbounds i32, i32* [[S_PTR1_BEGIN_REF:%.+]], i{{64|32}} 0
 // CK30-DAG: [[S_PTR1_BEGIN_REF]] = load i32*, i32** [[S_PTR1:%.+]],
@@ -104,8 +101,6 @@ typedef struct StructWithPtrTag : public Base {
 // CK30-DAG: [[PTR:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[PTRS]], i32 0, i32 3
 // CK30-DAG: [[BC:%.+]] = bitcast i8** [[PTR]] to i32**
 // CK30-DAG: store i32* [[S_PTRBASE1_BEGIN:%.+]], i32** [[BC]],
-// CK30-DAG: [[SIZE:%.+]] = getelementptr inbounds [4 x i{{64|32}}], [4 x i{{64|32}}]* [[SIZES]], i32 0, i32 3
-// CK30-DAG: store i{{64|32}} 4, i{{64|32}}* [[SIZE]],
 // CK30-DAG: [[S_PTRBASE1]] = getelementptr inbounds [[BASE]], [[BASE]]* [[S_BASE:%.+]], i32 0, i32 2
 // CK30-DAG: [[S_BASE]] = bitcast [[STRUCT]]* [[S]] to [[BASE]]*
 // CK30-DAG: [[S_PTRBASE1_BEGIN]] = getelementptr inbounds i32, i32* [[S_PTRBASE1_BEGIN_REF:%.+]], i{{64|32}} 0

diff  --git a/clang/test/OpenMP/target_map_codegen_31.cpp b/clang/test/OpenMP/target_map_codegen_31.cpp
index 6e4e9dd0a6a91..cdd3dc595c7f1 100644
--- a/clang/test/OpenMP/target_map_codegen_31.cpp
+++ b/clang/test/OpenMP/target_map_codegen_31.cpp
@@ -41,6 +41,7 @@ struct ST {
 
 // CK31A-LABEL: @.__omp_offloading_{{.*}}explicit_maps_single{{.*}}_l{{[0-9]+}}.region_id = weak constant i8 0
 //
+// CK31A: [[SIZE00:@.+]] = private {{.*}}constant [7 x i64] [i64 0, i64 4, i64 4, i64 4, i64 0, i64 4, i64 4]
 // PRESENT=0x1000 | TARGET_PARAM=0x20 = 0x1020
 // CK31A-USE: [[MTYPE00:@.+]] = private {{.*}}constant [7 x i64] [i64 [[#0x1020]],
 // CK31A-NOUSE: [[MTYPE00:@.+]] = private {{.*}}constant [7 x i64] [i64 [[#0x1000]],
@@ -102,32 +103,26 @@ void explicit_maps_single (int ii){
 // st1.j
 // CK31A-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1
 // CK31A-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1
-// CK31A-DAG: [[S1:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 1
 // CK31A-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to [[ST]]**
 // CK31A-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to i32**
 // CK31A-DAG: store [[ST]]* [[ST1]], [[ST]]** [[CBP1]]
 // CK31A-DAG: store i32* [[ST1_J]], i32** [[CP1]]
-// CK31A-DAG: store i64 4, i64* [[S1]]
 
 // st1.i
 // CK31A-DAG: [[BP2:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 2
 // CK31A-DAG: [[P2:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 2
-// CK31A-DAG: [[S2:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 2
 // CK31A-DAG: [[CBP2:%.+]] = bitcast i8** [[BP2]] to [[ST]]**
 // CK31A-DAG: [[CP2:%.+]] = bitcast i8** [[P2]] to i32**
 // CK31A-DAG: store [[ST]]* [[ST1]], [[ST]]** [[CBP2]]
 // CK31A-DAG: store i32* [[ST1_I]], i32** [[CP2]]
-// CK31A-DAG: store i64 4, i64* [[S2]]
 
 // a
 // CK31A-DAG: [[BP3:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 3
 // CK31A-DAG: [[P3:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 3
-// CK31A-DAG: [[S3:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 3
 // CK31A-DAG: [[CBP3:%.+]] = bitcast i8** [[BP3]] to i32**
 // CK31A-DAG: [[CP3:%.+]] = bitcast i8** [[P3]] to i32**
 // CK31A-DAG: store i32* [[A]], i32** [[CBP3]]
 // CK31A-DAG: store i32* [[A]], i32** [[CP3]]
-// CK31A-DAG: store i64 4, i64* [[S3]]
 
 // st2
 // CK31A-DAG: [[BP4:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 4
@@ -142,22 +137,18 @@ void explicit_maps_single (int ii){
 // st2.i
 // CK31A-DAG: [[BP5:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 5
 // CK31A-DAG: [[P5:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 5
-// CK31A-DAG: [[S5:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 5
 // CK31A-DAG: [[CBP5:%.+]] = bitcast i8** [[BP5]] to [[ST]]**
 // CK31A-DAG: [[CP5:%.+]] = bitcast i8** [[P5]] to i32**
 // CK31A-DAG: store [[ST]]* [[ST2]], [[ST]]** [[CBP5]]
 // CK31A-DAG: store i32* [[ST2_I]], i32** [[CP5]]
-// CK31A-DAG: store i64 4, i64* [[S5]]
 
 // st2.j
 // CK31A-DAG: [[BP6:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 6
 // CK31A-DAG: [[P6:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 6
-// CK31A-DAG: [[S6:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 6
 // CK31A-DAG: [[CBP6:%.+]] = bitcast i8** [[BP6]] to [[ST]]**
 // CK31A-DAG: [[CP6:%.+]] = bitcast i8** [[P6]] to i32**
 // CK31A-DAG: store [[ST]]* [[ST2]], [[ST]]** [[CBP6]]
 // CK31A-DAG: store i32* [[ST2_J]], i32** [[CP6]]
-// CK31A-DAG: store i64 4, i64* [[S6]]
 
 // CK31A-USE: call void [[CALL00:@.+]]([[ST]]* [[ST1]], i32* [[A]], [[ST]]* [[ST2]])
 // CK31A-NOUSE: call void [[CALL00:@.+]]()

diff  --git a/clang/test/OpenMP/target_map_codegen_32.cpp b/clang/test/OpenMP/target_map_codegen_32.cpp
index 8f1b6c5bbada9..2b857c11e1b5f 100644
--- a/clang/test/OpenMP/target_map_codegen_32.cpp
+++ b/clang/test/OpenMP/target_map_codegen_32.cpp
@@ -40,6 +40,7 @@
 // MEMBER_OF_1=0x1000000000000 | FROM=0x2 | TO=0x1 = 0x1000000000003
 
 // CK31B-LABEL: @.__omp_offloading_{{.*}}test_present_members{{.*}}_l{{[0-9]+}}.region_id = weak constant i8 0
+// CK31B: [[SIZE00:@.+]] = private {{.*}}constant [3 x i64] [i64 0, i64 4, i64 4]
 // CK31B-USE: [[MTYPE00:@.+]] = private {{.*}}constant [3 x i64] [i64 [[#0x1020]],
 // CK31B-NOUSE: [[MTYPE00:@.+]] = private {{.*}}constant [3 x i64] [i64 [[#0x1000]],
 // CK31B-USE-SAME: {{^}} i64 [[#0x1000000001003]], i64 [[#0x1000000000003]]]
@@ -73,22 +74,18 @@ struct ST {
 // j
 // CK31B-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1
 // CK31B-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1
-// CK31B-DAG: [[S1:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 1
 // CK31B-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to [[ST]]**
 // CK31B-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to i32**
 // CK31B-DAG: store [[ST]]* [[THIS]], [[ST]]** [[CBP1]]
 // CK31B-DAG: store i32* [[J]], i32** [[CP1]]
-// CK31B-DAG: store i64 4, i64* [[S1]]
 
 // i
 // CK31B-DAG: [[BP2:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 2
 // CK31B-DAG: [[P2:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 2
-// CK31B-DAG: [[S2:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 2
 // CK31B-DAG: [[CBP2:%.+]] = bitcast i8** [[BP2]] to [[ST]]**
 // CK31B-DAG: [[CP2:%.+]] = bitcast i8** [[P2]] to i32**
 // CK31B-DAG: store [[ST]]* [[THIS]], [[ST]]** [[CBP2]]
 // CK31B-DAG: store i32* [[I]], i32** [[CP2]]
-// CK31B-DAG: store i64 4, i64* [[S2]]
 
 // CK31B-USE: call void [[CALL00:@.+]]([[ST]]* [[THIS]])
 // CK31B-NOUSE: call void [[CALL00:@.+]]()

diff  --git a/clang/test/OpenMP/target_map_codegen_34.cpp b/clang/test/OpenMP/target_map_codegen_34.cpp
index 43b07a033804b..8523cd2621185 100644
--- a/clang/test/OpenMP/target_map_codegen_34.cpp
+++ b/clang/test/OpenMP/target_map_codegen_34.cpp
@@ -35,10 +35,12 @@ class S {
   void foo();
 };
 
+// CK34-DAG: [[SIZE_TO:@.+]] = private {{.*}}constant [4 x i64] [i64 0, i64 0, i64 0, i64 {{16|8}}]
 // TARGET_PARAM = 0x20
 // MEMBER_OF_1 | TO = 0x1000000000001
 // MEMBER_OF_1 | IMPLICIT | TO = 0x1000000000201
 // CK34-DAG: [[MTYPE_TO:@.+]] = {{.+}}constant [4 x i64] [i64 [[#0x20]], i64 [[#0x1000000000001]], i64 [[#0x1000000000001]], i64 [[#0x1000000000201]]]
+// CK34-DAG: [[SIZE_FROM:@.+]] = private {{.*}}constant [4 x i64] [i64 0, i64 0, i64 0, i64 {{16|8}}]
 // TARGET_PARAM = 0x20
 // MEMBER_OF_1 | FROM = 0x1000000000002
 // MEMBER_OF_1 | IMPLICIT | FROM = 0x1000000000202
@@ -131,7 +133,6 @@ void default_mapper() {
 
   // CK34-DAG: [[BP3:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 3
   // CK34-DAG: [[P3:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 3
-  // CK34-DAG: [[S3:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 3
   // CK34-DAG: [[MF3:%.+]] = getelementptr inbounds {{.+}}[[MF]], i{{.+}} 0, i{{.+}} 3
 
   // CK34-DAG: [[BPC3:%.+]] = bitcast i8** [[BP3]] to %class.S**
@@ -139,8 +140,6 @@ void default_mapper() {
 
   // CK34-DAG: store %class.S* [[S_ADDR]], %class.S** [[BPC3]],
   // CK34-DAG: store %class.C* [[C_ADDR:%.+]], %class.C** [[PC3]],
-  // CK34-64-DAG: store i64 16, i64* [[S3]],
-  // CK34-32-DAG: store i64 8, i64* [[S3]],
   // CK34-DAG: store i8* bitcast (void (i8*, i8*, i8*, i64, i64, i8*)* [[C_DEFAULT_MAPPER:@.+]] to i8*), i8** [[MF3]],
 
   // CK34-64-DAG: [[C_ADDR]] = getelementptr inbounds %class.S, %class.S* [[S_ADDR]], i32 0, i32 2
@@ -235,7 +234,6 @@ void default_mapper() {
 
   // CK34-DAG: [[BP3:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 3
   // CK34-DAG: [[P3:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 3
-  // CK34-DAG: [[S3:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 3
   // CK34-DAG: [[MF3:%.+]] = getelementptr inbounds {{.+}}[[MF]], i{{.+}} 0, i{{.+}} 3
 
   // CK34-DAG: [[BPC3:%.+]] = bitcast i8** [[BP3]] to %class.S**
@@ -243,8 +241,6 @@ void default_mapper() {
 
   // CK34-DAG: store %class.S* [[S_ADDR]], %class.S** [[BPC3]],
   // CK34-DAG: store %class.C* [[C_ADDR:%.+]], %class.C** [[PC3]],
-  // CK34-64-DAG: store i64 16, i64* [[S3]],
-  // CK34-32-DAG: store i64 8, i64* [[S3]],
   // CK34-DAG: store i8* bitcast (void (i8*, i8*, i8*, i64, i64, i8*)* [[C_DEFAULT_MAPPER]] to i8*), i8** [[MF3]],
 
   // CK34-64-DAG: [[C_ADDR]] = getelementptr inbounds %class.S, %class.S* [[S_ADDR]], i32 0, i32 2

diff  --git a/clang/test/OpenMP/target_map_codegen_35.cpp b/clang/test/OpenMP/target_map_codegen_35.cpp
index 74fbcf7039584..531648209522d 100644
--- a/clang/test/OpenMP/target_map_codegen_35.cpp
+++ b/clang/test/OpenMP/target_map_codegen_35.cpp
@@ -27,10 +27,12 @@ class S {
   void foo();
 };
 
+// CK35-DAG: [[SIZE_TO:@.+]] = private {{.*}}constant [4 x i64] [i64 0, i64 0, i64 0, i64 8]
 // TARGET_PARAM = 0x20
 // MEMBER_OF_1 | TO = 0x1000000000001
 // MEMBER_OF_1 | PTR_AND_OBJ | TO = 0x1000000000011
 // CK35-DAG: [[MTYPE_TO:@.+]] = {{.+}}constant [4 x i64] [i64 [[#0x20]], i64 [[#0x1000000000001]], i64 [[#0x1000000000001]], i64 [[#0x1000000000011]]]
+// CK35-DAG: [[SIZE_FROM:@.+]] = private {{.*}}constant [2 x i64] [i64 0, i64 8]
 // TARGET_PARAM = 0x20
 // MEMBER_OF_1 | PTR_AND_OBJ | FROM = 0x1000000000012
 // CK35-DAG: [[MTYPE_FROM:@.+]] = {{.+}}constant [2 x i64] [i64 [[#0x20]], i64 [[#0x1000000000012]]]
@@ -115,14 +117,12 @@ void ref_map() {
 
   // CK35-DAG: [[BP3:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 3
   // CK35-DAG: [[P3:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 3
-  // CK35-DAG: [[S3:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 3
 
   // CK35-DAG: [[BPC3:%.+]] = bitcast i8** [[BP3]] to %class.S**
   // CK35-DAG: [[PC3:%.+]] = bitcast i8** [[P3]] to double**
 
   // CK35-DAG: store %class.S* [[S_ADDR]], %class.S** [[BPC3]],
   // CK35-DAG: store double* [[B_ADDR:%.+]], double** [[PC3]],
-  // CK35-DAG: store i64 8, i64* [[S3]],
 
   // CK35-DAG: [[B_ADDR]] = load double*, double** [[B_REF:%.+]],
   // CK35-DAG: [[B_REF]] = getelementptr inbounds %class.S, %class.S* [[S_ADDR]], i32 0, i32 1
@@ -163,14 +163,12 @@ void ref_map() {
 
   // CK35-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1
   // CK35-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1
-  // CK35-DAG: [[S1:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 1
 
   // CK35-DAG: [[BPC1:%.+]] = bitcast i8** [[BP1]] to %class.S**
   // CK35-DAG: [[PC1:%.+]] = bitcast i8** [[P1]] to double**
 
   // CK35-DAG: store %class.S* [[S_ADDR]], %class.S** [[BPC1]],
   // CK35-DAG: store double* [[B_ADDR:%.+]], double** [[PC1]],
-  // CK35-DAG: store i64 8, i64* [[S1]],
 
   // CK35-DAG: [[B_ADDR]] = load double*, double** [[SB_ADDR]],
 

diff  --git a/clang/test/OpenMP/target_map_codegen_hold.cpp b/clang/test/OpenMP/target_map_codegen_hold.cpp
index 98e15dfcebfad..1641f48904d54 100644
--- a/clang/test/OpenMP/target_map_codegen_hold.cpp
+++ b/clang/test/OpenMP/target_map_codegen_hold.cpp
@@ -1,4 +1,4 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" --global-value-regex ".offload_maptypes.*" ".offload_sizes.*" --global-hex-value-regex ".offload_maptypes.*"
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" --prefix-filecheck-ir-name _ --global-value-regex ".offload_maptypes.*" ".offload_sizes.*" --global-hex-value-regex ".offload_maptypes.*"
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
@@ -83,25 +83,33 @@
 // MEMBER_OF_5  = 0x5000000000000
 
 //.
+// CHECK-USE-PPC64LE: @.offload_sizes = private unnamed_addr constant [7 x i64] [i64 0, i64 4, i64 4, i64 4, i64 0, i64 4, i64 4]
 // CHECK-USE-PPC64LE: @.offload_maptypes = private unnamed_addr constant [7 x i64] [i64 [[#0x2020]], i64 [[#0x1000000002003]], i64 [[#0x1000000002003]], i64 [[#0x2023]], i64 [[#0x2020]], i64 [[#0x5000000002003]], i64 [[#0x5000000002003]]]
-// CHECK-USE-PPC64LE: @.offload_sizes = private unnamed_addr constant [1 x i64] [i64 4]
-// CHECK-USE-PPC64LE: @.offload_maptypes.1 = private unnamed_addr constant [1 x i64] [i64 [[#0x2427]]]
-// CHECK-USE-PPC64LE: @.offload_maptypes.2 = private unnamed_addr constant [3 x i64] [i64 [[#0x2020]], i64 [[#0x1000000002003]], i64 [[#0x1000000002003]]]
+// CHECK-USE-PPC64LE: @.offload_sizes.1 = private unnamed_addr constant [1 x i64] [i64 4]
+// CHECK-USE-PPC64LE: @.offload_maptypes.2 = private unnamed_addr constant [1 x i64] [i64 [[#0x2427]]]
+// CHECK-USE-PPC64LE: @.offload_sizes.3 = private unnamed_addr constant [3 x i64] [i64 0, i64 4, i64 4]
+// CHECK-USE-PPC64LE: @.offload_maptypes.4 = private unnamed_addr constant [3 x i64] [i64 [[#0x2020]], i64 [[#0x1000000002003]], i64 [[#0x1000000002003]]]
 //.
+// CHECK-USE-I386: @.offload_sizes = private unnamed_addr constant [7 x i64] [i64 0, i64 4, i64 4, i64 4, i64 0, i64 4, i64 4]
 // CHECK-USE-I386: @.offload_maptypes = private unnamed_addr constant [7 x i64] [i64 [[#0x2020]], i64 [[#0x1000000002003]], i64 [[#0x1000000002003]], i64 [[#0x2023]], i64 [[#0x2020]], i64 [[#0x5000000002003]], i64 [[#0x5000000002003]]]
-// CHECK-USE-I386: @.offload_sizes = private unnamed_addr constant [1 x i64] [i64 4]
-// CHECK-USE-I386: @.offload_maptypes.1 = private unnamed_addr constant [1 x i64] [i64 [[#0x2427]]]
-// CHECK-USE-I386: @.offload_maptypes.2 = private unnamed_addr constant [3 x i64] [i64 [[#0x2020]], i64 [[#0x1000000002003]], i64 [[#0x1000000002003]]]
+// CHECK-USE-I386: @.offload_sizes.1 = private unnamed_addr constant [1 x i64] [i64 4]
+// CHECK-USE-I386: @.offload_maptypes.2 = private unnamed_addr constant [1 x i64] [i64 [[#0x2427]]]
+// CHECK-USE-I386: @.offload_sizes.3 = private unnamed_addr constant [3 x i64] [i64 0, i64 4, i64 4]
+// CHECK-USE-I386: @.offload_maptypes.4 = private unnamed_addr constant [3 x i64] [i64 [[#0x2020]], i64 [[#0x1000000002003]], i64 [[#0x1000000002003]]]
 //.
+// CHECK-NOUSE-PPC64LE: @.offload_sizes = private unnamed_addr constant [7 x i64] [i64 0, i64 4, i64 4, i64 4, i64 0, i64 4, i64 4]
 // CHECK-NOUSE-PPC64LE: @.offload_maptypes = private unnamed_addr constant [7 x i64] [i64 [[#0x2000]], i64 [[#0x1000000002003]], i64 [[#0x1000000002003]], i64 [[#0x2003]], i64 [[#0x2000]], i64 [[#0x5000000002003]], i64 [[#0x5000000002003]]]
-// CHECK-NOUSE-PPC64LE: @.offload_sizes = private unnamed_addr constant [1 x i64] [i64 4]
-// CHECK-NOUSE-PPC64LE: @.offload_maptypes.1 = private unnamed_addr constant [1 x i64] [i64 [[#0x2407]]]
-// CHECK-NOUSE-PPC64LE: @.offload_maptypes.2 = private unnamed_addr constant [3 x i64] [i64 [[#0x2000]], i64 [[#0x1000000002003]], i64 [[#0x1000000002003]]]
+// CHECK-NOUSE-PPC64LE: @.offload_sizes.1 = private unnamed_addr constant [1 x i64] [i64 4]
+// CHECK-NOUSE-PPC64LE: @.offload_maptypes.2 = private unnamed_addr constant [1 x i64] [i64 [[#0x2407]]]
+// CHECK-NOUSE-PPC64LE: @.offload_sizes.3 = private unnamed_addr constant [3 x i64] [i64 0, i64 4, i64 4]
+// CHECK-NOUSE-PPC64LE: @.offload_maptypes.4 = private unnamed_addr constant [3 x i64] [i64 [[#0x2000]], i64 [[#0x1000000002003]], i64 [[#0x1000000002003]]]
 //.
+// CHECK-NOUSE-I386: @.offload_sizes = private unnamed_addr constant [7 x i64] [i64 0, i64 4, i64 4, i64 4, i64 0, i64 4, i64 4]
 // CHECK-NOUSE-I386: @.offload_maptypes = private unnamed_addr constant [7 x i64] [i64 [[#0x2000]], i64 [[#0x1000000002003]], i64 [[#0x1000000002003]], i64 [[#0x2003]], i64 [[#0x2000]], i64 [[#0x5000000002003]], i64 [[#0x5000000002003]]]
-// CHECK-NOUSE-I386: @.offload_sizes = private unnamed_addr constant [1 x i64] [i64 4]
-// CHECK-NOUSE-I386: @.offload_maptypes.1 = private unnamed_addr constant [1 x i64] [i64 [[#0x2407]]]
-// CHECK-NOUSE-I386: @.offload_maptypes.2 = private unnamed_addr constant [3 x i64] [i64 [[#0x2000]], i64 [[#0x1000000002003]], i64 [[#0x1000000002003]]]
+// CHECK-NOUSE-I386: @.offload_sizes.1 = private unnamed_addr constant [1 x i64] [i64 4]
+// CHECK-NOUSE-I386: @.offload_maptypes.2 = private unnamed_addr constant [1 x i64] [i64 [[#0x2407]]]
+// CHECK-NOUSE-I386: @.offload_sizes.3 = private unnamed_addr constant [3 x i64] [i64 0, i64 4, i64 4]
+// CHECK-NOUSE-I386: @.offload_maptypes.4 = private unnamed_addr constant [3 x i64] [i64 [[#0x2000]], i64 [[#0x1000000002003]], i64 [[#0x1000000002003]]]
 //.
 struct ST {
   int i;
@@ -143,24 +151,24 @@ struct ST {
 // CHECK-USE-PPC64LE-NEXT:    [[TMP12:%.*]] = ptrtoint i8* [[TMP9]] to i64
 // CHECK-USE-PPC64LE-NEXT:    [[TMP13:%.*]] = sub i64 [[TMP11]], [[TMP12]]
 // CHECK-USE-PPC64LE-NEXT:    [[TMP14:%.*]] = sdiv exact i64 [[TMP13]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64)
-// CHECK-USE-PPC64LE-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK-USE-PPC64LE-NEXT:    [[TMP16:%.*]] = bitcast i8** [[TMP15]] to %struct.ST**
-// CHECK-USE-PPC64LE-NEXT:    store %struct.ST* [[ST1]], %struct.ST** [[TMP16]], align 8
-// CHECK-USE-PPC64LE-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK-USE-PPC64LE-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32**
-// CHECK-USE-PPC64LE-NEXT:    store i32* [[I]], i32** [[TMP18]], align 8
-// CHECK-USE-PPC64LE-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [7 x i64], [7 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK-USE-PPC64LE-NEXT:    store i64 [[TMP7]], i64* [[TMP19]], align 8
-// CHECK-USE-PPC64LE-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
-// CHECK-USE-PPC64LE-NEXT:    store i8* null, i8** [[TMP20]], align 8
-// CHECK-USE-PPC64LE-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
-// CHECK-USE-PPC64LE-NEXT:    [[TMP22:%.*]] = bitcast i8** [[TMP21]] to %struct.ST**
-// CHECK-USE-PPC64LE-NEXT:    store %struct.ST* [[ST1]], %struct.ST** [[TMP22]], align 8
-// CHECK-USE-PPC64LE-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
-// CHECK-USE-PPC64LE-NEXT:    [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i32**
-// CHECK-USE-PPC64LE-NEXT:    store i32* [[I]], i32** [[TMP24]], align 8
-// CHECK-USE-PPC64LE-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [7 x i64], [7 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK-USE-PPC64LE-NEXT:    store i64 4, i64* [[TMP25]], align 8
+// CHECK-USE-PPC64LE-NEXT:    [[TMP15:%.*]] = bitcast [7 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK-USE-PPC64LE-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP15]], i8* align 8 bitcast ([7 x i64]* @.offload_sizes to i8*), i64 56, i1 false)
+// CHECK-USE-PPC64LE-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK-USE-PPC64LE-NEXT:    [[TMP17:%.*]] = bitcast i8** [[TMP16]] to %struct.ST**
+// CHECK-USE-PPC64LE-NEXT:    store %struct.ST* [[ST1]], %struct.ST** [[TMP17]], align 8
+// CHECK-USE-PPC64LE-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK-USE-PPC64LE-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
+// CHECK-USE-PPC64LE-NEXT:    store i32* [[I]], i32** [[TMP19]], align 8
+// CHECK-USE-PPC64LE-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [7 x i64], [7 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK-USE-PPC64LE-NEXT:    store i64 [[TMP7]], i64* [[TMP20]], align 8
+// CHECK-USE-PPC64LE-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
+// CHECK-USE-PPC64LE-NEXT:    store i8* null, i8** [[TMP21]], align 8
+// CHECK-USE-PPC64LE-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
+// CHECK-USE-PPC64LE-NEXT:    [[TMP23:%.*]] = bitcast i8** [[TMP22]] to %struct.ST**
+// CHECK-USE-PPC64LE-NEXT:    store %struct.ST* [[ST1]], %struct.ST** [[TMP23]], align 8
+// CHECK-USE-PPC64LE-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
+// CHECK-USE-PPC64LE-NEXT:    [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i32**
+// CHECK-USE-PPC64LE-NEXT:    store i32* [[I]], i32** [[TMP25]], align 8
 // CHECK-USE-PPC64LE-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
 // CHECK-USE-PPC64LE-NEXT:    store i8* null, i8** [[TMP26]], align 8
 // CHECK-USE-PPC64LE-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
@@ -169,75 +177,67 @@ struct ST {
 // CHECK-USE-PPC64LE-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
 // CHECK-USE-PPC64LE-NEXT:    [[TMP30:%.*]] = bitcast i8** [[TMP29]] to i32**
 // CHECK-USE-PPC64LE-NEXT:    store i32* [[J]], i32** [[TMP30]], align 8
-// CHECK-USE-PPC64LE-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [7 x i64], [7 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK-USE-PPC64LE-NEXT:    store i64 4, i64* [[TMP31]], align 8
-// CHECK-USE-PPC64LE-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK-USE-PPC64LE-NEXT:    store i8* null, i8** [[TMP32]], align 8
-// CHECK-USE-PPC64LE-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
-// CHECK-USE-PPC64LE-NEXT:    [[TMP34:%.*]] = bitcast i8** [[TMP33]] to i32**
-// CHECK-USE-PPC64LE-NEXT:    store i32* [[A]], i32** [[TMP34]], align 8
-// CHECK-USE-PPC64LE-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK-USE-PPC64LE-NEXT:    [[TMP36:%.*]] = bitcast i8** [[TMP35]] to i32**
-// CHECK-USE-PPC64LE-NEXT:    store i32* [[A]], i32** [[TMP36]], align 8
-// CHECK-USE-PPC64LE-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [7 x i64], [7 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK-USE-PPC64LE-NEXT:    store i64 4, i64* [[TMP37]], align 8
-// CHECK-USE-PPC64LE-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
-// CHECK-USE-PPC64LE-NEXT:    store i8* null, i8** [[TMP38]], align 8
-// CHECK-USE-PPC64LE-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK-USE-PPC64LE-NEXT:    [[TMP40:%.*]] = bitcast i8** [[TMP39]] to %struct.ST**
-// CHECK-USE-PPC64LE-NEXT:    store %struct.ST* [[ST2]], %struct.ST** [[TMP40]], align 8
-// CHECK-USE-PPC64LE-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
-// CHECK-USE-PPC64LE-NEXT:    [[TMP42:%.*]] = bitcast i8** [[TMP41]] to i32**
-// CHECK-USE-PPC64LE-NEXT:    store i32* [[I1]], i32** [[TMP42]], align 8
-// CHECK-USE-PPC64LE-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [7 x i64], [7 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK-USE-PPC64LE-NEXT:    store i64 [[TMP14]], i64* [[TMP43]], align 8
-// CHECK-USE-PPC64LE-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
-// CHECK-USE-PPC64LE-NEXT:    store i8* null, i8** [[TMP44]], align 8
-// CHECK-USE-PPC64LE-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 5
-// CHECK-USE-PPC64LE-NEXT:    [[TMP46:%.*]] = bitcast i8** [[TMP45]] to %struct.ST**
-// CHECK-USE-PPC64LE-NEXT:    store %struct.ST* [[ST2]], %struct.ST** [[TMP46]], align 8
-// CHECK-USE-PPC64LE-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 5
-// CHECK-USE-PPC64LE-NEXT:    [[TMP48:%.*]] = bitcast i8** [[TMP47]] to i32**
-// CHECK-USE-PPC64LE-NEXT:    store i32* [[I1]], i32** [[TMP48]], align 8
-// CHECK-USE-PPC64LE-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [7 x i64], [7 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 5
-// CHECK-USE-PPC64LE-NEXT:    store i64 4, i64* [[TMP49]], align 8
-// CHECK-USE-PPC64LE-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 5
-// CHECK-USE-PPC64LE-NEXT:    store i8* null, i8** [[TMP50]], align 8
-// CHECK-USE-PPC64LE-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 6
-// CHECK-USE-PPC64LE-NEXT:    [[TMP52:%.*]] = bitcast i8** [[TMP51]] to %struct.ST**
-// CHECK-USE-PPC64LE-NEXT:    store %struct.ST* [[ST2]], %struct.ST** [[TMP52]], align 8
-// CHECK-USE-PPC64LE-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 6
-// CHECK-USE-PPC64LE-NEXT:    [[TMP54:%.*]] = bitcast i8** [[TMP53]] to i32**
-// CHECK-USE-PPC64LE-NEXT:    store i32* [[J2]], i32** [[TMP54]], align 8
-// CHECK-USE-PPC64LE-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [7 x i64], [7 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 6
-// CHECK-USE-PPC64LE-NEXT:    store i64 4, i64* [[TMP55]], align 8
-// CHECK-USE-PPC64LE-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 6
-// CHECK-USE-PPC64LE-NEXT:    store i8* null, i8** [[TMP56]], align 8
-// CHECK-USE-PPC64LE-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK-USE-PPC64LE-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK-USE-PPC64LE-NEXT:    [[TMP59:%.*]] = getelementptr inbounds [7 x i64], [7 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK-USE-PPC64LE-NEXT:    [[TMP60:%.*]] = call i32 @__tgt_target_mapper(%struct.ident_t* @[[GLOB1:[0-9]+]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20explicit_maps_singlei_l654.region_id, i32 7, i8** [[TMP57]], i8** [[TMP58]], i64* [[TMP59]], i64* getelementptr inbounds ([7 x i64], [7 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null)
-// CHECK-USE-PPC64LE-NEXT:    [[TMP61:%.*]] = icmp ne i32 [[TMP60]], 0
-// CHECK-USE-PPC64LE-NEXT:    br i1 [[TMP61]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK-USE-PPC64LE-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK-USE-PPC64LE-NEXT:    store i8* null, i8** [[TMP31]], align 8
+// CHECK-USE-PPC64LE-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK-USE-PPC64LE-NEXT:    [[TMP33:%.*]] = bitcast i8** [[TMP32]] to i32**
+// CHECK-USE-PPC64LE-NEXT:    store i32* [[A]], i32** [[TMP33]], align 8
+// CHECK-USE-PPC64LE-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
+// CHECK-USE-PPC64LE-NEXT:    [[TMP35:%.*]] = bitcast i8** [[TMP34]] to i32**
+// CHECK-USE-PPC64LE-NEXT:    store i32* [[A]], i32** [[TMP35]], align 8
+// CHECK-USE-PPC64LE-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
+// CHECK-USE-PPC64LE-NEXT:    store i8* null, i8** [[TMP36]], align 8
+// CHECK-USE-PPC64LE-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK-USE-PPC64LE-NEXT:    [[TMP38:%.*]] = bitcast i8** [[TMP37]] to %struct.ST**
+// CHECK-USE-PPC64LE-NEXT:    store %struct.ST* [[ST2]], %struct.ST** [[TMP38]], align 8
+// CHECK-USE-PPC64LE-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK-USE-PPC64LE-NEXT:    [[TMP40:%.*]] = bitcast i8** [[TMP39]] to i32**
+// CHECK-USE-PPC64LE-NEXT:    store i32* [[I1]], i32** [[TMP40]], align 8
+// CHECK-USE-PPC64LE-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [7 x i64], [7 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
+// CHECK-USE-PPC64LE-NEXT:    store i64 [[TMP14]], i64* [[TMP41]], align 8
+// CHECK-USE-PPC64LE-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
+// CHECK-USE-PPC64LE-NEXT:    store i8* null, i8** [[TMP42]], align 8
+// CHECK-USE-PPC64LE-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 5
+// CHECK-USE-PPC64LE-NEXT:    [[TMP44:%.*]] = bitcast i8** [[TMP43]] to %struct.ST**
+// CHECK-USE-PPC64LE-NEXT:    store %struct.ST* [[ST2]], %struct.ST** [[TMP44]], align 8
+// CHECK-USE-PPC64LE-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 5
+// CHECK-USE-PPC64LE-NEXT:    [[TMP46:%.*]] = bitcast i8** [[TMP45]] to i32**
+// CHECK-USE-PPC64LE-NEXT:    store i32* [[I1]], i32** [[TMP46]], align 8
+// CHECK-USE-PPC64LE-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 5
+// CHECK-USE-PPC64LE-NEXT:    store i8* null, i8** [[TMP47]], align 8
+// CHECK-USE-PPC64LE-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 6
+// CHECK-USE-PPC64LE-NEXT:    [[TMP49:%.*]] = bitcast i8** [[TMP48]] to %struct.ST**
+// CHECK-USE-PPC64LE-NEXT:    store %struct.ST* [[ST2]], %struct.ST** [[TMP49]], align 8
+// CHECK-USE-PPC64LE-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 6
+// CHECK-USE-PPC64LE-NEXT:    [[TMP51:%.*]] = bitcast i8** [[TMP50]] to i32**
+// CHECK-USE-PPC64LE-NEXT:    store i32* [[J2]], i32** [[TMP51]], align 8
+// CHECK-USE-PPC64LE-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 6
+// CHECK-USE-PPC64LE-NEXT:    store i8* null, i8** [[TMP52]], align 8
+// CHECK-USE-PPC64LE-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK-USE-PPC64LE-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK-USE-PPC64LE-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [7 x i64], [7 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK-USE-PPC64LE-NEXT:    [[TMP56:%.*]] = call i32 @__tgt_target_mapper(%struct.ident_t* @[[GLOB1:[0-9]+]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20explicit_maps_singlei_l630.region_id, i32 7, i8** [[TMP53]], i8** [[TMP54]], i64* [[TMP55]], i64* getelementptr inbounds ([7 x i64], [7 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null)
+// CHECK-USE-PPC64LE-NEXT:    [[TMP57:%.*]] = icmp ne i32 [[TMP56]], 0
+// CHECK-USE-PPC64LE-NEXT:    br i1 [[TMP57]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK-USE-PPC64LE:       omp_offload.failed:
-// CHECK-USE-PPC64LE-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20explicit_maps_singlei_l654(%struct.ST* [[ST1]], i32* [[A]], %struct.ST* [[ST2]]) #[[ATTR2:[0-9]+]]
+// CHECK-USE-PPC64LE-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20explicit_maps_singlei_l630(%struct.ST* [[ST1]], i32* [[A]], %struct.ST* [[ST2]]) #[[ATTR3:[0-9]+]]
 // CHECK-USE-PPC64LE-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK-USE-PPC64LE:       omp_offload.cont:
-// CHECK-USE-PPC64LE-NEXT:    [[TMP62:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0
-// CHECK-USE-PPC64LE-NEXT:    [[TMP63:%.*]] = bitcast i8** [[TMP62]] to i32**
-// CHECK-USE-PPC64LE-NEXT:    store i32* [[A]], i32** [[TMP63]], align 8
+// CHECK-USE-PPC64LE-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0
+// CHECK-USE-PPC64LE-NEXT:    [[TMP59:%.*]] = bitcast i8** [[TMP58]] to i32**
+// CHECK-USE-PPC64LE-NEXT:    store i32* [[A]], i32** [[TMP59]], align 8
+// CHECK-USE-PPC64LE-NEXT:    [[TMP60:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS4]], i32 0, i32 0
+// CHECK-USE-PPC64LE-NEXT:    [[TMP61:%.*]] = bitcast i8** [[TMP60]] to i32**
+// CHECK-USE-PPC64LE-NEXT:    store i32* [[A]], i32** [[TMP61]], align 8
+// CHECK-USE-PPC64LE-NEXT:    [[TMP62:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_MAPPERS5]], i64 0, i64 0
+// CHECK-USE-PPC64LE-NEXT:    store i8* null, i8** [[TMP62]], align 8
+// CHECK-USE-PPC64LE-NEXT:    [[TMP63:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0
 // CHECK-USE-PPC64LE-NEXT:    [[TMP64:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS4]], i32 0, i32 0
-// CHECK-USE-PPC64LE-NEXT:    [[TMP65:%.*]] = bitcast i8** [[TMP64]] to i32**
-// CHECK-USE-PPC64LE-NEXT:    store i32* [[A]], i32** [[TMP65]], align 8
-// CHECK-USE-PPC64LE-NEXT:    [[TMP66:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_MAPPERS5]], i64 0, i64 0
-// CHECK-USE-PPC64LE-NEXT:    store i8* null, i8** [[TMP66]], align 8
-// CHECK-USE-PPC64LE-NEXT:    [[TMP67:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0
-// CHECK-USE-PPC64LE-NEXT:    [[TMP68:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS4]], i32 0, i32 0
-// CHECK-USE-PPC64LE-NEXT:    [[TMP69:%.*]] = call i32 @__tgt_target_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20explicit_maps_singlei_l668.region_id, i32 1, i8** [[TMP67]], i8** [[TMP68]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.1, i32 0, i32 0), i8** null, i8** null)
-// CHECK-USE-PPC64LE-NEXT:    [[TMP70:%.*]] = icmp ne i32 [[TMP69]], 0
-// CHECK-USE-PPC64LE-NEXT:    br i1 [[TMP70]], label [[OMP_OFFLOAD_FAILED6:%.*]], label [[OMP_OFFLOAD_CONT7:%.*]]
+// CHECK-USE-PPC64LE-NEXT:    [[TMP65:%.*]] = call i32 @__tgt_target_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20explicit_maps_singlei_l644.region_id, i32 1, i8** [[TMP63]], i8** [[TMP64]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.1, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.2, i32 0, i32 0), i8** null, i8** null)
+// CHECK-USE-PPC64LE-NEXT:    [[TMP66:%.*]] = icmp ne i32 [[TMP65]], 0
+// CHECK-USE-PPC64LE-NEXT:    br i1 [[TMP66]], label [[OMP_OFFLOAD_FAILED6:%.*]], label [[OMP_OFFLOAD_CONT7:%.*]]
 // CHECK-USE-PPC64LE:       omp_offload.failed6:
-// CHECK-USE-PPC64LE-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20explicit_maps_singlei_l668(i32* [[A]]) #[[ATTR2]]
+// CHECK-USE-PPC64LE-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20explicit_maps_singlei_l644(i32* [[A]]) #[[ATTR3]]
 // CHECK-USE-PPC64LE-NEXT:    br label [[OMP_OFFLOAD_CONT7]]
 // CHECK-USE-PPC64LE:       omp_offload.cont7:
 // CHECK-USE-PPC64LE-NEXT:    ret void
@@ -276,24 +276,24 @@ struct ST {
 // CHECK-USE-I386-NEXT:    [[TMP12:%.*]] = ptrtoint i8* [[TMP9]] to i64
 // CHECK-USE-I386-NEXT:    [[TMP13:%.*]] = sub i64 [[TMP11]], [[TMP12]]
 // CHECK-USE-I386-NEXT:    [[TMP14:%.*]] = sdiv exact i64 [[TMP13]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64)
-// CHECK-USE-I386-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK-USE-I386-NEXT:    [[TMP16:%.*]] = bitcast i8** [[TMP15]] to %struct.ST**
-// CHECK-USE-I386-NEXT:    store %struct.ST* [[ST1]], %struct.ST** [[TMP16]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK-USE-I386-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32**
-// CHECK-USE-I386-NEXT:    store i32* [[I]], i32** [[TMP18]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [7 x i64], [7 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK-USE-I386-NEXT:    store i64 [[TMP7]], i64* [[TMP19]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
-// CHECK-USE-I386-NEXT:    store i8* null, i8** [[TMP20]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
-// CHECK-USE-I386-NEXT:    [[TMP22:%.*]] = bitcast i8** [[TMP21]] to %struct.ST**
-// CHECK-USE-I386-NEXT:    store %struct.ST* [[ST1]], %struct.ST** [[TMP22]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
-// CHECK-USE-I386-NEXT:    [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i32**
-// CHECK-USE-I386-NEXT:    store i32* [[I]], i32** [[TMP24]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [7 x i64], [7 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK-USE-I386-NEXT:    store i64 4, i64* [[TMP25]], align 4
+// CHECK-USE-I386-NEXT:    [[TMP15:%.*]] = bitcast [7 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK-USE-I386-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP15]], i8* align 4 bitcast ([7 x i64]* @.offload_sizes to i8*), i32 56, i1 false)
+// CHECK-USE-I386-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK-USE-I386-NEXT:    [[TMP17:%.*]] = bitcast i8** [[TMP16]] to %struct.ST**
+// CHECK-USE-I386-NEXT:    store %struct.ST* [[ST1]], %struct.ST** [[TMP17]], align 4
+// CHECK-USE-I386-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK-USE-I386-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
+// CHECK-USE-I386-NEXT:    store i32* [[I]], i32** [[TMP19]], align 4
+// CHECK-USE-I386-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [7 x i64], [7 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK-USE-I386-NEXT:    store i64 [[TMP7]], i64* [[TMP20]], align 4
+// CHECK-USE-I386-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
+// CHECK-USE-I386-NEXT:    store i8* null, i8** [[TMP21]], align 4
+// CHECK-USE-I386-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
+// CHECK-USE-I386-NEXT:    [[TMP23:%.*]] = bitcast i8** [[TMP22]] to %struct.ST**
+// CHECK-USE-I386-NEXT:    store %struct.ST* [[ST1]], %struct.ST** [[TMP23]], align 4
+// CHECK-USE-I386-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
+// CHECK-USE-I386-NEXT:    [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i32**
+// CHECK-USE-I386-NEXT:    store i32* [[I]], i32** [[TMP25]], align 4
 // CHECK-USE-I386-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
 // CHECK-USE-I386-NEXT:    store i8* null, i8** [[TMP26]], align 4
 // CHECK-USE-I386-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
@@ -302,75 +302,67 @@ struct ST {
 // CHECK-USE-I386-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
 // CHECK-USE-I386-NEXT:    [[TMP30:%.*]] = bitcast i8** [[TMP29]] to i32**
 // CHECK-USE-I386-NEXT:    store i32* [[J]], i32** [[TMP30]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [7 x i64], [7 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK-USE-I386-NEXT:    store i64 4, i64* [[TMP31]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK-USE-I386-NEXT:    store i8* null, i8** [[TMP32]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
-// CHECK-USE-I386-NEXT:    [[TMP34:%.*]] = bitcast i8** [[TMP33]] to i32**
-// CHECK-USE-I386-NEXT:    store i32* [[A]], i32** [[TMP34]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK-USE-I386-NEXT:    [[TMP36:%.*]] = bitcast i8** [[TMP35]] to i32**
-// CHECK-USE-I386-NEXT:    store i32* [[A]], i32** [[TMP36]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [7 x i64], [7 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK-USE-I386-NEXT:    store i64 4, i64* [[TMP37]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
-// CHECK-USE-I386-NEXT:    store i8* null, i8** [[TMP38]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK-USE-I386-NEXT:    [[TMP40:%.*]] = bitcast i8** [[TMP39]] to %struct.ST**
-// CHECK-USE-I386-NEXT:    store %struct.ST* [[ST2]], %struct.ST** [[TMP40]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
-// CHECK-USE-I386-NEXT:    [[TMP42:%.*]] = bitcast i8** [[TMP41]] to i32**
-// CHECK-USE-I386-NEXT:    store i32* [[I1]], i32** [[TMP42]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [7 x i64], [7 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK-USE-I386-NEXT:    store i64 [[TMP14]], i64* [[TMP43]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
-// CHECK-USE-I386-NEXT:    store i8* null, i8** [[TMP44]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 5
-// CHECK-USE-I386-NEXT:    [[TMP46:%.*]] = bitcast i8** [[TMP45]] to %struct.ST**
-// CHECK-USE-I386-NEXT:    store %struct.ST* [[ST2]], %struct.ST** [[TMP46]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 5
-// CHECK-USE-I386-NEXT:    [[TMP48:%.*]] = bitcast i8** [[TMP47]] to i32**
-// CHECK-USE-I386-NEXT:    store i32* [[I1]], i32** [[TMP48]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [7 x i64], [7 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 5
-// CHECK-USE-I386-NEXT:    store i64 4, i64* [[TMP49]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 5
-// CHECK-USE-I386-NEXT:    store i8* null, i8** [[TMP50]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 6
-// CHECK-USE-I386-NEXT:    [[TMP52:%.*]] = bitcast i8** [[TMP51]] to %struct.ST**
-// CHECK-USE-I386-NEXT:    store %struct.ST* [[ST2]], %struct.ST** [[TMP52]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 6
-// CHECK-USE-I386-NEXT:    [[TMP54:%.*]] = bitcast i8** [[TMP53]] to i32**
-// CHECK-USE-I386-NEXT:    store i32* [[J2]], i32** [[TMP54]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [7 x i64], [7 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 6
-// CHECK-USE-I386-NEXT:    store i64 4, i64* [[TMP55]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 6
-// CHECK-USE-I386-NEXT:    store i8* null, i8** [[TMP56]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK-USE-I386-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK-USE-I386-NEXT:    [[TMP59:%.*]] = getelementptr inbounds [7 x i64], [7 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK-USE-I386-NEXT:    [[TMP60:%.*]] = call i32 @__tgt_target_mapper(%struct.ident_t* @[[GLOB1:[0-9]+]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20explicit_maps_singlei_l654.region_id, i32 7, i8** [[TMP57]], i8** [[TMP58]], i64* [[TMP59]], i64* getelementptr inbounds ([7 x i64], [7 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null)
-// CHECK-USE-I386-NEXT:    [[TMP61:%.*]] = icmp ne i32 [[TMP60]], 0
-// CHECK-USE-I386-NEXT:    br i1 [[TMP61]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK-USE-I386-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK-USE-I386-NEXT:    store i8* null, i8** [[TMP31]], align 4
+// CHECK-USE-I386-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK-USE-I386-NEXT:    [[TMP33:%.*]] = bitcast i8** [[TMP32]] to i32**
+// CHECK-USE-I386-NEXT:    store i32* [[A]], i32** [[TMP33]], align 4
+// CHECK-USE-I386-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
+// CHECK-USE-I386-NEXT:    [[TMP35:%.*]] = bitcast i8** [[TMP34]] to i32**
+// CHECK-USE-I386-NEXT:    store i32* [[A]], i32** [[TMP35]], align 4
+// CHECK-USE-I386-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
+// CHECK-USE-I386-NEXT:    store i8* null, i8** [[TMP36]], align 4
+// CHECK-USE-I386-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK-USE-I386-NEXT:    [[TMP38:%.*]] = bitcast i8** [[TMP37]] to %struct.ST**
+// CHECK-USE-I386-NEXT:    store %struct.ST* [[ST2]], %struct.ST** [[TMP38]], align 4
+// CHECK-USE-I386-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK-USE-I386-NEXT:    [[TMP40:%.*]] = bitcast i8** [[TMP39]] to i32**
+// CHECK-USE-I386-NEXT:    store i32* [[I1]], i32** [[TMP40]], align 4
+// CHECK-USE-I386-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [7 x i64], [7 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
+// CHECK-USE-I386-NEXT:    store i64 [[TMP14]], i64* [[TMP41]], align 4
+// CHECK-USE-I386-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
+// CHECK-USE-I386-NEXT:    store i8* null, i8** [[TMP42]], align 4
+// CHECK-USE-I386-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 5
+// CHECK-USE-I386-NEXT:    [[TMP44:%.*]] = bitcast i8** [[TMP43]] to %struct.ST**
+// CHECK-USE-I386-NEXT:    store %struct.ST* [[ST2]], %struct.ST** [[TMP44]], align 4
+// CHECK-USE-I386-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 5
+// CHECK-USE-I386-NEXT:    [[TMP46:%.*]] = bitcast i8** [[TMP45]] to i32**
+// CHECK-USE-I386-NEXT:    store i32* [[I1]], i32** [[TMP46]], align 4
+// CHECK-USE-I386-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 5
+// CHECK-USE-I386-NEXT:    store i8* null, i8** [[TMP47]], align 4
+// CHECK-USE-I386-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 6
+// CHECK-USE-I386-NEXT:    [[TMP49:%.*]] = bitcast i8** [[TMP48]] to %struct.ST**
+// CHECK-USE-I386-NEXT:    store %struct.ST* [[ST2]], %struct.ST** [[TMP49]], align 4
+// CHECK-USE-I386-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 6
+// CHECK-USE-I386-NEXT:    [[TMP51:%.*]] = bitcast i8** [[TMP50]] to i32**
+// CHECK-USE-I386-NEXT:    store i32* [[J2]], i32** [[TMP51]], align 4
+// CHECK-USE-I386-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 6
+// CHECK-USE-I386-NEXT:    store i8* null, i8** [[TMP52]], align 4
+// CHECK-USE-I386-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK-USE-I386-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK-USE-I386-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [7 x i64], [7 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK-USE-I386-NEXT:    [[TMP56:%.*]] = call i32 @__tgt_target_mapper(%struct.ident_t* @[[GLOB1:[0-9]+]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20explicit_maps_singlei_l630.region_id, i32 7, i8** [[TMP53]], i8** [[TMP54]], i64* [[TMP55]], i64* getelementptr inbounds ([7 x i64], [7 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null)
+// CHECK-USE-I386-NEXT:    [[TMP57:%.*]] = icmp ne i32 [[TMP56]], 0
+// CHECK-USE-I386-NEXT:    br i1 [[TMP57]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK-USE-I386:       omp_offload.failed:
-// CHECK-USE-I386-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20explicit_maps_singlei_l654(%struct.ST* [[ST1]], i32* [[A]], %struct.ST* [[ST2]]) #[[ATTR2:[0-9]+]]
+// CHECK-USE-I386-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20explicit_maps_singlei_l630(%struct.ST* [[ST1]], i32* [[A]], %struct.ST* [[ST2]]) #[[ATTR3:[0-9]+]]
 // CHECK-USE-I386-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK-USE-I386:       omp_offload.cont:
-// CHECK-USE-I386-NEXT:    [[TMP62:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0
-// CHECK-USE-I386-NEXT:    [[TMP63:%.*]] = bitcast i8** [[TMP62]] to i32**
-// CHECK-USE-I386-NEXT:    store i32* [[A]], i32** [[TMP63]], align 4
+// CHECK-USE-I386-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0
+// CHECK-USE-I386-NEXT:    [[TMP59:%.*]] = bitcast i8** [[TMP58]] to i32**
+// CHECK-USE-I386-NEXT:    store i32* [[A]], i32** [[TMP59]], align 4
+// CHECK-USE-I386-NEXT:    [[TMP60:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS4]], i32 0, i32 0
+// CHECK-USE-I386-NEXT:    [[TMP61:%.*]] = bitcast i8** [[TMP60]] to i32**
+// CHECK-USE-I386-NEXT:    store i32* [[A]], i32** [[TMP61]], align 4
+// CHECK-USE-I386-NEXT:    [[TMP62:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_MAPPERS5]], i32 0, i32 0
+// CHECK-USE-I386-NEXT:    store i8* null, i8** [[TMP62]], align 4
+// CHECK-USE-I386-NEXT:    [[TMP63:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0
 // CHECK-USE-I386-NEXT:    [[TMP64:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS4]], i32 0, i32 0
-// CHECK-USE-I386-NEXT:    [[TMP65:%.*]] = bitcast i8** [[TMP64]] to i32**
-// CHECK-USE-I386-NEXT:    store i32* [[A]], i32** [[TMP65]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP66:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_MAPPERS5]], i32 0, i32 0
-// CHECK-USE-I386-NEXT:    store i8* null, i8** [[TMP66]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP67:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0
-// CHECK-USE-I386-NEXT:    [[TMP68:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS4]], i32 0, i32 0
-// CHECK-USE-I386-NEXT:    [[TMP69:%.*]] = call i32 @__tgt_target_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20explicit_maps_singlei_l668.region_id, i32 1, i8** [[TMP67]], i8** [[TMP68]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.1, i32 0, i32 0), i8** null, i8** null)
-// CHECK-USE-I386-NEXT:    [[TMP70:%.*]] = icmp ne i32 [[TMP69]], 0
-// CHECK-USE-I386-NEXT:    br i1 [[TMP70]], label [[OMP_OFFLOAD_FAILED6:%.*]], label [[OMP_OFFLOAD_CONT7:%.*]]
+// CHECK-USE-I386-NEXT:    [[TMP65:%.*]] = call i32 @__tgt_target_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20explicit_maps_singlei_l644.region_id, i32 1, i8** [[TMP63]], i8** [[TMP64]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.1, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.2, i32 0, i32 0), i8** null, i8** null)
+// CHECK-USE-I386-NEXT:    [[TMP66:%.*]] = icmp ne i32 [[TMP65]], 0
+// CHECK-USE-I386-NEXT:    br i1 [[TMP66]], label [[OMP_OFFLOAD_FAILED6:%.*]], label [[OMP_OFFLOAD_CONT7:%.*]]
 // CHECK-USE-I386:       omp_offload.failed6:
-// CHECK-USE-I386-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20explicit_maps_singlei_l668(i32* [[A]]) #[[ATTR2]]
+// CHECK-USE-I386-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20explicit_maps_singlei_l644(i32* [[A]]) #[[ATTR3]]
 // CHECK-USE-I386-NEXT:    br label [[OMP_OFFLOAD_CONT7]]
 // CHECK-USE-I386:       omp_offload.cont7:
 // CHECK-USE-I386-NEXT:    ret void
@@ -409,24 +401,24 @@ struct ST {
 // CHECK-NOUSE-PPC64LE-NEXT:    [[TMP12:%.*]] = ptrtoint i8* [[TMP9]] to i64
 // CHECK-NOUSE-PPC64LE-NEXT:    [[TMP13:%.*]] = sub i64 [[TMP11]], [[TMP12]]
 // CHECK-NOUSE-PPC64LE-NEXT:    [[TMP14:%.*]] = sdiv exact i64 [[TMP13]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64)
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP16:%.*]] = bitcast i8** [[TMP15]] to %struct.ST**
-// CHECK-NOUSE-PPC64LE-NEXT:    store %struct.ST* [[ST1]], %struct.ST** [[TMP16]], align 8
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32**
-// CHECK-NOUSE-PPC64LE-NEXT:    store i32* [[I]], i32** [[TMP18]], align 8
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [7 x i64], [7 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK-NOUSE-PPC64LE-NEXT:    store i64 [[TMP7]], i64* [[TMP19]], align 8
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
-// CHECK-NOUSE-PPC64LE-NEXT:    store i8* null, i8** [[TMP20]], align 8
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP22:%.*]] = bitcast i8** [[TMP21]] to %struct.ST**
-// CHECK-NOUSE-PPC64LE-NEXT:    store %struct.ST* [[ST1]], %struct.ST** [[TMP22]], align 8
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i32**
-// CHECK-NOUSE-PPC64LE-NEXT:    store i32* [[I]], i32** [[TMP24]], align 8
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [7 x i64], [7 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK-NOUSE-PPC64LE-NEXT:    store i64 4, i64* [[TMP25]], align 8
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP15:%.*]] = bitcast [7 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK-NOUSE-PPC64LE-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP15]], i8* align 8 bitcast ([7 x i64]* @.offload_sizes to i8*), i64 56, i1 false)
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP17:%.*]] = bitcast i8** [[TMP16]] to %struct.ST**
+// CHECK-NOUSE-PPC64LE-NEXT:    store %struct.ST* [[ST1]], %struct.ST** [[TMP17]], align 8
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
+// CHECK-NOUSE-PPC64LE-NEXT:    store i32* [[I]], i32** [[TMP19]], align 8
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [7 x i64], [7 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK-NOUSE-PPC64LE-NEXT:    store i64 [[TMP7]], i64* [[TMP20]], align 8
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
+// CHECK-NOUSE-PPC64LE-NEXT:    store i8* null, i8** [[TMP21]], align 8
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP23:%.*]] = bitcast i8** [[TMP22]] to %struct.ST**
+// CHECK-NOUSE-PPC64LE-NEXT:    store %struct.ST* [[ST1]], %struct.ST** [[TMP23]], align 8
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i32**
+// CHECK-NOUSE-PPC64LE-NEXT:    store i32* [[I]], i32** [[TMP25]], align 8
 // CHECK-NOUSE-PPC64LE-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
 // CHECK-NOUSE-PPC64LE-NEXT:    store i8* null, i8** [[TMP26]], align 8
 // CHECK-NOUSE-PPC64LE-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
@@ -435,75 +427,67 @@ struct ST {
 // CHECK-NOUSE-PPC64LE-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
 // CHECK-NOUSE-PPC64LE-NEXT:    [[TMP30:%.*]] = bitcast i8** [[TMP29]] to i32**
 // CHECK-NOUSE-PPC64LE-NEXT:    store i32* [[J]], i32** [[TMP30]], align 8
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [7 x i64], [7 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK-NOUSE-PPC64LE-NEXT:    store i64 4, i64* [[TMP31]], align 8
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK-NOUSE-PPC64LE-NEXT:    store i8* null, i8** [[TMP32]], align 8
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP34:%.*]] = bitcast i8** [[TMP33]] to i32**
-// CHECK-NOUSE-PPC64LE-NEXT:    store i32* [[A]], i32** [[TMP34]], align 8
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP36:%.*]] = bitcast i8** [[TMP35]] to i32**
-// CHECK-NOUSE-PPC64LE-NEXT:    store i32* [[A]], i32** [[TMP36]], align 8
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [7 x i64], [7 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK-NOUSE-PPC64LE-NEXT:    store i64 4, i64* [[TMP37]], align 8
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
-// CHECK-NOUSE-PPC64LE-NEXT:    store i8* null, i8** [[TMP38]], align 8
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP40:%.*]] = bitcast i8** [[TMP39]] to %struct.ST**
-// CHECK-NOUSE-PPC64LE-NEXT:    store %struct.ST* [[ST2]], %struct.ST** [[TMP40]], align 8
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP42:%.*]] = bitcast i8** [[TMP41]] to i32**
-// CHECK-NOUSE-PPC64LE-NEXT:    store i32* [[I1]], i32** [[TMP42]], align 8
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [7 x i64], [7 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK-NOUSE-PPC64LE-NEXT:    store i64 [[TMP14]], i64* [[TMP43]], align 8
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
-// CHECK-NOUSE-PPC64LE-NEXT:    store i8* null, i8** [[TMP44]], align 8
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 5
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP46:%.*]] = bitcast i8** [[TMP45]] to %struct.ST**
-// CHECK-NOUSE-PPC64LE-NEXT:    store %struct.ST* [[ST2]], %struct.ST** [[TMP46]], align 8
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 5
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP48:%.*]] = bitcast i8** [[TMP47]] to i32**
-// CHECK-NOUSE-PPC64LE-NEXT:    store i32* [[I1]], i32** [[TMP48]], align 8
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [7 x i64], [7 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 5
-// CHECK-NOUSE-PPC64LE-NEXT:    store i64 4, i64* [[TMP49]], align 8
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 5
-// CHECK-NOUSE-PPC64LE-NEXT:    store i8* null, i8** [[TMP50]], align 8
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 6
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP52:%.*]] = bitcast i8** [[TMP51]] to %struct.ST**
-// CHECK-NOUSE-PPC64LE-NEXT:    store %struct.ST* [[ST2]], %struct.ST** [[TMP52]], align 8
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 6
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP54:%.*]] = bitcast i8** [[TMP53]] to i32**
-// CHECK-NOUSE-PPC64LE-NEXT:    store i32* [[J2]], i32** [[TMP54]], align 8
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [7 x i64], [7 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 6
-// CHECK-NOUSE-PPC64LE-NEXT:    store i64 4, i64* [[TMP55]], align 8
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 6
-// CHECK-NOUSE-PPC64LE-NEXT:    store i8* null, i8** [[TMP56]], align 8
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP59:%.*]] = getelementptr inbounds [7 x i64], [7 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP60:%.*]] = call i32 @__tgt_target_mapper(%struct.ident_t* @[[GLOB1:[0-9]+]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20explicit_maps_singlei_l654.region_id, i32 7, i8** [[TMP57]], i8** [[TMP58]], i64* [[TMP59]], i64* getelementptr inbounds ([7 x i64], [7 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null)
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP61:%.*]] = icmp ne i32 [[TMP60]], 0
-// CHECK-NOUSE-PPC64LE-NEXT:    br i1 [[TMP61]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK-NOUSE-PPC64LE-NEXT:    store i8* null, i8** [[TMP31]], align 8
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP33:%.*]] = bitcast i8** [[TMP32]] to i32**
+// CHECK-NOUSE-PPC64LE-NEXT:    store i32* [[A]], i32** [[TMP33]], align 8
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP35:%.*]] = bitcast i8** [[TMP34]] to i32**
+// CHECK-NOUSE-PPC64LE-NEXT:    store i32* [[A]], i32** [[TMP35]], align 8
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
+// CHECK-NOUSE-PPC64LE-NEXT:    store i8* null, i8** [[TMP36]], align 8
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP38:%.*]] = bitcast i8** [[TMP37]] to %struct.ST**
+// CHECK-NOUSE-PPC64LE-NEXT:    store %struct.ST* [[ST2]], %struct.ST** [[TMP38]], align 8
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP40:%.*]] = bitcast i8** [[TMP39]] to i32**
+// CHECK-NOUSE-PPC64LE-NEXT:    store i32* [[I1]], i32** [[TMP40]], align 8
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [7 x i64], [7 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
+// CHECK-NOUSE-PPC64LE-NEXT:    store i64 [[TMP14]], i64* [[TMP41]], align 8
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
+// CHECK-NOUSE-PPC64LE-NEXT:    store i8* null, i8** [[TMP42]], align 8
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 5
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP44:%.*]] = bitcast i8** [[TMP43]] to %struct.ST**
+// CHECK-NOUSE-PPC64LE-NEXT:    store %struct.ST* [[ST2]], %struct.ST** [[TMP44]], align 8
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 5
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP46:%.*]] = bitcast i8** [[TMP45]] to i32**
+// CHECK-NOUSE-PPC64LE-NEXT:    store i32* [[I1]], i32** [[TMP46]], align 8
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 5
+// CHECK-NOUSE-PPC64LE-NEXT:    store i8* null, i8** [[TMP47]], align 8
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 6
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP49:%.*]] = bitcast i8** [[TMP48]] to %struct.ST**
+// CHECK-NOUSE-PPC64LE-NEXT:    store %struct.ST* [[ST2]], %struct.ST** [[TMP49]], align 8
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 6
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP51:%.*]] = bitcast i8** [[TMP50]] to i32**
+// CHECK-NOUSE-PPC64LE-NEXT:    store i32* [[J2]], i32** [[TMP51]], align 8
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 6
+// CHECK-NOUSE-PPC64LE-NEXT:    store i8* null, i8** [[TMP52]], align 8
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [7 x i64], [7 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP56:%.*]] = call i32 @__tgt_target_mapper(%struct.ident_t* @[[GLOB1:[0-9]+]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20explicit_maps_singlei_l630.region_id, i32 7, i8** [[TMP53]], i8** [[TMP54]], i64* [[TMP55]], i64* getelementptr inbounds ([7 x i64], [7 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null)
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP57:%.*]] = icmp ne i32 [[TMP56]], 0
+// CHECK-NOUSE-PPC64LE-NEXT:    br i1 [[TMP57]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK-NOUSE-PPC64LE:       omp_offload.failed:
-// CHECK-NOUSE-PPC64LE-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20explicit_maps_singlei_l654() #[[ATTR2:[0-9]+]]
+// CHECK-NOUSE-PPC64LE-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20explicit_maps_singlei_l630() #[[ATTR3:[0-9]+]]
 // CHECK-NOUSE-PPC64LE-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK-NOUSE-PPC64LE:       omp_offload.cont:
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP62:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP63:%.*]] = bitcast i8** [[TMP62]] to i32**
-// CHECK-NOUSE-PPC64LE-NEXT:    store i32* [[A]], i32** [[TMP63]], align 8
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP59:%.*]] = bitcast i8** [[TMP58]] to i32**
+// CHECK-NOUSE-PPC64LE-NEXT:    store i32* [[A]], i32** [[TMP59]], align 8
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP60:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS4]], i32 0, i32 0
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP61:%.*]] = bitcast i8** [[TMP60]] to i32**
+// CHECK-NOUSE-PPC64LE-NEXT:    store i32* [[A]], i32** [[TMP61]], align 8
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP62:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_MAPPERS5]], i64 0, i64 0
+// CHECK-NOUSE-PPC64LE-NEXT:    store i8* null, i8** [[TMP62]], align 8
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP63:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0
 // CHECK-NOUSE-PPC64LE-NEXT:    [[TMP64:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS4]], i32 0, i32 0
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP65:%.*]] = bitcast i8** [[TMP64]] to i32**
-// CHECK-NOUSE-PPC64LE-NEXT:    store i32* [[A]], i32** [[TMP65]], align 8
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP66:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_MAPPERS5]], i64 0, i64 0
-// CHECK-NOUSE-PPC64LE-NEXT:    store i8* null, i8** [[TMP66]], align 8
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP67:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP68:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS4]], i32 0, i32 0
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP69:%.*]] = call i32 @__tgt_target_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20explicit_maps_singlei_l668.region_id, i32 1, i8** [[TMP67]], i8** [[TMP68]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.1, i32 0, i32 0), i8** null, i8** null)
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP70:%.*]] = icmp ne i32 [[TMP69]], 0
-// CHECK-NOUSE-PPC64LE-NEXT:    br i1 [[TMP70]], label [[OMP_OFFLOAD_FAILED6:%.*]], label [[OMP_OFFLOAD_CONT7:%.*]]
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP65:%.*]] = call i32 @__tgt_target_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20explicit_maps_singlei_l644.region_id, i32 1, i8** [[TMP63]], i8** [[TMP64]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.1, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.2, i32 0, i32 0), i8** null, i8** null)
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP66:%.*]] = icmp ne i32 [[TMP65]], 0
+// CHECK-NOUSE-PPC64LE-NEXT:    br i1 [[TMP66]], label [[OMP_OFFLOAD_FAILED6:%.*]], label [[OMP_OFFLOAD_CONT7:%.*]]
 // CHECK-NOUSE-PPC64LE:       omp_offload.failed6:
-// CHECK-NOUSE-PPC64LE-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20explicit_maps_singlei_l668() #[[ATTR2]]
+// CHECK-NOUSE-PPC64LE-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20explicit_maps_singlei_l644() #[[ATTR3]]
 // CHECK-NOUSE-PPC64LE-NEXT:    br label [[OMP_OFFLOAD_CONT7]]
 // CHECK-NOUSE-PPC64LE:       omp_offload.cont7:
 // CHECK-NOUSE-PPC64LE-NEXT:    ret void
@@ -542,24 +526,24 @@ struct ST {
 // CHECK-NOUSE-I386-NEXT:    [[TMP12:%.*]] = ptrtoint i8* [[TMP9]] to i64
 // CHECK-NOUSE-I386-NEXT:    [[TMP13:%.*]] = sub i64 [[TMP11]], [[TMP12]]
 // CHECK-NOUSE-I386-NEXT:    [[TMP14:%.*]] = sdiv exact i64 [[TMP13]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64)
-// CHECK-NOUSE-I386-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK-NOUSE-I386-NEXT:    [[TMP16:%.*]] = bitcast i8** [[TMP15]] to %struct.ST**
-// CHECK-NOUSE-I386-NEXT:    store %struct.ST* [[ST1]], %struct.ST** [[TMP16]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK-NOUSE-I386-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32**
-// CHECK-NOUSE-I386-NEXT:    store i32* [[I]], i32** [[TMP18]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [7 x i64], [7 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK-NOUSE-I386-NEXT:    store i64 [[TMP7]], i64* [[TMP19]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
-// CHECK-NOUSE-I386-NEXT:    store i8* null, i8** [[TMP20]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
-// CHECK-NOUSE-I386-NEXT:    [[TMP22:%.*]] = bitcast i8** [[TMP21]] to %struct.ST**
-// CHECK-NOUSE-I386-NEXT:    store %struct.ST* [[ST1]], %struct.ST** [[TMP22]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
-// CHECK-NOUSE-I386-NEXT:    [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i32**
-// CHECK-NOUSE-I386-NEXT:    store i32* [[I]], i32** [[TMP24]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [7 x i64], [7 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK-NOUSE-I386-NEXT:    store i64 4, i64* [[TMP25]], align 4
+// CHECK-NOUSE-I386-NEXT:    [[TMP15:%.*]] = bitcast [7 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK-NOUSE-I386-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP15]], i8* align 4 bitcast ([7 x i64]* @.offload_sizes to i8*), i32 56, i1 false)
+// CHECK-NOUSE-I386-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK-NOUSE-I386-NEXT:    [[TMP17:%.*]] = bitcast i8** [[TMP16]] to %struct.ST**
+// CHECK-NOUSE-I386-NEXT:    store %struct.ST* [[ST1]], %struct.ST** [[TMP17]], align 4
+// CHECK-NOUSE-I386-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK-NOUSE-I386-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
+// CHECK-NOUSE-I386-NEXT:    store i32* [[I]], i32** [[TMP19]], align 4
+// CHECK-NOUSE-I386-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [7 x i64], [7 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK-NOUSE-I386-NEXT:    store i64 [[TMP7]], i64* [[TMP20]], align 4
+// CHECK-NOUSE-I386-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
+// CHECK-NOUSE-I386-NEXT:    store i8* null, i8** [[TMP21]], align 4
+// CHECK-NOUSE-I386-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
+// CHECK-NOUSE-I386-NEXT:    [[TMP23:%.*]] = bitcast i8** [[TMP22]] to %struct.ST**
+// CHECK-NOUSE-I386-NEXT:    store %struct.ST* [[ST1]], %struct.ST** [[TMP23]], align 4
+// CHECK-NOUSE-I386-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
+// CHECK-NOUSE-I386-NEXT:    [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i32**
+// CHECK-NOUSE-I386-NEXT:    store i32* [[I]], i32** [[TMP25]], align 4
 // CHECK-NOUSE-I386-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
 // CHECK-NOUSE-I386-NEXT:    store i8* null, i8** [[TMP26]], align 4
 // CHECK-NOUSE-I386-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
@@ -568,75 +552,67 @@ struct ST {
 // CHECK-NOUSE-I386-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
 // CHECK-NOUSE-I386-NEXT:    [[TMP30:%.*]] = bitcast i8** [[TMP29]] to i32**
 // CHECK-NOUSE-I386-NEXT:    store i32* [[J]], i32** [[TMP30]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [7 x i64], [7 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK-NOUSE-I386-NEXT:    store i64 4, i64* [[TMP31]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK-NOUSE-I386-NEXT:    store i8* null, i8** [[TMP32]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
-// CHECK-NOUSE-I386-NEXT:    [[TMP34:%.*]] = bitcast i8** [[TMP33]] to i32**
-// CHECK-NOUSE-I386-NEXT:    store i32* [[A]], i32** [[TMP34]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK-NOUSE-I386-NEXT:    [[TMP36:%.*]] = bitcast i8** [[TMP35]] to i32**
-// CHECK-NOUSE-I386-NEXT:    store i32* [[A]], i32** [[TMP36]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [7 x i64], [7 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK-NOUSE-I386-NEXT:    store i64 4, i64* [[TMP37]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
-// CHECK-NOUSE-I386-NEXT:    store i8* null, i8** [[TMP38]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP40:%.*]] = bitcast i8** [[TMP39]] to %struct.ST**
-// CHECK-NOUSE-I386-NEXT:    store %struct.ST* [[ST2]], %struct.ST** [[TMP40]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP42:%.*]] = bitcast i8** [[TMP41]] to i32**
-// CHECK-NOUSE-I386-NEXT:    store i32* [[I1]], i32** [[TMP42]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [7 x i64], [7 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK-NOUSE-I386-NEXT:    store i64 [[TMP14]], i64* [[TMP43]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
-// CHECK-NOUSE-I386-NEXT:    store i8* null, i8** [[TMP44]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 5
-// CHECK-NOUSE-I386-NEXT:    [[TMP46:%.*]] = bitcast i8** [[TMP45]] to %struct.ST**
-// CHECK-NOUSE-I386-NEXT:    store %struct.ST* [[ST2]], %struct.ST** [[TMP46]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 5
-// CHECK-NOUSE-I386-NEXT:    [[TMP48:%.*]] = bitcast i8** [[TMP47]] to i32**
-// CHECK-NOUSE-I386-NEXT:    store i32* [[I1]], i32** [[TMP48]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [7 x i64], [7 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 5
-// CHECK-NOUSE-I386-NEXT:    store i64 4, i64* [[TMP49]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 5
-// CHECK-NOUSE-I386-NEXT:    store i8* null, i8** [[TMP50]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 6
-// CHECK-NOUSE-I386-NEXT:    [[TMP52:%.*]] = bitcast i8** [[TMP51]] to %struct.ST**
-// CHECK-NOUSE-I386-NEXT:    store %struct.ST* [[ST2]], %struct.ST** [[TMP52]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 6
-// CHECK-NOUSE-I386-NEXT:    [[TMP54:%.*]] = bitcast i8** [[TMP53]] to i32**
-// CHECK-NOUSE-I386-NEXT:    store i32* [[J2]], i32** [[TMP54]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [7 x i64], [7 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 6
-// CHECK-NOUSE-I386-NEXT:    store i64 4, i64* [[TMP55]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 6
-// CHECK-NOUSE-I386-NEXT:    store i8* null, i8** [[TMP56]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK-NOUSE-I386-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK-NOUSE-I386-NEXT:    [[TMP59:%.*]] = getelementptr inbounds [7 x i64], [7 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK-NOUSE-I386-NEXT:    [[TMP60:%.*]] = call i32 @__tgt_target_mapper(%struct.ident_t* @[[GLOB1:[0-9]+]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20explicit_maps_singlei_l654.region_id, i32 7, i8** [[TMP57]], i8** [[TMP58]], i64* [[TMP59]], i64* getelementptr inbounds ([7 x i64], [7 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null)
-// CHECK-NOUSE-I386-NEXT:    [[TMP61:%.*]] = icmp ne i32 [[TMP60]], 0
-// CHECK-NOUSE-I386-NEXT:    br i1 [[TMP61]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK-NOUSE-I386-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK-NOUSE-I386-NEXT:    store i8* null, i8** [[TMP31]], align 4
+// CHECK-NOUSE-I386-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK-NOUSE-I386-NEXT:    [[TMP33:%.*]] = bitcast i8** [[TMP32]] to i32**
+// CHECK-NOUSE-I386-NEXT:    store i32* [[A]], i32** [[TMP33]], align 4
+// CHECK-NOUSE-I386-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
+// CHECK-NOUSE-I386-NEXT:    [[TMP35:%.*]] = bitcast i8** [[TMP34]] to i32**
+// CHECK-NOUSE-I386-NEXT:    store i32* [[A]], i32** [[TMP35]], align 4
+// CHECK-NOUSE-I386-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
+// CHECK-NOUSE-I386-NEXT:    store i8* null, i8** [[TMP36]], align 4
+// CHECK-NOUSE-I386-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK-NOUSE-I386-NEXT:    [[TMP38:%.*]] = bitcast i8** [[TMP37]] to %struct.ST**
+// CHECK-NOUSE-I386-NEXT:    store %struct.ST* [[ST2]], %struct.ST** [[TMP38]], align 4
+// CHECK-NOUSE-I386-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK-NOUSE-I386-NEXT:    [[TMP40:%.*]] = bitcast i8** [[TMP39]] to i32**
+// CHECK-NOUSE-I386-NEXT:    store i32* [[I1]], i32** [[TMP40]], align 4
+// CHECK-NOUSE-I386-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [7 x i64], [7 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
+// CHECK-NOUSE-I386-NEXT:    store i64 [[TMP14]], i64* [[TMP41]], align 4
+// CHECK-NOUSE-I386-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
+// CHECK-NOUSE-I386-NEXT:    store i8* null, i8** [[TMP42]], align 4
+// CHECK-NOUSE-I386-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 5
+// CHECK-NOUSE-I386-NEXT:    [[TMP44:%.*]] = bitcast i8** [[TMP43]] to %struct.ST**
+// CHECK-NOUSE-I386-NEXT:    store %struct.ST* [[ST2]], %struct.ST** [[TMP44]], align 4
+// CHECK-NOUSE-I386-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 5
+// CHECK-NOUSE-I386-NEXT:    [[TMP46:%.*]] = bitcast i8** [[TMP45]] to i32**
+// CHECK-NOUSE-I386-NEXT:    store i32* [[I1]], i32** [[TMP46]], align 4
+// CHECK-NOUSE-I386-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 5
+// CHECK-NOUSE-I386-NEXT:    store i8* null, i8** [[TMP47]], align 4
+// CHECK-NOUSE-I386-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 6
+// CHECK-NOUSE-I386-NEXT:    [[TMP49:%.*]] = bitcast i8** [[TMP48]] to %struct.ST**
+// CHECK-NOUSE-I386-NEXT:    store %struct.ST* [[ST2]], %struct.ST** [[TMP49]], align 4
+// CHECK-NOUSE-I386-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 6
+// CHECK-NOUSE-I386-NEXT:    [[TMP51:%.*]] = bitcast i8** [[TMP50]] to i32**
+// CHECK-NOUSE-I386-NEXT:    store i32* [[J2]], i32** [[TMP51]], align 4
+// CHECK-NOUSE-I386-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 6
+// CHECK-NOUSE-I386-NEXT:    store i8* null, i8** [[TMP52]], align 4
+// CHECK-NOUSE-I386-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK-NOUSE-I386-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK-NOUSE-I386-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [7 x i64], [7 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK-NOUSE-I386-NEXT:    [[TMP56:%.*]] = call i32 @__tgt_target_mapper(%struct.ident_t* @[[GLOB1:[0-9]+]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20explicit_maps_singlei_l630.region_id, i32 7, i8** [[TMP53]], i8** [[TMP54]], i64* [[TMP55]], i64* getelementptr inbounds ([7 x i64], [7 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null)
+// CHECK-NOUSE-I386-NEXT:    [[TMP57:%.*]] = icmp ne i32 [[TMP56]], 0
+// CHECK-NOUSE-I386-NEXT:    br i1 [[TMP57]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK-NOUSE-I386:       omp_offload.failed:
-// CHECK-NOUSE-I386-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20explicit_maps_singlei_l654() #[[ATTR2:[0-9]+]]
+// CHECK-NOUSE-I386-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20explicit_maps_singlei_l630() #[[ATTR3:[0-9]+]]
 // CHECK-NOUSE-I386-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK-NOUSE-I386:       omp_offload.cont:
-// CHECK-NOUSE-I386-NEXT:    [[TMP62:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0
-// CHECK-NOUSE-I386-NEXT:    [[TMP63:%.*]] = bitcast i8** [[TMP62]] to i32**
-// CHECK-NOUSE-I386-NEXT:    store i32* [[A]], i32** [[TMP63]], align 4
+// CHECK-NOUSE-I386-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0
+// CHECK-NOUSE-I386-NEXT:    [[TMP59:%.*]] = bitcast i8** [[TMP58]] to i32**
+// CHECK-NOUSE-I386-NEXT:    store i32* [[A]], i32** [[TMP59]], align 4
+// CHECK-NOUSE-I386-NEXT:    [[TMP60:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS4]], i32 0, i32 0
+// CHECK-NOUSE-I386-NEXT:    [[TMP61:%.*]] = bitcast i8** [[TMP60]] to i32**
+// CHECK-NOUSE-I386-NEXT:    store i32* [[A]], i32** [[TMP61]], align 4
+// CHECK-NOUSE-I386-NEXT:    [[TMP62:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_MAPPERS5]], i32 0, i32 0
+// CHECK-NOUSE-I386-NEXT:    store i8* null, i8** [[TMP62]], align 4
+// CHECK-NOUSE-I386-NEXT:    [[TMP63:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0
 // CHECK-NOUSE-I386-NEXT:    [[TMP64:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS4]], i32 0, i32 0
-// CHECK-NOUSE-I386-NEXT:    [[TMP65:%.*]] = bitcast i8** [[TMP64]] to i32**
-// CHECK-NOUSE-I386-NEXT:    store i32* [[A]], i32** [[TMP65]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP66:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_MAPPERS5]], i32 0, i32 0
-// CHECK-NOUSE-I386-NEXT:    store i8* null, i8** [[TMP66]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP67:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0
-// CHECK-NOUSE-I386-NEXT:    [[TMP68:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS4]], i32 0, i32 0
-// CHECK-NOUSE-I386-NEXT:    [[TMP69:%.*]] = call i32 @__tgt_target_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20explicit_maps_singlei_l668.region_id, i32 1, i8** [[TMP67]], i8** [[TMP68]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.1, i32 0, i32 0), i8** null, i8** null)
-// CHECK-NOUSE-I386-NEXT:    [[TMP70:%.*]] = icmp ne i32 [[TMP69]], 0
-// CHECK-NOUSE-I386-NEXT:    br i1 [[TMP70]], label [[OMP_OFFLOAD_FAILED6:%.*]], label [[OMP_OFFLOAD_CONT7:%.*]]
+// CHECK-NOUSE-I386-NEXT:    [[TMP65:%.*]] = call i32 @__tgt_target_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20explicit_maps_singlei_l644.region_id, i32 1, i8** [[TMP63]], i8** [[TMP64]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.1, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.2, i32 0, i32 0), i8** null, i8** null)
+// CHECK-NOUSE-I386-NEXT:    [[TMP66:%.*]] = icmp ne i32 [[TMP65]], 0
+// CHECK-NOUSE-I386-NEXT:    br i1 [[TMP66]], label [[OMP_OFFLOAD_FAILED6:%.*]], label [[OMP_OFFLOAD_CONT7:%.*]]
 // CHECK-NOUSE-I386:       omp_offload.failed6:
-// CHECK-NOUSE-I386-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20explicit_maps_singlei_l668() #[[ATTR2]]
+// CHECK-NOUSE-I386-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z20explicit_maps_singlei_l644() #[[ATTR3]]
 // CHECK-NOUSE-I386-NEXT:    br label [[OMP_OFFLOAD_CONT7]]
 // CHECK-NOUSE-I386:       omp_offload.cont7:
 // CHECK-NOUSE-I386-NEXT:    ret void
@@ -691,24 +667,24 @@ void explicit_maps_single(int ii) {
 // CHECK-USE-PPC64LE-NEXT:    [[TMP4:%.*]] = ptrtoint i8* [[TMP1]] to i64
 // CHECK-USE-PPC64LE-NEXT:    [[TMP5:%.*]] = sub i64 [[TMP3]], [[TMP4]]
 // CHECK-USE-PPC64LE-NEXT:    [[TMP6:%.*]] = sdiv exact i64 [[TMP5]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64)
-// CHECK-USE-PPC64LE-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK-USE-PPC64LE-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to %struct.ST**
-// CHECK-USE-PPC64LE-NEXT:    store %struct.ST* [[THIS1]], %struct.ST** [[TMP8]], align 8
-// CHECK-USE-PPC64LE-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK-USE-PPC64LE-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i32**
-// CHECK-USE-PPC64LE-NEXT:    store i32* [[I]], i32** [[TMP10]], align 8
-// CHECK-USE-PPC64LE-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK-USE-PPC64LE-NEXT:    store i64 [[TMP6]], i64* [[TMP11]], align 8
-// CHECK-USE-PPC64LE-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
-// CHECK-USE-PPC64LE-NEXT:    store i8* null, i8** [[TMP12]], align 8
-// CHECK-USE-PPC64LE-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
-// CHECK-USE-PPC64LE-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to %struct.ST**
-// CHECK-USE-PPC64LE-NEXT:    store %struct.ST* [[THIS1]], %struct.ST** [[TMP14]], align 8
-// CHECK-USE-PPC64LE-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
-// CHECK-USE-PPC64LE-NEXT:    [[TMP16:%.*]] = bitcast i8** [[TMP15]] to i32**
-// CHECK-USE-PPC64LE-NEXT:    store i32* [[I]], i32** [[TMP16]], align 8
-// CHECK-USE-PPC64LE-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK-USE-PPC64LE-NEXT:    store i64 4, i64* [[TMP17]], align 8
+// CHECK-USE-PPC64LE-NEXT:    [[TMP7:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK-USE-PPC64LE-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP7]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes.3 to i8*), i64 24, i1 false)
+// CHECK-USE-PPC64LE-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK-USE-PPC64LE-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to %struct.ST**
+// CHECK-USE-PPC64LE-NEXT:    store %struct.ST* [[THIS1]], %struct.ST** [[TMP9]], align 8
+// CHECK-USE-PPC64LE-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK-USE-PPC64LE-NEXT:    [[TMP11:%.*]] = bitcast i8** [[TMP10]] to i32**
+// CHECK-USE-PPC64LE-NEXT:    store i32* [[I]], i32** [[TMP11]], align 8
+// CHECK-USE-PPC64LE-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK-USE-PPC64LE-NEXT:    store i64 [[TMP6]], i64* [[TMP12]], align 8
+// CHECK-USE-PPC64LE-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
+// CHECK-USE-PPC64LE-NEXT:    store i8* null, i8** [[TMP13]], align 8
+// CHECK-USE-PPC64LE-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
+// CHECK-USE-PPC64LE-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to %struct.ST**
+// CHECK-USE-PPC64LE-NEXT:    store %struct.ST* [[THIS1]], %struct.ST** [[TMP15]], align 8
+// CHECK-USE-PPC64LE-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
+// CHECK-USE-PPC64LE-NEXT:    [[TMP17:%.*]] = bitcast i8** [[TMP16]] to i32**
+// CHECK-USE-PPC64LE-NEXT:    store i32* [[I]], i32** [[TMP17]], align 8
 // CHECK-USE-PPC64LE-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
 // CHECK-USE-PPC64LE-NEXT:    store i8* null, i8** [[TMP18]], align 8
 // CHECK-USE-PPC64LE-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
@@ -717,18 +693,16 @@ void explicit_maps_single(int ii) {
 // CHECK-USE-PPC64LE-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
 // CHECK-USE-PPC64LE-NEXT:    [[TMP22:%.*]] = bitcast i8** [[TMP21]] to i32**
 // CHECK-USE-PPC64LE-NEXT:    store i32* [[J]], i32** [[TMP22]], align 8
-// CHECK-USE-PPC64LE-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK-USE-PPC64LE-NEXT:    store i64 4, i64* [[TMP23]], align 8
-// CHECK-USE-PPC64LE-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK-USE-PPC64LE-NEXT:    store i8* null, i8** [[TMP24]], align 8
-// CHECK-USE-PPC64LE-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK-USE-PPC64LE-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK-USE-PPC64LE-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK-USE-PPC64LE-NEXT:    [[TMP28:%.*]] = call i32 @__tgt_target_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2ST20test_present_membersEv_l919.region_id, i32 3, i8** [[TMP25]], i8** [[TMP26]], i64* [[TMP27]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.2, i32 0, i32 0), i8** null, i8** null)
-// CHECK-USE-PPC64LE-NEXT:    [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0
-// CHECK-USE-PPC64LE-NEXT:    br i1 [[TMP29]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK-USE-PPC64LE-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK-USE-PPC64LE-NEXT:    store i8* null, i8** [[TMP23]], align 8
+// CHECK-USE-PPC64LE-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK-USE-PPC64LE-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK-USE-PPC64LE-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK-USE-PPC64LE-NEXT:    [[TMP27:%.*]] = call i32 @__tgt_target_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2ST20test_present_membersEv_l887.region_id, i32 3, i8** [[TMP24]], i8** [[TMP25]], i64* [[TMP26]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null)
+// CHECK-USE-PPC64LE-NEXT:    [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0
+// CHECK-USE-PPC64LE-NEXT:    br i1 [[TMP28]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK-USE-PPC64LE:       omp_offload.failed:
-// CHECK-USE-PPC64LE-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2ST20test_present_membersEv_l919(%struct.ST* [[THIS1]]) #[[ATTR2]]
+// CHECK-USE-PPC64LE-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2ST20test_present_membersEv_l887(%struct.ST* [[THIS1]]) #[[ATTR3]]
 // CHECK-USE-PPC64LE-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK-USE-PPC64LE:       omp_offload.cont:
 // CHECK-USE-PPC64LE-NEXT:    ret void
@@ -751,24 +725,24 @@ void explicit_maps_single(int ii) {
 // CHECK-USE-I386-NEXT:    [[TMP4:%.*]] = ptrtoint i8* [[TMP1]] to i64
 // CHECK-USE-I386-NEXT:    [[TMP5:%.*]] = sub i64 [[TMP3]], [[TMP4]]
 // CHECK-USE-I386-NEXT:    [[TMP6:%.*]] = sdiv exact i64 [[TMP5]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64)
-// CHECK-USE-I386-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK-USE-I386-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to %struct.ST**
-// CHECK-USE-I386-NEXT:    store %struct.ST* [[THIS1]], %struct.ST** [[TMP8]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK-USE-I386-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i32**
-// CHECK-USE-I386-NEXT:    store i32* [[I]], i32** [[TMP10]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK-USE-I386-NEXT:    store i64 [[TMP6]], i64* [[TMP11]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
-// CHECK-USE-I386-NEXT:    store i8* null, i8** [[TMP12]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
-// CHECK-USE-I386-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to %struct.ST**
-// CHECK-USE-I386-NEXT:    store %struct.ST* [[THIS1]], %struct.ST** [[TMP14]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
-// CHECK-USE-I386-NEXT:    [[TMP16:%.*]] = bitcast i8** [[TMP15]] to i32**
-// CHECK-USE-I386-NEXT:    store i32* [[I]], i32** [[TMP16]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK-USE-I386-NEXT:    store i64 4, i64* [[TMP17]], align 4
+// CHECK-USE-I386-NEXT:    [[TMP7:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK-USE-I386-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP7]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes.3 to i8*), i32 24, i1 false)
+// CHECK-USE-I386-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK-USE-I386-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to %struct.ST**
+// CHECK-USE-I386-NEXT:    store %struct.ST* [[THIS1]], %struct.ST** [[TMP9]], align 4
+// CHECK-USE-I386-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK-USE-I386-NEXT:    [[TMP11:%.*]] = bitcast i8** [[TMP10]] to i32**
+// CHECK-USE-I386-NEXT:    store i32* [[I]], i32** [[TMP11]], align 4
+// CHECK-USE-I386-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK-USE-I386-NEXT:    store i64 [[TMP6]], i64* [[TMP12]], align 4
+// CHECK-USE-I386-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
+// CHECK-USE-I386-NEXT:    store i8* null, i8** [[TMP13]], align 4
+// CHECK-USE-I386-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
+// CHECK-USE-I386-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to %struct.ST**
+// CHECK-USE-I386-NEXT:    store %struct.ST* [[THIS1]], %struct.ST** [[TMP15]], align 4
+// CHECK-USE-I386-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
+// CHECK-USE-I386-NEXT:    [[TMP17:%.*]] = bitcast i8** [[TMP16]] to i32**
+// CHECK-USE-I386-NEXT:    store i32* [[I]], i32** [[TMP17]], align 4
 // CHECK-USE-I386-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
 // CHECK-USE-I386-NEXT:    store i8* null, i8** [[TMP18]], align 4
 // CHECK-USE-I386-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
@@ -777,18 +751,16 @@ void explicit_maps_single(int ii) {
 // CHECK-USE-I386-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
 // CHECK-USE-I386-NEXT:    [[TMP22:%.*]] = bitcast i8** [[TMP21]] to i32**
 // CHECK-USE-I386-NEXT:    store i32* [[J]], i32** [[TMP22]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK-USE-I386-NEXT:    store i64 4, i64* [[TMP23]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK-USE-I386-NEXT:    store i8* null, i8** [[TMP24]], align 4
-// CHECK-USE-I386-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK-USE-I386-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK-USE-I386-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK-USE-I386-NEXT:    [[TMP28:%.*]] = call i32 @__tgt_target_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2ST20test_present_membersEv_l919.region_id, i32 3, i8** [[TMP25]], i8** [[TMP26]], i64* [[TMP27]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.2, i32 0, i32 0), i8** null, i8** null)
-// CHECK-USE-I386-NEXT:    [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0
-// CHECK-USE-I386-NEXT:    br i1 [[TMP29]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK-USE-I386-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK-USE-I386-NEXT:    store i8* null, i8** [[TMP23]], align 4
+// CHECK-USE-I386-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK-USE-I386-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK-USE-I386-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK-USE-I386-NEXT:    [[TMP27:%.*]] = call i32 @__tgt_target_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2ST20test_present_membersEv_l887.region_id, i32 3, i8** [[TMP24]], i8** [[TMP25]], i64* [[TMP26]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null)
+// CHECK-USE-I386-NEXT:    [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0
+// CHECK-USE-I386-NEXT:    br i1 [[TMP28]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK-USE-I386:       omp_offload.failed:
-// CHECK-USE-I386-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2ST20test_present_membersEv_l919(%struct.ST* [[THIS1]]) #[[ATTR2]]
+// CHECK-USE-I386-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2ST20test_present_membersEv_l887(%struct.ST* [[THIS1]]) #[[ATTR3]]
 // CHECK-USE-I386-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK-USE-I386:       omp_offload.cont:
 // CHECK-USE-I386-NEXT:    ret void
@@ -811,24 +783,24 @@ void explicit_maps_single(int ii) {
 // CHECK-NOUSE-PPC64LE-NEXT:    [[TMP4:%.*]] = ptrtoint i8* [[TMP1]] to i64
 // CHECK-NOUSE-PPC64LE-NEXT:    [[TMP5:%.*]] = sub i64 [[TMP3]], [[TMP4]]
 // CHECK-NOUSE-PPC64LE-NEXT:    [[TMP6:%.*]] = sdiv exact i64 [[TMP5]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64)
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to %struct.ST**
-// CHECK-NOUSE-PPC64LE-NEXT:    store %struct.ST* [[THIS1]], %struct.ST** [[TMP8]], align 8
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i32**
-// CHECK-NOUSE-PPC64LE-NEXT:    store i32* [[I]], i32** [[TMP10]], align 8
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK-NOUSE-PPC64LE-NEXT:    store i64 [[TMP6]], i64* [[TMP11]], align 8
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
-// CHECK-NOUSE-PPC64LE-NEXT:    store i8* null, i8** [[TMP12]], align 8
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to %struct.ST**
-// CHECK-NOUSE-PPC64LE-NEXT:    store %struct.ST* [[THIS1]], %struct.ST** [[TMP14]], align 8
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP16:%.*]] = bitcast i8** [[TMP15]] to i32**
-// CHECK-NOUSE-PPC64LE-NEXT:    store i32* [[I]], i32** [[TMP16]], align 8
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK-NOUSE-PPC64LE-NEXT:    store i64 4, i64* [[TMP17]], align 8
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP7:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK-NOUSE-PPC64LE-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP7]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes.3 to i8*), i64 24, i1 false)
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to %struct.ST**
+// CHECK-NOUSE-PPC64LE-NEXT:    store %struct.ST* [[THIS1]], %struct.ST** [[TMP9]], align 8
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP11:%.*]] = bitcast i8** [[TMP10]] to i32**
+// CHECK-NOUSE-PPC64LE-NEXT:    store i32* [[I]], i32** [[TMP11]], align 8
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK-NOUSE-PPC64LE-NEXT:    store i64 [[TMP6]], i64* [[TMP12]], align 8
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
+// CHECK-NOUSE-PPC64LE-NEXT:    store i8* null, i8** [[TMP13]], align 8
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to %struct.ST**
+// CHECK-NOUSE-PPC64LE-NEXT:    store %struct.ST* [[THIS1]], %struct.ST** [[TMP15]], align 8
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP17:%.*]] = bitcast i8** [[TMP16]] to i32**
+// CHECK-NOUSE-PPC64LE-NEXT:    store i32* [[I]], i32** [[TMP17]], align 8
 // CHECK-NOUSE-PPC64LE-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
 // CHECK-NOUSE-PPC64LE-NEXT:    store i8* null, i8** [[TMP18]], align 8
 // CHECK-NOUSE-PPC64LE-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
@@ -837,18 +809,16 @@ void explicit_maps_single(int ii) {
 // CHECK-NOUSE-PPC64LE-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
 // CHECK-NOUSE-PPC64LE-NEXT:    [[TMP22:%.*]] = bitcast i8** [[TMP21]] to i32**
 // CHECK-NOUSE-PPC64LE-NEXT:    store i32* [[J]], i32** [[TMP22]], align 8
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK-NOUSE-PPC64LE-NEXT:    store i64 4, i64* [[TMP23]], align 8
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK-NOUSE-PPC64LE-NEXT:    store i8* null, i8** [[TMP24]], align 8
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP28:%.*]] = call i32 @__tgt_target_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2ST20test_present_membersEv_l919.region_id, i32 3, i8** [[TMP25]], i8** [[TMP26]], i64* [[TMP27]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.2, i32 0, i32 0), i8** null, i8** null)
-// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0
-// CHECK-NOUSE-PPC64LE-NEXT:    br i1 [[TMP29]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK-NOUSE-PPC64LE-NEXT:    store i8* null, i8** [[TMP23]], align 8
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP27:%.*]] = call i32 @__tgt_target_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2ST20test_present_membersEv_l887.region_id, i32 3, i8** [[TMP24]], i8** [[TMP25]], i64* [[TMP26]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null)
+// CHECK-NOUSE-PPC64LE-NEXT:    [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0
+// CHECK-NOUSE-PPC64LE-NEXT:    br i1 [[TMP28]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK-NOUSE-PPC64LE:       omp_offload.failed:
-// CHECK-NOUSE-PPC64LE-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2ST20test_present_membersEv_l919() #[[ATTR2]]
+// CHECK-NOUSE-PPC64LE-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2ST20test_present_membersEv_l887() #[[ATTR3]]
 // CHECK-NOUSE-PPC64LE-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK-NOUSE-PPC64LE:       omp_offload.cont:
 // CHECK-NOUSE-PPC64LE-NEXT:    ret void
@@ -871,24 +841,24 @@ void explicit_maps_single(int ii) {
 // CHECK-NOUSE-I386-NEXT:    [[TMP4:%.*]] = ptrtoint i8* [[TMP1]] to i64
 // CHECK-NOUSE-I386-NEXT:    [[TMP5:%.*]] = sub i64 [[TMP3]], [[TMP4]]
 // CHECK-NOUSE-I386-NEXT:    [[TMP6:%.*]] = sdiv exact i64 [[TMP5]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64)
-// CHECK-NOUSE-I386-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK-NOUSE-I386-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to %struct.ST**
-// CHECK-NOUSE-I386-NEXT:    store %struct.ST* [[THIS1]], %struct.ST** [[TMP8]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK-NOUSE-I386-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i32**
-// CHECK-NOUSE-I386-NEXT:    store i32* [[I]], i32** [[TMP10]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK-NOUSE-I386-NEXT:    store i64 [[TMP6]], i64* [[TMP11]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
-// CHECK-NOUSE-I386-NEXT:    store i8* null, i8** [[TMP12]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
-// CHECK-NOUSE-I386-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to %struct.ST**
-// CHECK-NOUSE-I386-NEXT:    store %struct.ST* [[THIS1]], %struct.ST** [[TMP14]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
-// CHECK-NOUSE-I386-NEXT:    [[TMP16:%.*]] = bitcast i8** [[TMP15]] to i32**
-// CHECK-NOUSE-I386-NEXT:    store i32* [[I]], i32** [[TMP16]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK-NOUSE-I386-NEXT:    store i64 4, i64* [[TMP17]], align 4
+// CHECK-NOUSE-I386-NEXT:    [[TMP7:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK-NOUSE-I386-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP7]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes.3 to i8*), i32 24, i1 false)
+// CHECK-NOUSE-I386-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK-NOUSE-I386-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to %struct.ST**
+// CHECK-NOUSE-I386-NEXT:    store %struct.ST* [[THIS1]], %struct.ST** [[TMP9]], align 4
+// CHECK-NOUSE-I386-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK-NOUSE-I386-NEXT:    [[TMP11:%.*]] = bitcast i8** [[TMP10]] to i32**
+// CHECK-NOUSE-I386-NEXT:    store i32* [[I]], i32** [[TMP11]], align 4
+// CHECK-NOUSE-I386-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK-NOUSE-I386-NEXT:    store i64 [[TMP6]], i64* [[TMP12]], align 4
+// CHECK-NOUSE-I386-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
+// CHECK-NOUSE-I386-NEXT:    store i8* null, i8** [[TMP13]], align 4
+// CHECK-NOUSE-I386-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
+// CHECK-NOUSE-I386-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to %struct.ST**
+// CHECK-NOUSE-I386-NEXT:    store %struct.ST* [[THIS1]], %struct.ST** [[TMP15]], align 4
+// CHECK-NOUSE-I386-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
+// CHECK-NOUSE-I386-NEXT:    [[TMP17:%.*]] = bitcast i8** [[TMP16]] to i32**
+// CHECK-NOUSE-I386-NEXT:    store i32* [[I]], i32** [[TMP17]], align 4
 // CHECK-NOUSE-I386-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
 // CHECK-NOUSE-I386-NEXT:    store i8* null, i8** [[TMP18]], align 4
 // CHECK-NOUSE-I386-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
@@ -897,18 +867,16 @@ void explicit_maps_single(int ii) {
 // CHECK-NOUSE-I386-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
 // CHECK-NOUSE-I386-NEXT:    [[TMP22:%.*]] = bitcast i8** [[TMP21]] to i32**
 // CHECK-NOUSE-I386-NEXT:    store i32* [[J]], i32** [[TMP22]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK-NOUSE-I386-NEXT:    store i64 4, i64* [[TMP23]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK-NOUSE-I386-NEXT:    store i8* null, i8** [[TMP24]], align 4
-// CHECK-NOUSE-I386-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK-NOUSE-I386-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK-NOUSE-I386-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK-NOUSE-I386-NEXT:    [[TMP28:%.*]] = call i32 @__tgt_target_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2ST20test_present_membersEv_l919.region_id, i32 3, i8** [[TMP25]], i8** [[TMP26]], i64* [[TMP27]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.2, i32 0, i32 0), i8** null, i8** null)
-// CHECK-NOUSE-I386-NEXT:    [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0
-// CHECK-NOUSE-I386-NEXT:    br i1 [[TMP29]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK-NOUSE-I386-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK-NOUSE-I386-NEXT:    store i8* null, i8** [[TMP23]], align 4
+// CHECK-NOUSE-I386-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK-NOUSE-I386-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK-NOUSE-I386-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK-NOUSE-I386-NEXT:    [[TMP27:%.*]] = call i32 @__tgt_target_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2ST20test_present_membersEv_l887.region_id, i32 3, i8** [[TMP24]], i8** [[TMP25]], i64* [[TMP26]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null)
+// CHECK-NOUSE-I386-NEXT:    [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0
+// CHECK-NOUSE-I386-NEXT:    br i1 [[TMP28]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK-NOUSE-I386:       omp_offload.failed:
-// CHECK-NOUSE-I386-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2ST20test_present_membersEv_l919() #[[ATTR2]]
+// CHECK-NOUSE-I386-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2ST20test_present_membersEv_l887() #[[ATTR3]]
 // CHECK-NOUSE-I386-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK-NOUSE-I386:       omp_offload.cont:
 // CHECK-NOUSE-I386-NEXT:    ret void

diff  --git a/clang/test/OpenMP/target_map_member_expr_array_section_codegen.cpp b/clang/test/OpenMP/target_map_member_expr_array_section_codegen.cpp
index 8c15338ca9814..fefcf59274434 100644
--- a/clang/test/OpenMP/target_map_member_expr_array_section_codegen.cpp
+++ b/clang/test/OpenMP/target_map_member_expr_array_section_codegen.cpp
@@ -5,9 +5,11 @@
 #ifndef HEADER
 #define HEADER
 
+// CHECK: [[SIZE_ENTER:@.+]] = private unnamed_addr constant [2 x i64] [i64 0, i64 24]
 // 0 = OMP_MAP_NONE
 // 281474976710656 = 0x1000000000000 = OMP_MAP_MEMBER_OF of 1-st element
 // CHECK: [[MAP_ENTER:@.+]] = private unnamed_addr constant [2 x i64] [i64 0, i64 281474976710656]
+// CHECK: [[SIZE_EXIT:@.+]] = private unnamed_addr constant [2 x i64] [i64 0, i64 24]
 // 281474976710664 = 0x1000000000008 = OMP_MAP_MEMBER_OF of 1-st element | OMP_MAP_DELETE
 // CHECK: [[MAP_EXIT:@.+]] = private unnamed_addr constant [2 x i64] [i64 0, i64 281474976710664]
 template <typename T>
@@ -55,8 +57,6 @@ struct maptest {
     // CHECK: [[PTR1:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[PTRS]], i32 0, i32 1
     // CHECK: [[PTR1_DATA:%.+]] = bitcast i8** [[PTR1]] to float**
     // CHECK: store float* [[S_DATA_0_ADDR]], float** [[PTR1_DATA]],
-    // CHECK: [[SIZE1:%.+]] = getelementptr inbounds [2 x i64], [2 x i64]* [[SIZES]], i32 0, i32 1
-    // CHECK: store i64 24, i64* [[SIZE1]],
     // CHECK: [[BPTR:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BPTRS]], i32 0, i32 0
     // CHECK: [[PTR:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[PTRS]], i32 0, i32 0
     // CHECK: [[SIZE:%.+]] = getelementptr inbounds [2 x i64], [2 x i64]* [[SIZES]], i32 0, i32 0
@@ -99,8 +99,6 @@ struct maptest {
     // CHECK: [[PTR1:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[PTRS]], i32 0, i32 1
     // CHECK: [[PTR1_DATA:%.+]] = bitcast i8** [[PTR1]] to float**
     // CHECK: store float* [[S_DATA_0_ADDR]], float** [[PTR1_DATA]],
-    // CHECK: [[SIZE1:%.+]] = getelementptr inbounds [2 x i64], [2 x i64]* [[SIZES]], i32 0, i32 1
-    // CHECK: store i64 24, i64* [[SIZE1]],
     // CHECK: [[BPTR:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BPTRS]], i32 0, i32 0
     // CHECK: [[PTR:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[PTRS]], i32 0, i32 0
     // CHECK: [[SIZE:%.+]] = getelementptr inbounds [2 x i64], [2 x i64]* [[SIZES]], i32 0, i32 0

diff  --git a/clang/test/OpenMP/target_parallel_codegen.cpp b/clang/test/OpenMP/target_parallel_codegen.cpp
index 32b5751cb5648..f7d7fb5ad364c 100644
--- a/clang/test/OpenMP/target_parallel_codegen.cpp
+++ b/clang/test/OpenMP/target_parallel_codegen.cpp
@@ -417,14 +417,14 @@ int bar(int n){
 // CHECK1-NEXT:    [[TMP46:%.*]] = mul nuw i64 [[TMP2]], 4
 // CHECK1-NEXT:    [[TMP47:%.*]] = mul nuw i64 5, [[TMP5]]
 // CHECK1-NEXT:    [[TMP48:%.*]] = mul nuw i64 [[TMP47]], 8
-// CHECK1-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS16]], i32 0, i32 0
-// CHECK1-NEXT:    [[TMP50:%.*]] = bitcast i8** [[TMP49]] to i64*
-// CHECK1-NEXT:    store i64 [[TMP44]], i64* [[TMP50]], align 8
-// CHECK1-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS17]], i32 0, i32 0
-// CHECK1-NEXT:    [[TMP52:%.*]] = bitcast i8** [[TMP51]] to i64*
-// CHECK1-NEXT:    store i64 [[TMP44]], i64* [[TMP52]], align 8
-// CHECK1-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK1-NEXT:    store i64 4, i64* [[TMP53]], align 8
+// CHECK1-NEXT:    [[TMP49:%.*]] = bitcast [9 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK1-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP49]], i8* align 8 bitcast ([9 x i64]* @.offload_sizes.8 to i8*), i64 72, i1 false)
+// CHECK1-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS16]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP51:%.*]] = bitcast i8** [[TMP50]] to i64*
+// CHECK1-NEXT:    store i64 [[TMP44]], i64* [[TMP51]], align 8
+// CHECK1-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS17]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP53:%.*]] = bitcast i8** [[TMP52]] to i64*
+// CHECK1-NEXT:    store i64 [[TMP44]], i64* [[TMP53]], align 8
 // CHECK1-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS18]], i64 0, i64 0
 // CHECK1-NEXT:    store i8* null, i8** [[TMP54]], align 8
 // CHECK1-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS16]], i32 0, i32 1
@@ -433,86 +433,74 @@ int bar(int n){
 // CHECK1-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS17]], i32 0, i32 1
 // CHECK1-NEXT:    [[TMP58:%.*]] = bitcast i8** [[TMP57]] to [10 x float]**
 // CHECK1-NEXT:    store [10 x float]* [[B]], [10 x float]** [[TMP58]], align 8
-// CHECK1-NEXT:    [[TMP59:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK1-NEXT:    store i64 40, i64* [[TMP59]], align 8
-// CHECK1-NEXT:    [[TMP60:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS18]], i64 0, i64 1
-// CHECK1-NEXT:    store i8* null, i8** [[TMP60]], align 8
-// CHECK1-NEXT:    [[TMP61:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS16]], i32 0, i32 2
-// CHECK1-NEXT:    [[TMP62:%.*]] = bitcast i8** [[TMP61]] to i64*
-// CHECK1-NEXT:    store i64 [[TMP2]], i64* [[TMP62]], align 8
-// CHECK1-NEXT:    [[TMP63:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS17]], i32 0, i32 2
-// CHECK1-NEXT:    [[TMP64:%.*]] = bitcast i8** [[TMP63]] to i64*
-// CHECK1-NEXT:    store i64 [[TMP2]], i64* [[TMP64]], align 8
-// CHECK1-NEXT:    [[TMP65:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK1-NEXT:    store i64 8, i64* [[TMP65]], align 8
-// CHECK1-NEXT:    [[TMP66:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS18]], i64 0, i64 2
-// CHECK1-NEXT:    store i8* null, i8** [[TMP66]], align 8
-// CHECK1-NEXT:    [[TMP67:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS16]], i32 0, i32 3
+// CHECK1-NEXT:    [[TMP59:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS18]], i64 0, i64 1
+// CHECK1-NEXT:    store i8* null, i8** [[TMP59]], align 8
+// CHECK1-NEXT:    [[TMP60:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS16]], i32 0, i32 2
+// CHECK1-NEXT:    [[TMP61:%.*]] = bitcast i8** [[TMP60]] to i64*
+// CHECK1-NEXT:    store i64 [[TMP2]], i64* [[TMP61]], align 8
+// CHECK1-NEXT:    [[TMP62:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS17]], i32 0, i32 2
+// CHECK1-NEXT:    [[TMP63:%.*]] = bitcast i8** [[TMP62]] to i64*
+// CHECK1-NEXT:    store i64 [[TMP2]], i64* [[TMP63]], align 8
+// CHECK1-NEXT:    [[TMP64:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS18]], i64 0, i64 2
+// CHECK1-NEXT:    store i8* null, i8** [[TMP64]], align 8
+// CHECK1-NEXT:    [[TMP65:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS16]], i32 0, i32 3
+// CHECK1-NEXT:    [[TMP66:%.*]] = bitcast i8** [[TMP65]] to float**
+// CHECK1-NEXT:    store float* [[VLA]], float** [[TMP66]], align 8
+// CHECK1-NEXT:    [[TMP67:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS17]], i32 0, i32 3
 // CHECK1-NEXT:    [[TMP68:%.*]] = bitcast i8** [[TMP67]] to float**
 // CHECK1-NEXT:    store float* [[VLA]], float** [[TMP68]], align 8
-// CHECK1-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS17]], i32 0, i32 3
-// CHECK1-NEXT:    [[TMP70:%.*]] = bitcast i8** [[TMP69]] to float**
-// CHECK1-NEXT:    store float* [[VLA]], float** [[TMP70]], align 8
-// CHECK1-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK1-NEXT:    store i64 [[TMP46]], i64* [[TMP71]], align 8
-// CHECK1-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS18]], i64 0, i64 3
-// CHECK1-NEXT:    store i8* null, i8** [[TMP72]], align 8
-// CHECK1-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS16]], i32 0, i32 4
+// CHECK1-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
+// CHECK1-NEXT:    store i64 [[TMP46]], i64* [[TMP69]], align 8
+// CHECK1-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS18]], i64 0, i64 3
+// CHECK1-NEXT:    store i8* null, i8** [[TMP70]], align 8
+// CHECK1-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS16]], i32 0, i32 4
+// CHECK1-NEXT:    [[TMP72:%.*]] = bitcast i8** [[TMP71]] to [5 x [10 x double]]**
+// CHECK1-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP72]], align 8
+// CHECK1-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS17]], i32 0, i32 4
 // CHECK1-NEXT:    [[TMP74:%.*]] = bitcast i8** [[TMP73]] to [5 x [10 x double]]**
 // CHECK1-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP74]], align 8
-// CHECK1-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS17]], i32 0, i32 4
-// CHECK1-NEXT:    [[TMP76:%.*]] = bitcast i8** [[TMP75]] to [5 x [10 x double]]**
-// CHECK1-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP76]], align 8
-// CHECK1-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK1-NEXT:    store i64 400, i64* [[TMP77]], align 8
-// CHECK1-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS18]], i64 0, i64 4
-// CHECK1-NEXT:    store i8* null, i8** [[TMP78]], align 8
-// CHECK1-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS16]], i32 0, i32 5
-// CHECK1-NEXT:    [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i64*
-// CHECK1-NEXT:    store i64 5, i64* [[TMP80]], align 8
-// CHECK1-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS17]], i32 0, i32 5
+// CHECK1-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS18]], i64 0, i64 4
+// CHECK1-NEXT:    store i8* null, i8** [[TMP75]], align 8
+// CHECK1-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS16]], i32 0, i32 5
+// CHECK1-NEXT:    [[TMP77:%.*]] = bitcast i8** [[TMP76]] to i64*
+// CHECK1-NEXT:    store i64 5, i64* [[TMP77]], align 8
+// CHECK1-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS17]], i32 0, i32 5
+// CHECK1-NEXT:    [[TMP79:%.*]] = bitcast i8** [[TMP78]] to i64*
+// CHECK1-NEXT:    store i64 5, i64* [[TMP79]], align 8
+// CHECK1-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS18]], i64 0, i64 5
+// CHECK1-NEXT:    store i8* null, i8** [[TMP80]], align 8
+// CHECK1-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS16]], i32 0, i32 6
 // CHECK1-NEXT:    [[TMP82:%.*]] = bitcast i8** [[TMP81]] to i64*
-// CHECK1-NEXT:    store i64 5, i64* [[TMP82]], align 8
-// CHECK1-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 5
-// CHECK1-NEXT:    store i64 8, i64* [[TMP83]], align 8
-// CHECK1-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS18]], i64 0, i64 5
-// CHECK1-NEXT:    store i8* null, i8** [[TMP84]], align 8
-// CHECK1-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS16]], i32 0, i32 6
-// CHECK1-NEXT:    [[TMP86:%.*]] = bitcast i8** [[TMP85]] to i64*
-// CHECK1-NEXT:    store i64 [[TMP5]], i64* [[TMP86]], align 8
-// CHECK1-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS17]], i32 0, i32 6
-// CHECK1-NEXT:    [[TMP88:%.*]] = bitcast i8** [[TMP87]] to i64*
-// CHECK1-NEXT:    store i64 [[TMP5]], i64* [[TMP88]], align 8
-// CHECK1-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 6
-// CHECK1-NEXT:    store i64 8, i64* [[TMP89]], align 8
-// CHECK1-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS18]], i64 0, i64 6
-// CHECK1-NEXT:    store i8* null, i8** [[TMP90]], align 8
-// CHECK1-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS16]], i32 0, i32 7
-// CHECK1-NEXT:    [[TMP92:%.*]] = bitcast i8** [[TMP91]] to double**
-// CHECK1-NEXT:    store double* [[VLA1]], double** [[TMP92]], align 8
-// CHECK1-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS17]], i32 0, i32 7
-// CHECK1-NEXT:    [[TMP94:%.*]] = bitcast i8** [[TMP93]] to double**
-// CHECK1-NEXT:    store double* [[VLA1]], double** [[TMP94]], align 8
-// CHECK1-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
-// CHECK1-NEXT:    store i64 [[TMP48]], i64* [[TMP95]], align 8
-// CHECK1-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS18]], i64 0, i64 7
+// CHECK1-NEXT:    store i64 [[TMP5]], i64* [[TMP82]], align 8
+// CHECK1-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS17]], i32 0, i32 6
+// CHECK1-NEXT:    [[TMP84:%.*]] = bitcast i8** [[TMP83]] to i64*
+// CHECK1-NEXT:    store i64 [[TMP5]], i64* [[TMP84]], align 8
+// CHECK1-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS18]], i64 0, i64 6
+// CHECK1-NEXT:    store i8* null, i8** [[TMP85]], align 8
+// CHECK1-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS16]], i32 0, i32 7
+// CHECK1-NEXT:    [[TMP87:%.*]] = bitcast i8** [[TMP86]] to double**
+// CHECK1-NEXT:    store double* [[VLA1]], double** [[TMP87]], align 8
+// CHECK1-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS17]], i32 0, i32 7
+// CHECK1-NEXT:    [[TMP89:%.*]] = bitcast i8** [[TMP88]] to double**
+// CHECK1-NEXT:    store double* [[VLA1]], double** [[TMP89]], align 8
+// CHECK1-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
+// CHECK1-NEXT:    store i64 [[TMP48]], i64* [[TMP90]], align 8
+// CHECK1-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS18]], i64 0, i64 7
+// CHECK1-NEXT:    store i8* null, i8** [[TMP91]], align 8
+// CHECK1-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS16]], i32 0, i32 8
+// CHECK1-NEXT:    [[TMP93:%.*]] = bitcast i8** [[TMP92]] to %struct.TT**
+// CHECK1-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP93]], align 8
+// CHECK1-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS17]], i32 0, i32 8
+// CHECK1-NEXT:    [[TMP95:%.*]] = bitcast i8** [[TMP94]] to %struct.TT**
+// CHECK1-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP95]], align 8
+// CHECK1-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS18]], i64 0, i64 8
 // CHECK1-NEXT:    store i8* null, i8** [[TMP96]], align 8
-// CHECK1-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS16]], i32 0, i32 8
-// CHECK1-NEXT:    [[TMP98:%.*]] = bitcast i8** [[TMP97]] to %struct.TT**
-// CHECK1-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP98]], align 8
-// CHECK1-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS17]], i32 0, i32 8
-// CHECK1-NEXT:    [[TMP100:%.*]] = bitcast i8** [[TMP99]] to %struct.TT**
-// CHECK1-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP100]], align 8
-// CHECK1-NEXT:    [[TMP101:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 8
-// CHECK1-NEXT:    store i64 16, i64* [[TMP101]], align 8
-// CHECK1-NEXT:    [[TMP102:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS18]], i64 0, i64 8
-// CHECK1-NEXT:    store i8* null, i8** [[TMP102]], align 8
-// CHECK1-NEXT:    [[TMP103:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS16]], i32 0, i32 0
-// CHECK1-NEXT:    [[TMP104:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS17]], i32 0, i32 0
-// CHECK1-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK1-NEXT:    [[TMP106:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l144.region_id, i32 9, i8** [[TMP103]], i8** [[TMP104]], i64* [[TMP105]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.8, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
-// CHECK1-NEXT:    [[TMP107:%.*]] = icmp ne i32 [[TMP106]], 0
-// CHECK1-NEXT:    br i1 [[TMP107]], label [[OMP_OFFLOAD_FAILED19:%.*]], label [[OMP_OFFLOAD_CONT20:%.*]]
+// CHECK1-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS16]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS17]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP100:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l144.region_id, i32 9, i8** [[TMP97]], i8** [[TMP98]], i64* [[TMP99]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
+// CHECK1-NEXT:    [[TMP101:%.*]] = icmp ne i32 [[TMP100]], 0
+// CHECK1-NEXT:    br i1 [[TMP101]], label [[OMP_OFFLOAD_FAILED19:%.*]], label [[OMP_OFFLOAD_CONT20:%.*]]
 // CHECK1:       omp_offload.failed19:
 // CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l144(i64 [[TMP44]], [10 x float]* [[B]], i64 [[TMP2]], float* [[VLA]], [5 x [10 x double]]* [[C]], i64 5, i64 [[TMP5]], double* [[VLA1]], %struct.TT* [[D]]) #[[ATTR4]]
 // CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT20]]
@@ -522,10 +510,10 @@ int bar(int n){
 // CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l144(i64 [[TMP44]], [10 x float]* [[B]], i64 [[TMP2]], float* [[VLA]], [5 x [10 x double]]* [[C]], i64 5, i64 [[TMP5]], double* [[VLA1]], %struct.TT* [[D]]) #[[ATTR4]]
 // CHECK1-NEXT:    br label [[OMP_IF_END22]]
 // CHECK1:       omp_if.end22:
-// CHECK1-NEXT:    [[TMP108:%.*]] = load i32, i32* [[A]], align 4
-// CHECK1-NEXT:    [[TMP109:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK1-NEXT:    call void @llvm.stackrestore(i8* [[TMP109]])
-// CHECK1-NEXT:    ret i32 [[TMP108]]
+// CHECK1-NEXT:    [[TMP102:%.*]] = load i32, i32* [[A]], align 4
+// CHECK1-NEXT:    [[TMP103:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK1-NEXT:    call void @llvm.stackrestore(i8* [[TMP103]])
+// CHECK1-NEXT:    ret i32 [[TMP102]]
 //
 //
 // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l100
@@ -888,14 +876,14 @@ int bar(int n){
 // CHECK1-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP8:%.*]] = mul nuw i64 2, [[TMP2]]
 // CHECK1-NEXT:    [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 2
-// CHECK1-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[TMP11:%.*]] = bitcast i8** [[TMP10]] to %struct.S1**
-// CHECK1-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP11]], align 8
-// CHECK1-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[TMP13:%.*]] = bitcast i8** [[TMP12]] to double**
-// CHECK1-NEXT:    store double* [[A]], double** [[TMP13]], align 8
-// CHECK1-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK1-NEXT:    store i64 8, i64* [[TMP14]], align 8
+// CHECK1-NEXT:    [[TMP10:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK1-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP10]], i8* align 8 bitcast ([5 x i64]* @.offload_sizes.11 to i8*), i64 40, i1 false)
+// CHECK1-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP12:%.*]] = bitcast i8** [[TMP11]] to %struct.S1**
+// CHECK1-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP12]], align 8
+// CHECK1-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to double**
+// CHECK1-NEXT:    store double* [[A]], double** [[TMP14]], align 8
 // CHECK1-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
 // CHECK1-NEXT:    store i8* null, i8** [[TMP15]], align 8
 // CHECK1-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -904,46 +892,40 @@ int bar(int n){
 // CHECK1-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK1-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i64*
 // CHECK1-NEXT:    store i64 [[TMP6]], i64* [[TMP19]], align 8
-// CHECK1-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK1-NEXT:    store i64 4, i64* [[TMP20]], align 8
-// CHECK1-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK1-NEXT:    store i8* null, i8** [[TMP21]], align 8
-// CHECK1-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK1-NEXT:    [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i64*
-// CHECK1-NEXT:    store i64 2, i64* [[TMP23]], align 8
-// CHECK1-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK1-NEXT:    [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i64*
-// CHECK1-NEXT:    store i64 2, i64* [[TMP25]], align 8
-// CHECK1-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK1-NEXT:    store i64 8, i64* [[TMP26]], align 8
-// CHECK1-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK1-NEXT:    store i8* null, i8** [[TMP27]], align 8
-// CHECK1-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK1-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK1-NEXT:    store i8* null, i8** [[TMP20]], align 8
+// CHECK1-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK1-NEXT:    [[TMP22:%.*]] = bitcast i8** [[TMP21]] to i64*
+// CHECK1-NEXT:    store i64 2, i64* [[TMP22]], align 8
+// CHECK1-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK1-NEXT:    [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i64*
+// CHECK1-NEXT:    store i64 2, i64* [[TMP24]], align 8
+// CHECK1-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK1-NEXT:    store i8* null, i8** [[TMP25]], align 8
+// CHECK1-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK1-NEXT:    [[TMP27:%.*]] = bitcast i8** [[TMP26]] to i64*
+// CHECK1-NEXT:    store i64 [[TMP2]], i64* [[TMP27]], align 8
+// CHECK1-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
 // CHECK1-NEXT:    [[TMP29:%.*]] = bitcast i8** [[TMP28]] to i64*
 // CHECK1-NEXT:    store i64 [[TMP2]], i64* [[TMP29]], align 8
-// CHECK1-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK1-NEXT:    [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i64*
-// CHECK1-NEXT:    store i64 [[TMP2]], i64* [[TMP31]], align 8
-// CHECK1-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK1-NEXT:    store i64 8, i64* [[TMP32]], align 8
-// CHECK1-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
-// CHECK1-NEXT:    store i8* null, i8** [[TMP33]], align 8
-// CHECK1-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK1-NEXT:    [[TMP35:%.*]] = bitcast i8** [[TMP34]] to i16**
-// CHECK1-NEXT:    store i16* [[VLA]], i16** [[TMP35]], align 8
-// CHECK1-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
-// CHECK1-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i16**
-// CHECK1-NEXT:    store i16* [[VLA]], i16** [[TMP37]], align 8
-// CHECK1-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK1-NEXT:    store i64 [[TMP9]], i64* [[TMP38]], align 8
-// CHECK1-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
-// CHECK1-NEXT:    store i8* null, i8** [[TMP39]], align 8
-// CHECK1-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK1-NEXT:    [[TMP43:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l216.region_id, i32 5, i8** [[TMP40]], i8** [[TMP41]], i64* [[TMP42]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.10, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
-// CHECK1-NEXT:    [[TMP44:%.*]] = icmp ne i32 [[TMP43]], 0
-// CHECK1-NEXT:    br i1 [[TMP44]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK1-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
+// CHECK1-NEXT:    store i8* null, i8** [[TMP30]], align 8
+// CHECK1-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK1-NEXT:    [[TMP32:%.*]] = bitcast i8** [[TMP31]] to i16**
+// CHECK1-NEXT:    store i16* [[VLA]], i16** [[TMP32]], align 8
+// CHECK1-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK1-NEXT:    [[TMP34:%.*]] = bitcast i8** [[TMP33]] to i16**
+// CHECK1-NEXT:    store i16* [[VLA]], i16** [[TMP34]], align 8
+// CHECK1-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
+// CHECK1-NEXT:    store i64 [[TMP9]], i64* [[TMP35]], align 8
+// CHECK1-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
+// CHECK1-NEXT:    store i8* null, i8** [[TMP36]], align 8
+// CHECK1-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP40:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l216.region_id, i32 5, i8** [[TMP37]], i8** [[TMP38]], i64* [[TMP39]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.12, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
+// CHECK1-NEXT:    [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0
+// CHECK1-NEXT:    br i1 [[TMP41]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK1:       omp_offload.failed:
 // CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l216(%struct.S1* [[THIS1]], i64 [[TMP6]], i64 2, i64 [[TMP2]], i16* [[VLA]]) #[[ATTR4]]
 // CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT]]
@@ -953,15 +935,15 @@ int bar(int n){
 // CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l216(%struct.S1* [[THIS1]], i64 [[TMP6]], i64 2, i64 [[TMP2]], i16* [[VLA]]) #[[ATTR4]]
 // CHECK1-NEXT:    br label [[OMP_IF_END]]
 // CHECK1:       omp_if.end:
-// CHECK1-NEXT:    [[TMP45:%.*]] = mul nsw i64 1, [[TMP2]]
-// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i64 [[TMP45]]
+// CHECK1-NEXT:    [[TMP42:%.*]] = mul nsw i64 1, [[TMP2]]
+// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i64 [[TMP42]]
 // CHECK1-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1
-// CHECK1-NEXT:    [[TMP46:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
-// CHECK1-NEXT:    [[CONV3:%.*]] = sext i16 [[TMP46]] to i32
-// CHECK1-NEXT:    [[TMP47:%.*]] = load i32, i32* [[B]], align 4
-// CHECK1-NEXT:    [[ADD4:%.*]] = add nsw i32 [[CONV3]], [[TMP47]]
-// CHECK1-NEXT:    [[TMP48:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK1-NEXT:    call void @llvm.stackrestore(i8* [[TMP48]])
+// CHECK1-NEXT:    [[TMP43:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
+// CHECK1-NEXT:    [[CONV3:%.*]] = sext i16 [[TMP43]] to i32
+// CHECK1-NEXT:    [[TMP44:%.*]] = load i32, i32* [[B]], align 4
+// CHECK1-NEXT:    [[ADD4:%.*]] = add nsw i32 [[CONV3]], [[TMP44]]
+// CHECK1-NEXT:    [[TMP45:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK1-NEXT:    call void @llvm.stackrestore(i8* [[TMP45]])
 // CHECK1-NEXT:    ret i32 [[ADD4]]
 //
 //
@@ -1033,7 +1015,7 @@ int bar(int n){
 // CHECK1-NEXT:    store i8* null, i8** [[TMP26]], align 8
 // CHECK1-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[TMP29:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l198.region_id, i32 4, i8** [[TMP27]], i8** [[TMP28]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.12, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
+// CHECK1-NEXT:    [[TMP29:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l198.region_id, i32 4, i8** [[TMP27]], i8** [[TMP28]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.14, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.15, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
 // CHECK1-NEXT:    [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0
 // CHECK1-NEXT:    br i1 [[TMP30]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK1:       omp_offload.failed:
@@ -1102,7 +1084,7 @@ int bar(int n){
 // CHECK1-NEXT:    store i8* null, i8** [[TMP19]], align 8
 // CHECK1-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l181.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.15, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.16, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
+// CHECK1-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l181.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.17, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.18, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
 // CHECK1-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK1-NEXT:    br i1 [[TMP23]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK1:       omp_offload.failed:
@@ -1141,11 +1123,11 @@ int bar(int n){
 // CHECK1-NEXT:    [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32*
 // CHECK1-NEXT:    store i32 [[TMP4]], i32* [[CONV3]], align 4
 // CHECK1-NEXT:    [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8
-// CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i64, i64, i64, i16*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i64 [[TMP5]], i64 [[TMP1]], i64 [[TMP2]], i16* [[TMP3]])
+// CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i64, i64, i64, i16*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i64 [[TMP5]], i64 [[TMP1]], i64 [[TMP2]], i16* [[TMP3]])
 // CHECK1-NEXT:    ret void
 //
 //
-// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..9
+// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..10
 // CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR3]] {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -1214,11 +1196,11 @@ int bar(int n){
 // CHECK1-NEXT:    [[CONV5:%.*]] = bitcast i64* [[AAA_CASTED]] to i8*
 // CHECK1-NEXT:    store i8 [[TMP5]], i8* [[CONV5]], align 1
 // CHECK1-NEXT:    [[TMP6:%.*]] = load i64, i64* [[AAA_CASTED]], align 8
-// CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, [10 x i32]*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], [10 x i32]* [[TMP0]])
+// CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, [10 x i32]*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], [10 x i32]* [[TMP0]])
 // CHECK1-NEXT:    ret void
 //
 //
-// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..11
+// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..13
 // CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -1279,11 +1261,11 @@ int bar(int n){
 // CHECK1-NEXT:    [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16*
 // CHECK1-NEXT:    store i16 [[TMP3]], i16* [[CONV3]], align 2
 // CHECK1-NEXT:    [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8
-// CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], [10 x i32]* [[TMP0]])
+// CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], [10 x i32]* [[TMP0]])
 // CHECK1-NEXT:    ret void
 //
 //
-// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..14
+// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..16
 // CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -1315,7 +1297,7 @@ int bar(int n){
 //
 //
 // CHECK1-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK1-SAME: () #[[ATTR7:[0-9]+]] {
+// CHECK1-SAME: () #[[ATTR8:[0-9]+]] {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK1-NEXT:    ret void
@@ -1447,14 +1429,14 @@ int bar(int n){
 // CHECK2-NEXT:    [[TMP46:%.*]] = mul nuw i64 [[TMP2]], 4
 // CHECK2-NEXT:    [[TMP47:%.*]] = mul nuw i64 5, [[TMP5]]
 // CHECK2-NEXT:    [[TMP48:%.*]] = mul nuw i64 [[TMP47]], 8
-// CHECK2-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS16]], i32 0, i32 0
-// CHECK2-NEXT:    [[TMP50:%.*]] = bitcast i8** [[TMP49]] to i64*
-// CHECK2-NEXT:    store i64 [[TMP44]], i64* [[TMP50]], align 8
-// CHECK2-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS17]], i32 0, i32 0
-// CHECK2-NEXT:    [[TMP52:%.*]] = bitcast i8** [[TMP51]] to i64*
-// CHECK2-NEXT:    store i64 [[TMP44]], i64* [[TMP52]], align 8
-// CHECK2-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK2-NEXT:    store i64 4, i64* [[TMP53]], align 8
+// CHECK2-NEXT:    [[TMP49:%.*]] = bitcast [9 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK2-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP49]], i8* align 8 bitcast ([9 x i64]* @.offload_sizes.8 to i8*), i64 72, i1 false)
+// CHECK2-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS16]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP51:%.*]] = bitcast i8** [[TMP50]] to i64*
+// CHECK2-NEXT:    store i64 [[TMP44]], i64* [[TMP51]], align 8
+// CHECK2-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS17]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP53:%.*]] = bitcast i8** [[TMP52]] to i64*
+// CHECK2-NEXT:    store i64 [[TMP44]], i64* [[TMP53]], align 8
 // CHECK2-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS18]], i64 0, i64 0
 // CHECK2-NEXT:    store i8* null, i8** [[TMP54]], align 8
 // CHECK2-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS16]], i32 0, i32 1
@@ -1463,86 +1445,74 @@ int bar(int n){
 // CHECK2-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS17]], i32 0, i32 1
 // CHECK2-NEXT:    [[TMP58:%.*]] = bitcast i8** [[TMP57]] to [10 x float]**
 // CHECK2-NEXT:    store [10 x float]* [[B]], [10 x float]** [[TMP58]], align 8
-// CHECK2-NEXT:    [[TMP59:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK2-NEXT:    store i64 40, i64* [[TMP59]], align 8
-// CHECK2-NEXT:    [[TMP60:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS18]], i64 0, i64 1
-// CHECK2-NEXT:    store i8* null, i8** [[TMP60]], align 8
-// CHECK2-NEXT:    [[TMP61:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS16]], i32 0, i32 2
-// CHECK2-NEXT:    [[TMP62:%.*]] = bitcast i8** [[TMP61]] to i64*
-// CHECK2-NEXT:    store i64 [[TMP2]], i64* [[TMP62]], align 8
-// CHECK2-NEXT:    [[TMP63:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS17]], i32 0, i32 2
-// CHECK2-NEXT:    [[TMP64:%.*]] = bitcast i8** [[TMP63]] to i64*
-// CHECK2-NEXT:    store i64 [[TMP2]], i64* [[TMP64]], align 8
-// CHECK2-NEXT:    [[TMP65:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK2-NEXT:    store i64 8, i64* [[TMP65]], align 8
-// CHECK2-NEXT:    [[TMP66:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS18]], i64 0, i64 2
-// CHECK2-NEXT:    store i8* null, i8** [[TMP66]], align 8
-// CHECK2-NEXT:    [[TMP67:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS16]], i32 0, i32 3
+// CHECK2-NEXT:    [[TMP59:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS18]], i64 0, i64 1
+// CHECK2-NEXT:    store i8* null, i8** [[TMP59]], align 8
+// CHECK2-NEXT:    [[TMP60:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS16]], i32 0, i32 2
+// CHECK2-NEXT:    [[TMP61:%.*]] = bitcast i8** [[TMP60]] to i64*
+// CHECK2-NEXT:    store i64 [[TMP2]], i64* [[TMP61]], align 8
+// CHECK2-NEXT:    [[TMP62:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS17]], i32 0, i32 2
+// CHECK2-NEXT:    [[TMP63:%.*]] = bitcast i8** [[TMP62]] to i64*
+// CHECK2-NEXT:    store i64 [[TMP2]], i64* [[TMP63]], align 8
+// CHECK2-NEXT:    [[TMP64:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS18]], i64 0, i64 2
+// CHECK2-NEXT:    store i8* null, i8** [[TMP64]], align 8
+// CHECK2-NEXT:    [[TMP65:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS16]], i32 0, i32 3
+// CHECK2-NEXT:    [[TMP66:%.*]] = bitcast i8** [[TMP65]] to float**
+// CHECK2-NEXT:    store float* [[VLA]], float** [[TMP66]], align 8
+// CHECK2-NEXT:    [[TMP67:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS17]], i32 0, i32 3
 // CHECK2-NEXT:    [[TMP68:%.*]] = bitcast i8** [[TMP67]] to float**
 // CHECK2-NEXT:    store float* [[VLA]], float** [[TMP68]], align 8
-// CHECK2-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS17]], i32 0, i32 3
-// CHECK2-NEXT:    [[TMP70:%.*]] = bitcast i8** [[TMP69]] to float**
-// CHECK2-NEXT:    store float* [[VLA]], float** [[TMP70]], align 8
-// CHECK2-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK2-NEXT:    store i64 [[TMP46]], i64* [[TMP71]], align 8
-// CHECK2-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS18]], i64 0, i64 3
-// CHECK2-NEXT:    store i8* null, i8** [[TMP72]], align 8
-// CHECK2-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS16]], i32 0, i32 4
+// CHECK2-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
+// CHECK2-NEXT:    store i64 [[TMP46]], i64* [[TMP69]], align 8
+// CHECK2-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS18]], i64 0, i64 3
+// CHECK2-NEXT:    store i8* null, i8** [[TMP70]], align 8
+// CHECK2-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS16]], i32 0, i32 4
+// CHECK2-NEXT:    [[TMP72:%.*]] = bitcast i8** [[TMP71]] to [5 x [10 x double]]**
+// CHECK2-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP72]], align 8
+// CHECK2-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS17]], i32 0, i32 4
 // CHECK2-NEXT:    [[TMP74:%.*]] = bitcast i8** [[TMP73]] to [5 x [10 x double]]**
 // CHECK2-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP74]], align 8
-// CHECK2-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS17]], i32 0, i32 4
-// CHECK2-NEXT:    [[TMP76:%.*]] = bitcast i8** [[TMP75]] to [5 x [10 x double]]**
-// CHECK2-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP76]], align 8
-// CHECK2-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK2-NEXT:    store i64 400, i64* [[TMP77]], align 8
-// CHECK2-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS18]], i64 0, i64 4
-// CHECK2-NEXT:    store i8* null, i8** [[TMP78]], align 8
-// CHECK2-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS16]], i32 0, i32 5
-// CHECK2-NEXT:    [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i64*
-// CHECK2-NEXT:    store i64 5, i64* [[TMP80]], align 8
-// CHECK2-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS17]], i32 0, i32 5
+// CHECK2-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS18]], i64 0, i64 4
+// CHECK2-NEXT:    store i8* null, i8** [[TMP75]], align 8
+// CHECK2-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS16]], i32 0, i32 5
+// CHECK2-NEXT:    [[TMP77:%.*]] = bitcast i8** [[TMP76]] to i64*
+// CHECK2-NEXT:    store i64 5, i64* [[TMP77]], align 8
+// CHECK2-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS17]], i32 0, i32 5
+// CHECK2-NEXT:    [[TMP79:%.*]] = bitcast i8** [[TMP78]] to i64*
+// CHECK2-NEXT:    store i64 5, i64* [[TMP79]], align 8
+// CHECK2-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS18]], i64 0, i64 5
+// CHECK2-NEXT:    store i8* null, i8** [[TMP80]], align 8
+// CHECK2-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS16]], i32 0, i32 6
 // CHECK2-NEXT:    [[TMP82:%.*]] = bitcast i8** [[TMP81]] to i64*
-// CHECK2-NEXT:    store i64 5, i64* [[TMP82]], align 8
-// CHECK2-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 5
-// CHECK2-NEXT:    store i64 8, i64* [[TMP83]], align 8
-// CHECK2-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS18]], i64 0, i64 5
-// CHECK2-NEXT:    store i8* null, i8** [[TMP84]], align 8
-// CHECK2-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS16]], i32 0, i32 6
-// CHECK2-NEXT:    [[TMP86:%.*]] = bitcast i8** [[TMP85]] to i64*
-// CHECK2-NEXT:    store i64 [[TMP5]], i64* [[TMP86]], align 8
-// CHECK2-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS17]], i32 0, i32 6
-// CHECK2-NEXT:    [[TMP88:%.*]] = bitcast i8** [[TMP87]] to i64*
-// CHECK2-NEXT:    store i64 [[TMP5]], i64* [[TMP88]], align 8
-// CHECK2-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 6
-// CHECK2-NEXT:    store i64 8, i64* [[TMP89]], align 8
-// CHECK2-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS18]], i64 0, i64 6
-// CHECK2-NEXT:    store i8* null, i8** [[TMP90]], align 8
-// CHECK2-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS16]], i32 0, i32 7
-// CHECK2-NEXT:    [[TMP92:%.*]] = bitcast i8** [[TMP91]] to double**
-// CHECK2-NEXT:    store double* [[VLA1]], double** [[TMP92]], align 8
-// CHECK2-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS17]], i32 0, i32 7
-// CHECK2-NEXT:    [[TMP94:%.*]] = bitcast i8** [[TMP93]] to double**
-// CHECK2-NEXT:    store double* [[VLA1]], double** [[TMP94]], align 8
-// CHECK2-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
-// CHECK2-NEXT:    store i64 [[TMP48]], i64* [[TMP95]], align 8
-// CHECK2-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS18]], i64 0, i64 7
+// CHECK2-NEXT:    store i64 [[TMP5]], i64* [[TMP82]], align 8
+// CHECK2-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS17]], i32 0, i32 6
+// CHECK2-NEXT:    [[TMP84:%.*]] = bitcast i8** [[TMP83]] to i64*
+// CHECK2-NEXT:    store i64 [[TMP5]], i64* [[TMP84]], align 8
+// CHECK2-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS18]], i64 0, i64 6
+// CHECK2-NEXT:    store i8* null, i8** [[TMP85]], align 8
+// CHECK2-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS16]], i32 0, i32 7
+// CHECK2-NEXT:    [[TMP87:%.*]] = bitcast i8** [[TMP86]] to double**
+// CHECK2-NEXT:    store double* [[VLA1]], double** [[TMP87]], align 8
+// CHECK2-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS17]], i32 0, i32 7
+// CHECK2-NEXT:    [[TMP89:%.*]] = bitcast i8** [[TMP88]] to double**
+// CHECK2-NEXT:    store double* [[VLA1]], double** [[TMP89]], align 8
+// CHECK2-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
+// CHECK2-NEXT:    store i64 [[TMP48]], i64* [[TMP90]], align 8
+// CHECK2-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS18]], i64 0, i64 7
+// CHECK2-NEXT:    store i8* null, i8** [[TMP91]], align 8
+// CHECK2-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS16]], i32 0, i32 8
+// CHECK2-NEXT:    [[TMP93:%.*]] = bitcast i8** [[TMP92]] to %struct.TT**
+// CHECK2-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP93]], align 8
+// CHECK2-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS17]], i32 0, i32 8
+// CHECK2-NEXT:    [[TMP95:%.*]] = bitcast i8** [[TMP94]] to %struct.TT**
+// CHECK2-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP95]], align 8
+// CHECK2-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS18]], i64 0, i64 8
 // CHECK2-NEXT:    store i8* null, i8** [[TMP96]], align 8
-// CHECK2-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS16]], i32 0, i32 8
-// CHECK2-NEXT:    [[TMP98:%.*]] = bitcast i8** [[TMP97]] to %struct.TT**
-// CHECK2-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP98]], align 8
-// CHECK2-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS17]], i32 0, i32 8
-// CHECK2-NEXT:    [[TMP100:%.*]] = bitcast i8** [[TMP99]] to %struct.TT**
-// CHECK2-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP100]], align 8
-// CHECK2-NEXT:    [[TMP101:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 8
-// CHECK2-NEXT:    store i64 16, i64* [[TMP101]], align 8
-// CHECK2-NEXT:    [[TMP102:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS18]], i64 0, i64 8
-// CHECK2-NEXT:    store i8* null, i8** [[TMP102]], align 8
-// CHECK2-NEXT:    [[TMP103:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS16]], i32 0, i32 0
-// CHECK2-NEXT:    [[TMP104:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS17]], i32 0, i32 0
-// CHECK2-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK2-NEXT:    [[TMP106:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l144.region_id, i32 9, i8** [[TMP103]], i8** [[TMP104]], i64* [[TMP105]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.8, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
-// CHECK2-NEXT:    [[TMP107:%.*]] = icmp ne i32 [[TMP106]], 0
-// CHECK2-NEXT:    br i1 [[TMP107]], label [[OMP_OFFLOAD_FAILED19:%.*]], label [[OMP_OFFLOAD_CONT20:%.*]]
+// CHECK2-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS16]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS17]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP100:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l144.region_id, i32 9, i8** [[TMP97]], i8** [[TMP98]], i64* [[TMP99]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
+// CHECK2-NEXT:    [[TMP101:%.*]] = icmp ne i32 [[TMP100]], 0
+// CHECK2-NEXT:    br i1 [[TMP101]], label [[OMP_OFFLOAD_FAILED19:%.*]], label [[OMP_OFFLOAD_CONT20:%.*]]
 // CHECK2:       omp_offload.failed19:
 // CHECK2-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l144(i64 [[TMP44]], [10 x float]* [[B]], i64 [[TMP2]], float* [[VLA]], [5 x [10 x double]]* [[C]], i64 5, i64 [[TMP5]], double* [[VLA1]], %struct.TT* [[D]]) #[[ATTR4]]
 // CHECK2-NEXT:    br label [[OMP_OFFLOAD_CONT20]]
@@ -1552,10 +1522,10 @@ int bar(int n){
 // CHECK2-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l144(i64 [[TMP44]], [10 x float]* [[B]], i64 [[TMP2]], float* [[VLA]], [5 x [10 x double]]* [[C]], i64 5, i64 [[TMP5]], double* [[VLA1]], %struct.TT* [[D]]) #[[ATTR4]]
 // CHECK2-NEXT:    br label [[OMP_IF_END22]]
 // CHECK2:       omp_if.end22:
-// CHECK2-NEXT:    [[TMP108:%.*]] = load i32, i32* [[A]], align 4
-// CHECK2-NEXT:    [[TMP109:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK2-NEXT:    call void @llvm.stackrestore(i8* [[TMP109]])
-// CHECK2-NEXT:    ret i32 [[TMP108]]
+// CHECK2-NEXT:    [[TMP102:%.*]] = load i32, i32* [[A]], align 4
+// CHECK2-NEXT:    [[TMP103:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK2-NEXT:    call void @llvm.stackrestore(i8* [[TMP103]])
+// CHECK2-NEXT:    ret i32 [[TMP102]]
 //
 //
 // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l100
@@ -1918,14 +1888,14 @@ int bar(int n){
 // CHECK2-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0
 // CHECK2-NEXT:    [[TMP8:%.*]] = mul nuw i64 2, [[TMP2]]
 // CHECK2-NEXT:    [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 2
-// CHECK2-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK2-NEXT:    [[TMP11:%.*]] = bitcast i8** [[TMP10]] to %struct.S1**
-// CHECK2-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP11]], align 8
-// CHECK2-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK2-NEXT:    [[TMP13:%.*]] = bitcast i8** [[TMP12]] to double**
-// CHECK2-NEXT:    store double* [[A]], double** [[TMP13]], align 8
-// CHECK2-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK2-NEXT:    store i64 8, i64* [[TMP14]], align 8
+// CHECK2-NEXT:    [[TMP10:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK2-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP10]], i8* align 8 bitcast ([5 x i64]* @.offload_sizes.11 to i8*), i64 40, i1 false)
+// CHECK2-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP12:%.*]] = bitcast i8** [[TMP11]] to %struct.S1**
+// CHECK2-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP12]], align 8
+// CHECK2-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to double**
+// CHECK2-NEXT:    store double* [[A]], double** [[TMP14]], align 8
 // CHECK2-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
 // CHECK2-NEXT:    store i8* null, i8** [[TMP15]], align 8
 // CHECK2-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -1934,46 +1904,40 @@ int bar(int n){
 // CHECK2-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK2-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i64*
 // CHECK2-NEXT:    store i64 [[TMP6]], i64* [[TMP19]], align 8
-// CHECK2-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK2-NEXT:    store i64 4, i64* [[TMP20]], align 8
-// CHECK2-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK2-NEXT:    store i8* null, i8** [[TMP21]], align 8
-// CHECK2-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK2-NEXT:    [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i64*
-// CHECK2-NEXT:    store i64 2, i64* [[TMP23]], align 8
-// CHECK2-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK2-NEXT:    [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i64*
-// CHECK2-NEXT:    store i64 2, i64* [[TMP25]], align 8
-// CHECK2-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK2-NEXT:    store i64 8, i64* [[TMP26]], align 8
-// CHECK2-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK2-NEXT:    store i8* null, i8** [[TMP27]], align 8
-// CHECK2-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK2-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK2-NEXT:    store i8* null, i8** [[TMP20]], align 8
+// CHECK2-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK2-NEXT:    [[TMP22:%.*]] = bitcast i8** [[TMP21]] to i64*
+// CHECK2-NEXT:    store i64 2, i64* [[TMP22]], align 8
+// CHECK2-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK2-NEXT:    [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i64*
+// CHECK2-NEXT:    store i64 2, i64* [[TMP24]], align 8
+// CHECK2-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK2-NEXT:    store i8* null, i8** [[TMP25]], align 8
+// CHECK2-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK2-NEXT:    [[TMP27:%.*]] = bitcast i8** [[TMP26]] to i64*
+// CHECK2-NEXT:    store i64 [[TMP2]], i64* [[TMP27]], align 8
+// CHECK2-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
 // CHECK2-NEXT:    [[TMP29:%.*]] = bitcast i8** [[TMP28]] to i64*
 // CHECK2-NEXT:    store i64 [[TMP2]], i64* [[TMP29]], align 8
-// CHECK2-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK2-NEXT:    [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i64*
-// CHECK2-NEXT:    store i64 [[TMP2]], i64* [[TMP31]], align 8
-// CHECK2-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK2-NEXT:    store i64 8, i64* [[TMP32]], align 8
-// CHECK2-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
-// CHECK2-NEXT:    store i8* null, i8** [[TMP33]], align 8
-// CHECK2-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK2-NEXT:    [[TMP35:%.*]] = bitcast i8** [[TMP34]] to i16**
-// CHECK2-NEXT:    store i16* [[VLA]], i16** [[TMP35]], align 8
-// CHECK2-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
-// CHECK2-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i16**
-// CHECK2-NEXT:    store i16* [[VLA]], i16** [[TMP37]], align 8
-// CHECK2-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK2-NEXT:    store i64 [[TMP9]], i64* [[TMP38]], align 8
-// CHECK2-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
-// CHECK2-NEXT:    store i8* null, i8** [[TMP39]], align 8
-// CHECK2-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK2-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK2-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK2-NEXT:    [[TMP43:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l216.region_id, i32 5, i8** [[TMP40]], i8** [[TMP41]], i64* [[TMP42]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.10, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
-// CHECK2-NEXT:    [[TMP44:%.*]] = icmp ne i32 [[TMP43]], 0
-// CHECK2-NEXT:    br i1 [[TMP44]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK2-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
+// CHECK2-NEXT:    store i8* null, i8** [[TMP30]], align 8
+// CHECK2-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK2-NEXT:    [[TMP32:%.*]] = bitcast i8** [[TMP31]] to i16**
+// CHECK2-NEXT:    store i16* [[VLA]], i16** [[TMP32]], align 8
+// CHECK2-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK2-NEXT:    [[TMP34:%.*]] = bitcast i8** [[TMP33]] to i16**
+// CHECK2-NEXT:    store i16* [[VLA]], i16** [[TMP34]], align 8
+// CHECK2-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
+// CHECK2-NEXT:    store i64 [[TMP9]], i64* [[TMP35]], align 8
+// CHECK2-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
+// CHECK2-NEXT:    store i8* null, i8** [[TMP36]], align 8
+// CHECK2-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP40:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l216.region_id, i32 5, i8** [[TMP37]], i8** [[TMP38]], i64* [[TMP39]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.12, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
+// CHECK2-NEXT:    [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0
+// CHECK2-NEXT:    br i1 [[TMP41]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK2:       omp_offload.failed:
 // CHECK2-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l216(%struct.S1* [[THIS1]], i64 [[TMP6]], i64 2, i64 [[TMP2]], i16* [[VLA]]) #[[ATTR4]]
 // CHECK2-NEXT:    br label [[OMP_OFFLOAD_CONT]]
@@ -1983,15 +1947,15 @@ int bar(int n){
 // CHECK2-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l216(%struct.S1* [[THIS1]], i64 [[TMP6]], i64 2, i64 [[TMP2]], i16* [[VLA]]) #[[ATTR4]]
 // CHECK2-NEXT:    br label [[OMP_IF_END]]
 // CHECK2:       omp_if.end:
-// CHECK2-NEXT:    [[TMP45:%.*]] = mul nsw i64 1, [[TMP2]]
-// CHECK2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i64 [[TMP45]]
+// CHECK2-NEXT:    [[TMP42:%.*]] = mul nsw i64 1, [[TMP2]]
+// CHECK2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i64 [[TMP42]]
 // CHECK2-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1
-// CHECK2-NEXT:    [[TMP46:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
-// CHECK2-NEXT:    [[CONV3:%.*]] = sext i16 [[TMP46]] to i32
-// CHECK2-NEXT:    [[TMP47:%.*]] = load i32, i32* [[B]], align 4
-// CHECK2-NEXT:    [[ADD4:%.*]] = add nsw i32 [[CONV3]], [[TMP47]]
-// CHECK2-NEXT:    [[TMP48:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK2-NEXT:    call void @llvm.stackrestore(i8* [[TMP48]])
+// CHECK2-NEXT:    [[TMP43:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
+// CHECK2-NEXT:    [[CONV3:%.*]] = sext i16 [[TMP43]] to i32
+// CHECK2-NEXT:    [[TMP44:%.*]] = load i32, i32* [[B]], align 4
+// CHECK2-NEXT:    [[ADD4:%.*]] = add nsw i32 [[CONV3]], [[TMP44]]
+// CHECK2-NEXT:    [[TMP45:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK2-NEXT:    call void @llvm.stackrestore(i8* [[TMP45]])
 // CHECK2-NEXT:    ret i32 [[ADD4]]
 //
 //
@@ -2063,7 +2027,7 @@ int bar(int n){
 // CHECK2-NEXT:    store i8* null, i8** [[TMP26]], align 8
 // CHECK2-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK2-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK2-NEXT:    [[TMP29:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l198.region_id, i32 4, i8** [[TMP27]], i8** [[TMP28]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.12, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
+// CHECK2-NEXT:    [[TMP29:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l198.region_id, i32 4, i8** [[TMP27]], i8** [[TMP28]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.14, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.15, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
 // CHECK2-NEXT:    [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0
 // CHECK2-NEXT:    br i1 [[TMP30]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK2:       omp_offload.failed:
@@ -2132,7 +2096,7 @@ int bar(int n){
 // CHECK2-NEXT:    store i8* null, i8** [[TMP19]], align 8
 // CHECK2-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK2-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK2-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l181.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.15, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.16, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
+// CHECK2-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l181.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.17, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.18, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
 // CHECK2-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK2-NEXT:    br i1 [[TMP23]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK2:       omp_offload.failed:
@@ -2171,11 +2135,11 @@ int bar(int n){
 // CHECK2-NEXT:    [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32*
 // CHECK2-NEXT:    store i32 [[TMP4]], i32* [[CONV3]], align 4
 // CHECK2-NEXT:    [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8
-// CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i64, i64, i64, i16*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i64 [[TMP5]], i64 [[TMP1]], i64 [[TMP2]], i16* [[TMP3]])
+// CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i64, i64, i64, i16*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i64 [[TMP5]], i64 [[TMP1]], i64 [[TMP2]], i16* [[TMP3]])
 // CHECK2-NEXT:    ret void
 //
 //
-// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..9
+// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..10
 // CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR3]] {
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -2244,11 +2208,11 @@ int bar(int n){
 // CHECK2-NEXT:    [[CONV5:%.*]] = bitcast i64* [[AAA_CASTED]] to i8*
 // CHECK2-NEXT:    store i8 [[TMP5]], i8* [[CONV5]], align 1
 // CHECK2-NEXT:    [[TMP6:%.*]] = load i64, i64* [[AAA_CASTED]], align 8
-// CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, [10 x i32]*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], [10 x i32]* [[TMP0]])
+// CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, [10 x i32]*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], [10 x i32]* [[TMP0]])
 // CHECK2-NEXT:    ret void
 //
 //
-// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..11
+// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..13
 // CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] {
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -2309,11 +2273,11 @@ int bar(int n){
 // CHECK2-NEXT:    [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16*
 // CHECK2-NEXT:    store i16 [[TMP3]], i16* [[CONV3]], align 2
 // CHECK2-NEXT:    [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8
-// CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], [10 x i32]* [[TMP0]])
+// CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], [10 x i32]* [[TMP0]])
 // CHECK2-NEXT:    ret void
 //
 //
-// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..14
+// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..16
 // CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] {
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -2345,7 +2309,7 @@ int bar(int n){
 //
 //
 // CHECK2-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK2-SAME: () #[[ATTR7:[0-9]+]] {
+// CHECK2-SAME: () #[[ATTR8:[0-9]+]] {
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK2-NEXT:    ret void
@@ -2474,14 +2438,14 @@ int bar(int n){
 // CHECK3-NEXT:    [[TMP46:%.*]] = mul nuw i32 5, [[TMP3]]
 // CHECK3-NEXT:    [[TMP47:%.*]] = mul nuw i32 [[TMP46]], 8
 // CHECK3-NEXT:    [[TMP48:%.*]] = sext i32 [[TMP47]] to i64
-// CHECK3-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0
-// CHECK3-NEXT:    [[TMP50:%.*]] = bitcast i8** [[TMP49]] to i32*
-// CHECK3-NEXT:    store i32 [[TMP42]], i32* [[TMP50]], align 4
-// CHECK3-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 0
-// CHECK3-NEXT:    [[TMP52:%.*]] = bitcast i8** [[TMP51]] to i32*
-// CHECK3-NEXT:    store i32 [[TMP42]], i32* [[TMP52]], align 4
-// CHECK3-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK3-NEXT:    store i64 4, i64* [[TMP53]], align 4
+// CHECK3-NEXT:    [[TMP49:%.*]] = bitcast [9 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK3-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP49]], i8* align 4 bitcast ([9 x i64]* @.offload_sizes.8 to i8*), i32 72, i1 false)
+// CHECK3-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0
+// CHECK3-NEXT:    [[TMP51:%.*]] = bitcast i8** [[TMP50]] to i32*
+// CHECK3-NEXT:    store i32 [[TMP42]], i32* [[TMP51]], align 4
+// CHECK3-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 0
+// CHECK3-NEXT:    [[TMP53:%.*]] = bitcast i8** [[TMP52]] to i32*
+// CHECK3-NEXT:    store i32 [[TMP42]], i32* [[TMP53]], align 4
 // CHECK3-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS15]], i32 0, i32 0
 // CHECK3-NEXT:    store i8* null, i8** [[TMP54]], align 4
 // CHECK3-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 1
@@ -2490,86 +2454,74 @@ int bar(int n){
 // CHECK3-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 1
 // CHECK3-NEXT:    [[TMP58:%.*]] = bitcast i8** [[TMP57]] to [10 x float]**
 // CHECK3-NEXT:    store [10 x float]* [[B]], [10 x float]** [[TMP58]], align 4
-// CHECK3-NEXT:    [[TMP59:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK3-NEXT:    store i64 40, i64* [[TMP59]], align 4
-// CHECK3-NEXT:    [[TMP60:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS15]], i32 0, i32 1
-// CHECK3-NEXT:    store i8* null, i8** [[TMP60]], align 4
-// CHECK3-NEXT:    [[TMP61:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 2
-// CHECK3-NEXT:    [[TMP62:%.*]] = bitcast i8** [[TMP61]] to i32*
-// CHECK3-NEXT:    store i32 [[TMP1]], i32* [[TMP62]], align 4
-// CHECK3-NEXT:    [[TMP63:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 2
-// CHECK3-NEXT:    [[TMP64:%.*]] = bitcast i8** [[TMP63]] to i32*
-// CHECK3-NEXT:    store i32 [[TMP1]], i32* [[TMP64]], align 4
-// CHECK3-NEXT:    [[TMP65:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK3-NEXT:    store i64 4, i64* [[TMP65]], align 4
-// CHECK3-NEXT:    [[TMP66:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS15]], i32 0, i32 2
-// CHECK3-NEXT:    store i8* null, i8** [[TMP66]], align 4
-// CHECK3-NEXT:    [[TMP67:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 3
+// CHECK3-NEXT:    [[TMP59:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS15]], i32 0, i32 1
+// CHECK3-NEXT:    store i8* null, i8** [[TMP59]], align 4
+// CHECK3-NEXT:    [[TMP60:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 2
+// CHECK3-NEXT:    [[TMP61:%.*]] = bitcast i8** [[TMP60]] to i32*
+// CHECK3-NEXT:    store i32 [[TMP1]], i32* [[TMP61]], align 4
+// CHECK3-NEXT:    [[TMP62:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 2
+// CHECK3-NEXT:    [[TMP63:%.*]] = bitcast i8** [[TMP62]] to i32*
+// CHECK3-NEXT:    store i32 [[TMP1]], i32* [[TMP63]], align 4
+// CHECK3-NEXT:    [[TMP64:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS15]], i32 0, i32 2
+// CHECK3-NEXT:    store i8* null, i8** [[TMP64]], align 4
+// CHECK3-NEXT:    [[TMP65:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 3
+// CHECK3-NEXT:    [[TMP66:%.*]] = bitcast i8** [[TMP65]] to float**
+// CHECK3-NEXT:    store float* [[VLA]], float** [[TMP66]], align 4
+// CHECK3-NEXT:    [[TMP67:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 3
 // CHECK3-NEXT:    [[TMP68:%.*]] = bitcast i8** [[TMP67]] to float**
 // CHECK3-NEXT:    store float* [[VLA]], float** [[TMP68]], align 4
-// CHECK3-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 3
-// CHECK3-NEXT:    [[TMP70:%.*]] = bitcast i8** [[TMP69]] to float**
-// CHECK3-NEXT:    store float* [[VLA]], float** [[TMP70]], align 4
-// CHECK3-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK3-NEXT:    store i64 [[TMP45]], i64* [[TMP71]], align 4
-// CHECK3-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS15]], i32 0, i32 3
-// CHECK3-NEXT:    store i8* null, i8** [[TMP72]], align 4
-// CHECK3-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 4
+// CHECK3-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
+// CHECK3-NEXT:    store i64 [[TMP45]], i64* [[TMP69]], align 4
+// CHECK3-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS15]], i32 0, i32 3
+// CHECK3-NEXT:    store i8* null, i8** [[TMP70]], align 4
+// CHECK3-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 4
+// CHECK3-NEXT:    [[TMP72:%.*]] = bitcast i8** [[TMP71]] to [5 x [10 x double]]**
+// CHECK3-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP72]], align 4
+// CHECK3-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 4
 // CHECK3-NEXT:    [[TMP74:%.*]] = bitcast i8** [[TMP73]] to [5 x [10 x double]]**
 // CHECK3-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP74]], align 4
-// CHECK3-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 4
-// CHECK3-NEXT:    [[TMP76:%.*]] = bitcast i8** [[TMP75]] to [5 x [10 x double]]**
-// CHECK3-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP76]], align 4
-// CHECK3-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK3-NEXT:    store i64 400, i64* [[TMP77]], align 4
-// CHECK3-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS15]], i32 0, i32 4
-// CHECK3-NEXT:    store i8* null, i8** [[TMP78]], align 4
-// CHECK3-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 5
-// CHECK3-NEXT:    [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i32*
-// CHECK3-NEXT:    store i32 5, i32* [[TMP80]], align 4
-// CHECK3-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 5
+// CHECK3-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS15]], i32 0, i32 4
+// CHECK3-NEXT:    store i8* null, i8** [[TMP75]], align 4
+// CHECK3-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 5
+// CHECK3-NEXT:    [[TMP77:%.*]] = bitcast i8** [[TMP76]] to i32*
+// CHECK3-NEXT:    store i32 5, i32* [[TMP77]], align 4
+// CHECK3-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 5
+// CHECK3-NEXT:    [[TMP79:%.*]] = bitcast i8** [[TMP78]] to i32*
+// CHECK3-NEXT:    store i32 5, i32* [[TMP79]], align 4
+// CHECK3-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS15]], i32 0, i32 5
+// CHECK3-NEXT:    store i8* null, i8** [[TMP80]], align 4
+// CHECK3-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 6
 // CHECK3-NEXT:    [[TMP82:%.*]] = bitcast i8** [[TMP81]] to i32*
-// CHECK3-NEXT:    store i32 5, i32* [[TMP82]], align 4
-// CHECK3-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 5
-// CHECK3-NEXT:    store i64 4, i64* [[TMP83]], align 4
-// CHECK3-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS15]], i32 0, i32 5
-// CHECK3-NEXT:    store i8* null, i8** [[TMP84]], align 4
-// CHECK3-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 6
-// CHECK3-NEXT:    [[TMP86:%.*]] = bitcast i8** [[TMP85]] to i32*
-// CHECK3-NEXT:    store i32 [[TMP3]], i32* [[TMP86]], align 4
-// CHECK3-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 6
-// CHECK3-NEXT:    [[TMP88:%.*]] = bitcast i8** [[TMP87]] to i32*
-// CHECK3-NEXT:    store i32 [[TMP3]], i32* [[TMP88]], align 4
-// CHECK3-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 6
-// CHECK3-NEXT:    store i64 4, i64* [[TMP89]], align 4
-// CHECK3-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS15]], i32 0, i32 6
-// CHECK3-NEXT:    store i8* null, i8** [[TMP90]], align 4
-// CHECK3-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 7
-// CHECK3-NEXT:    [[TMP92:%.*]] = bitcast i8** [[TMP91]] to double**
-// CHECK3-NEXT:    store double* [[VLA1]], double** [[TMP92]], align 4
-// CHECK3-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 7
-// CHECK3-NEXT:    [[TMP94:%.*]] = bitcast i8** [[TMP93]] to double**
-// CHECK3-NEXT:    store double* [[VLA1]], double** [[TMP94]], align 4
-// CHECK3-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
-// CHECK3-NEXT:    store i64 [[TMP48]], i64* [[TMP95]], align 4
-// CHECK3-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS15]], i32 0, i32 7
+// CHECK3-NEXT:    store i32 [[TMP3]], i32* [[TMP82]], align 4
+// CHECK3-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 6
+// CHECK3-NEXT:    [[TMP84:%.*]] = bitcast i8** [[TMP83]] to i32*
+// CHECK3-NEXT:    store i32 [[TMP3]], i32* [[TMP84]], align 4
+// CHECK3-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS15]], i32 0, i32 6
+// CHECK3-NEXT:    store i8* null, i8** [[TMP85]], align 4
+// CHECK3-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 7
+// CHECK3-NEXT:    [[TMP87:%.*]] = bitcast i8** [[TMP86]] to double**
+// CHECK3-NEXT:    store double* [[VLA1]], double** [[TMP87]], align 4
+// CHECK3-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 7
+// CHECK3-NEXT:    [[TMP89:%.*]] = bitcast i8** [[TMP88]] to double**
+// CHECK3-NEXT:    store double* [[VLA1]], double** [[TMP89]], align 4
+// CHECK3-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
+// CHECK3-NEXT:    store i64 [[TMP48]], i64* [[TMP90]], align 4
+// CHECK3-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS15]], i32 0, i32 7
+// CHECK3-NEXT:    store i8* null, i8** [[TMP91]], align 4
+// CHECK3-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 8
+// CHECK3-NEXT:    [[TMP93:%.*]] = bitcast i8** [[TMP92]] to %struct.TT**
+// CHECK3-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP93]], align 4
+// CHECK3-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 8
+// CHECK3-NEXT:    [[TMP95:%.*]] = bitcast i8** [[TMP94]] to %struct.TT**
+// CHECK3-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP95]], align 4
+// CHECK3-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS15]], i32 0, i32 8
 // CHECK3-NEXT:    store i8* null, i8** [[TMP96]], align 4
-// CHECK3-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 8
-// CHECK3-NEXT:    [[TMP98:%.*]] = bitcast i8** [[TMP97]] to %struct.TT**
-// CHECK3-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP98]], align 4
-// CHECK3-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 8
-// CHECK3-NEXT:    [[TMP100:%.*]] = bitcast i8** [[TMP99]] to %struct.TT**
-// CHECK3-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP100]], align 4
-// CHECK3-NEXT:    [[TMP101:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 8
-// CHECK3-NEXT:    store i64 12, i64* [[TMP101]], align 4
-// CHECK3-NEXT:    [[TMP102:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS15]], i32 0, i32 8
-// CHECK3-NEXT:    store i8* null, i8** [[TMP102]], align 4
-// CHECK3-NEXT:    [[TMP103:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0
-// CHECK3-NEXT:    [[TMP104:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 0
-// CHECK3-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK3-NEXT:    [[TMP106:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l144.region_id, i32 9, i8** [[TMP103]], i8** [[TMP104]], i64* [[TMP105]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.8, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
-// CHECK3-NEXT:    [[TMP107:%.*]] = icmp ne i32 [[TMP106]], 0
-// CHECK3-NEXT:    br i1 [[TMP107]], label [[OMP_OFFLOAD_FAILED16:%.*]], label [[OMP_OFFLOAD_CONT17:%.*]]
+// CHECK3-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0
+// CHECK3-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 0
+// CHECK3-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK3-NEXT:    [[TMP100:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l144.region_id, i32 9, i8** [[TMP97]], i8** [[TMP98]], i64* [[TMP99]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
+// CHECK3-NEXT:    [[TMP101:%.*]] = icmp ne i32 [[TMP100]], 0
+// CHECK3-NEXT:    br i1 [[TMP101]], label [[OMP_OFFLOAD_FAILED16:%.*]], label [[OMP_OFFLOAD_CONT17:%.*]]
 // CHECK3:       omp_offload.failed16:
 // CHECK3-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l144(i32 [[TMP42]], [10 x float]* [[B]], i32 [[TMP1]], float* [[VLA]], [5 x [10 x double]]* [[C]], i32 5, i32 [[TMP3]], double* [[VLA1]], %struct.TT* [[D]]) #[[ATTR4]]
 // CHECK3-NEXT:    br label [[OMP_OFFLOAD_CONT17]]
@@ -2579,10 +2531,10 @@ int bar(int n){
 // CHECK3-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l144(i32 [[TMP42]], [10 x float]* [[B]], i32 [[TMP1]], float* [[VLA]], [5 x [10 x double]]* [[C]], i32 5, i32 [[TMP3]], double* [[VLA1]], %struct.TT* [[D]]) #[[ATTR4]]
 // CHECK3-NEXT:    br label [[OMP_IF_END19]]
 // CHECK3:       omp_if.end19:
-// CHECK3-NEXT:    [[TMP108:%.*]] = load i32, i32* [[A]], align 4
-// CHECK3-NEXT:    [[TMP109:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK3-NEXT:    call void @llvm.stackrestore(i8* [[TMP109]])
-// CHECK3-NEXT:    ret i32 [[TMP108]]
+// CHECK3-NEXT:    [[TMP102:%.*]] = load i32, i32* [[A]], align 4
+// CHECK3-NEXT:    [[TMP103:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK3-NEXT:    call void @llvm.stackrestore(i8* [[TMP103]])
+// CHECK3-NEXT:    ret i32 [[TMP102]]
 //
 //
 // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l100
@@ -2935,14 +2887,14 @@ int bar(int n){
 // CHECK3-NEXT:    [[TMP7:%.*]] = mul nuw i32 2, [[TMP1]]
 // CHECK3-NEXT:    [[TMP8:%.*]] = mul nuw i32 [[TMP7]], 2
 // CHECK3-NEXT:    [[TMP9:%.*]] = sext i32 [[TMP8]] to i64
-// CHECK3-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[TMP11:%.*]] = bitcast i8** [[TMP10]] to %struct.S1**
-// CHECK3-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP11]], align 4
-// CHECK3-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[TMP13:%.*]] = bitcast i8** [[TMP12]] to double**
-// CHECK3-NEXT:    store double* [[A]], double** [[TMP13]], align 4
-// CHECK3-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK3-NEXT:    store i64 8, i64* [[TMP14]], align 4
+// CHECK3-NEXT:    [[TMP10:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK3-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP10]], i8* align 4 bitcast ([5 x i64]* @.offload_sizes.11 to i8*), i32 40, i1 false)
+// CHECK3-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK3-NEXT:    [[TMP12:%.*]] = bitcast i8** [[TMP11]] to %struct.S1**
+// CHECK3-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP12]], align 4
+// CHECK3-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK3-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to double**
+// CHECK3-NEXT:    store double* [[A]], double** [[TMP14]], align 4
 // CHECK3-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
 // CHECK3-NEXT:    store i8* null, i8** [[TMP15]], align 4
 // CHECK3-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -2951,46 +2903,40 @@ int bar(int n){
 // CHECK3-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK3-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32*
 // CHECK3-NEXT:    store i32 [[TMP5]], i32* [[TMP19]], align 4
-// CHECK3-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK3-NEXT:    store i64 4, i64* [[TMP20]], align 4
-// CHECK3-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK3-NEXT:    store i8* null, i8** [[TMP21]], align 4
-// CHECK3-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK3-NEXT:    [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i32*
-// CHECK3-NEXT:    store i32 2, i32* [[TMP23]], align 4
-// CHECK3-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK3-NEXT:    [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i32*
-// CHECK3-NEXT:    store i32 2, i32* [[TMP25]], align 4
-// CHECK3-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK3-NEXT:    store i64 4, i64* [[TMP26]], align 4
-// CHECK3-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK3-NEXT:    store i8* null, i8** [[TMP27]], align 4
-// CHECK3-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK3-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK3-NEXT:    store i8* null, i8** [[TMP20]], align 4
+// CHECK3-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK3-NEXT:    [[TMP22:%.*]] = bitcast i8** [[TMP21]] to i32*
+// CHECK3-NEXT:    store i32 2, i32* [[TMP22]], align 4
+// CHECK3-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK3-NEXT:    [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i32*
+// CHECK3-NEXT:    store i32 2, i32* [[TMP24]], align 4
+// CHECK3-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK3-NEXT:    store i8* null, i8** [[TMP25]], align 4
+// CHECK3-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK3-NEXT:    [[TMP27:%.*]] = bitcast i8** [[TMP26]] to i32*
+// CHECK3-NEXT:    store i32 [[TMP1]], i32* [[TMP27]], align 4
+// CHECK3-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
 // CHECK3-NEXT:    [[TMP29:%.*]] = bitcast i8** [[TMP28]] to i32*
 // CHECK3-NEXT:    store i32 [[TMP1]], i32* [[TMP29]], align 4
-// CHECK3-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK3-NEXT:    [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i32*
-// CHECK3-NEXT:    store i32 [[TMP1]], i32* [[TMP31]], align 4
-// CHECK3-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK3-NEXT:    store i64 4, i64* [[TMP32]], align 4
-// CHECK3-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
-// CHECK3-NEXT:    store i8* null, i8** [[TMP33]], align 4
-// CHECK3-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK3-NEXT:    [[TMP35:%.*]] = bitcast i8** [[TMP34]] to i16**
-// CHECK3-NEXT:    store i16* [[VLA]], i16** [[TMP35]], align 4
-// CHECK3-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
-// CHECK3-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i16**
-// CHECK3-NEXT:    store i16* [[VLA]], i16** [[TMP37]], align 4
-// CHECK3-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK3-NEXT:    store i64 [[TMP9]], i64* [[TMP38]], align 4
-// CHECK3-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
-// CHECK3-NEXT:    store i8* null, i8** [[TMP39]], align 4
-// CHECK3-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK3-NEXT:    [[TMP43:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l216.region_id, i32 5, i8** [[TMP40]], i8** [[TMP41]], i64* [[TMP42]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.10, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
-// CHECK3-NEXT:    [[TMP44:%.*]] = icmp ne i32 [[TMP43]], 0
-// CHECK3-NEXT:    br i1 [[TMP44]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK3-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
+// CHECK3-NEXT:    store i8* null, i8** [[TMP30]], align 4
+// CHECK3-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK3-NEXT:    [[TMP32:%.*]] = bitcast i8** [[TMP31]] to i16**
+// CHECK3-NEXT:    store i16* [[VLA]], i16** [[TMP32]], align 4
+// CHECK3-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK3-NEXT:    [[TMP34:%.*]] = bitcast i8** [[TMP33]] to i16**
+// CHECK3-NEXT:    store i16* [[VLA]], i16** [[TMP34]], align 4
+// CHECK3-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
+// CHECK3-NEXT:    store i64 [[TMP9]], i64* [[TMP35]], align 4
+// CHECK3-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
+// CHECK3-NEXT:    store i8* null, i8** [[TMP36]], align 4
+// CHECK3-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK3-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK3-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK3-NEXT:    [[TMP40:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l216.region_id, i32 5, i8** [[TMP37]], i8** [[TMP38]], i64* [[TMP39]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.12, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
+// CHECK3-NEXT:    [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0
+// CHECK3-NEXT:    br i1 [[TMP41]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK3:       omp_offload.failed:
 // CHECK3-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l216(%struct.S1* [[THIS1]], i32 [[TMP5]], i32 2, i32 [[TMP1]], i16* [[VLA]]) #[[ATTR4]]
 // CHECK3-NEXT:    br label [[OMP_OFFLOAD_CONT]]
@@ -3000,15 +2946,15 @@ int bar(int n){
 // CHECK3-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l216(%struct.S1* [[THIS1]], i32 [[TMP5]], i32 2, i32 [[TMP1]], i16* [[VLA]]) #[[ATTR4]]
 // CHECK3-NEXT:    br label [[OMP_IF_END]]
 // CHECK3:       omp_if.end:
-// CHECK3-NEXT:    [[TMP45:%.*]] = mul nsw i32 1, [[TMP1]]
-// CHECK3-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i32 [[TMP45]]
+// CHECK3-NEXT:    [[TMP42:%.*]] = mul nsw i32 1, [[TMP1]]
+// CHECK3-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i32 [[TMP42]]
 // CHECK3-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1
-// CHECK3-NEXT:    [[TMP46:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
-// CHECK3-NEXT:    [[CONV:%.*]] = sext i16 [[TMP46]] to i32
-// CHECK3-NEXT:    [[TMP47:%.*]] = load i32, i32* [[B]], align 4
-// CHECK3-NEXT:    [[ADD3:%.*]] = add nsw i32 [[CONV]], [[TMP47]]
-// CHECK3-NEXT:    [[TMP48:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK3-NEXT:    call void @llvm.stackrestore(i8* [[TMP48]])
+// CHECK3-NEXT:    [[TMP43:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
+// CHECK3-NEXT:    [[CONV:%.*]] = sext i16 [[TMP43]] to i32
+// CHECK3-NEXT:    [[TMP44:%.*]] = load i32, i32* [[B]], align 4
+// CHECK3-NEXT:    [[ADD3:%.*]] = add nsw i32 [[CONV]], [[TMP44]]
+// CHECK3-NEXT:    [[TMP45:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK3-NEXT:    call void @llvm.stackrestore(i8* [[TMP45]])
 // CHECK3-NEXT:    ret i32 [[ADD3]]
 //
 //
@@ -3079,7 +3025,7 @@ int bar(int n){
 // CHECK3-NEXT:    store i8* null, i8** [[TMP26]], align 4
 // CHECK3-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[TMP29:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l198.region_id, i32 4, i8** [[TMP27]], i8** [[TMP28]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.12, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
+// CHECK3-NEXT:    [[TMP29:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l198.region_id, i32 4, i8** [[TMP27]], i8** [[TMP28]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.14, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.15, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
 // CHECK3-NEXT:    [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0
 // CHECK3-NEXT:    br i1 [[TMP30]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK3:       omp_offload.failed:
@@ -3147,7 +3093,7 @@ int bar(int n){
 // CHECK3-NEXT:    store i8* null, i8** [[TMP19]], align 4
 // CHECK3-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l181.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.15, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.16, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
+// CHECK3-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l181.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.17, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.18, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
 // CHECK3-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK3-NEXT:    br i1 [[TMP23]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK3:       omp_offload.failed:
@@ -3184,11 +3130,11 @@ int bar(int n){
 // CHECK3-NEXT:    [[TMP4:%.*]] = load i32, i32* [[B_ADDR]], align 4
 // CHECK3-NEXT:    store i32 [[TMP4]], i32* [[B_CASTED]], align 4
 // CHECK3-NEXT:    [[TMP5:%.*]] = load i32, i32* [[B_CASTED]], align 4
-// CHECK3-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i32, i32, i32, i16*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i32 [[TMP5]], i32 [[TMP1]], i32 [[TMP2]], i16* [[TMP3]])
+// CHECK3-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i32, i32, i32, i16*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i32 [[TMP5]], i32 [[TMP1]], i32 [[TMP2]], i16* [[TMP3]])
 // CHECK3-NEXT:    ret void
 //
 //
-// CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..9
+// CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..10
 // CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR3]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -3254,11 +3200,11 @@ int bar(int n){
 // CHECK3-NEXT:    [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8*
 // CHECK3-NEXT:    store i8 [[TMP5]], i8* [[CONV3]], align 1
 // CHECK3-NEXT:    [[TMP6:%.*]] = load i32, i32* [[AAA_CASTED]], align 4
-// CHECK3-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, [10 x i32]*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], [10 x i32]* [[TMP0]])
+// CHECK3-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, [10 x i32]*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], [10 x i32]* [[TMP0]])
 // CHECK3-NEXT:    ret void
 //
 //
-// CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..11
+// CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..13
 // CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -3316,11 +3262,11 @@ int bar(int n){
 // CHECK3-NEXT:    [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16*
 // CHECK3-NEXT:    store i16 [[TMP3]], i16* [[CONV1]], align 2
 // CHECK3-NEXT:    [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4
-// CHECK3-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], [10 x i32]* [[TMP0]])
+// CHECK3-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], [10 x i32]* [[TMP0]])
 // CHECK3-NEXT:    ret void
 //
 //
-// CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..14
+// CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..16
 // CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -3351,7 +3297,7 @@ int bar(int n){
 //
 //
 // CHECK3-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK3-SAME: () #[[ATTR7:[0-9]+]] {
+// CHECK3-SAME: () #[[ATTR8:[0-9]+]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK3-NEXT:    ret void
@@ -3480,14 +3426,14 @@ int bar(int n){
 // CHECK4-NEXT:    [[TMP46:%.*]] = mul nuw i32 5, [[TMP3]]
 // CHECK4-NEXT:    [[TMP47:%.*]] = mul nuw i32 [[TMP46]], 8
 // CHECK4-NEXT:    [[TMP48:%.*]] = sext i32 [[TMP47]] to i64
-// CHECK4-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0
-// CHECK4-NEXT:    [[TMP50:%.*]] = bitcast i8** [[TMP49]] to i32*
-// CHECK4-NEXT:    store i32 [[TMP42]], i32* [[TMP50]], align 4
-// CHECK4-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 0
-// CHECK4-NEXT:    [[TMP52:%.*]] = bitcast i8** [[TMP51]] to i32*
-// CHECK4-NEXT:    store i32 [[TMP42]], i32* [[TMP52]], align 4
-// CHECK4-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK4-NEXT:    store i64 4, i64* [[TMP53]], align 4
+// CHECK4-NEXT:    [[TMP49:%.*]] = bitcast [9 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK4-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP49]], i8* align 4 bitcast ([9 x i64]* @.offload_sizes.8 to i8*), i32 72, i1 false)
+// CHECK4-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0
+// CHECK4-NEXT:    [[TMP51:%.*]] = bitcast i8** [[TMP50]] to i32*
+// CHECK4-NEXT:    store i32 [[TMP42]], i32* [[TMP51]], align 4
+// CHECK4-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 0
+// CHECK4-NEXT:    [[TMP53:%.*]] = bitcast i8** [[TMP52]] to i32*
+// CHECK4-NEXT:    store i32 [[TMP42]], i32* [[TMP53]], align 4
 // CHECK4-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS15]], i32 0, i32 0
 // CHECK4-NEXT:    store i8* null, i8** [[TMP54]], align 4
 // CHECK4-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 1
@@ -3496,86 +3442,74 @@ int bar(int n){
 // CHECK4-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 1
 // CHECK4-NEXT:    [[TMP58:%.*]] = bitcast i8** [[TMP57]] to [10 x float]**
 // CHECK4-NEXT:    store [10 x float]* [[B]], [10 x float]** [[TMP58]], align 4
-// CHECK4-NEXT:    [[TMP59:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK4-NEXT:    store i64 40, i64* [[TMP59]], align 4
-// CHECK4-NEXT:    [[TMP60:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS15]], i32 0, i32 1
-// CHECK4-NEXT:    store i8* null, i8** [[TMP60]], align 4
-// CHECK4-NEXT:    [[TMP61:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 2
-// CHECK4-NEXT:    [[TMP62:%.*]] = bitcast i8** [[TMP61]] to i32*
-// CHECK4-NEXT:    store i32 [[TMP1]], i32* [[TMP62]], align 4
-// CHECK4-NEXT:    [[TMP63:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 2
-// CHECK4-NEXT:    [[TMP64:%.*]] = bitcast i8** [[TMP63]] to i32*
-// CHECK4-NEXT:    store i32 [[TMP1]], i32* [[TMP64]], align 4
-// CHECK4-NEXT:    [[TMP65:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK4-NEXT:    store i64 4, i64* [[TMP65]], align 4
-// CHECK4-NEXT:    [[TMP66:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS15]], i32 0, i32 2
-// CHECK4-NEXT:    store i8* null, i8** [[TMP66]], align 4
-// CHECK4-NEXT:    [[TMP67:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 3
+// CHECK4-NEXT:    [[TMP59:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS15]], i32 0, i32 1
+// CHECK4-NEXT:    store i8* null, i8** [[TMP59]], align 4
+// CHECK4-NEXT:    [[TMP60:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 2
+// CHECK4-NEXT:    [[TMP61:%.*]] = bitcast i8** [[TMP60]] to i32*
+// CHECK4-NEXT:    store i32 [[TMP1]], i32* [[TMP61]], align 4
+// CHECK4-NEXT:    [[TMP62:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 2
+// CHECK4-NEXT:    [[TMP63:%.*]] = bitcast i8** [[TMP62]] to i32*
+// CHECK4-NEXT:    store i32 [[TMP1]], i32* [[TMP63]], align 4
+// CHECK4-NEXT:    [[TMP64:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS15]], i32 0, i32 2
+// CHECK4-NEXT:    store i8* null, i8** [[TMP64]], align 4
+// CHECK4-NEXT:    [[TMP65:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 3
+// CHECK4-NEXT:    [[TMP66:%.*]] = bitcast i8** [[TMP65]] to float**
+// CHECK4-NEXT:    store float* [[VLA]], float** [[TMP66]], align 4
+// CHECK4-NEXT:    [[TMP67:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 3
 // CHECK4-NEXT:    [[TMP68:%.*]] = bitcast i8** [[TMP67]] to float**
 // CHECK4-NEXT:    store float* [[VLA]], float** [[TMP68]], align 4
-// CHECK4-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 3
-// CHECK4-NEXT:    [[TMP70:%.*]] = bitcast i8** [[TMP69]] to float**
-// CHECK4-NEXT:    store float* [[VLA]], float** [[TMP70]], align 4
-// CHECK4-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK4-NEXT:    store i64 [[TMP45]], i64* [[TMP71]], align 4
-// CHECK4-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS15]], i32 0, i32 3
-// CHECK4-NEXT:    store i8* null, i8** [[TMP72]], align 4
-// CHECK4-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 4
+// CHECK4-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
+// CHECK4-NEXT:    store i64 [[TMP45]], i64* [[TMP69]], align 4
+// CHECK4-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS15]], i32 0, i32 3
+// CHECK4-NEXT:    store i8* null, i8** [[TMP70]], align 4
+// CHECK4-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 4
+// CHECK4-NEXT:    [[TMP72:%.*]] = bitcast i8** [[TMP71]] to [5 x [10 x double]]**
+// CHECK4-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP72]], align 4
+// CHECK4-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 4
 // CHECK4-NEXT:    [[TMP74:%.*]] = bitcast i8** [[TMP73]] to [5 x [10 x double]]**
 // CHECK4-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP74]], align 4
-// CHECK4-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 4
-// CHECK4-NEXT:    [[TMP76:%.*]] = bitcast i8** [[TMP75]] to [5 x [10 x double]]**
-// CHECK4-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP76]], align 4
-// CHECK4-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK4-NEXT:    store i64 400, i64* [[TMP77]], align 4
-// CHECK4-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS15]], i32 0, i32 4
-// CHECK4-NEXT:    store i8* null, i8** [[TMP78]], align 4
-// CHECK4-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 5
-// CHECK4-NEXT:    [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i32*
-// CHECK4-NEXT:    store i32 5, i32* [[TMP80]], align 4
-// CHECK4-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 5
+// CHECK4-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS15]], i32 0, i32 4
+// CHECK4-NEXT:    store i8* null, i8** [[TMP75]], align 4
+// CHECK4-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 5
+// CHECK4-NEXT:    [[TMP77:%.*]] = bitcast i8** [[TMP76]] to i32*
+// CHECK4-NEXT:    store i32 5, i32* [[TMP77]], align 4
+// CHECK4-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 5
+// CHECK4-NEXT:    [[TMP79:%.*]] = bitcast i8** [[TMP78]] to i32*
+// CHECK4-NEXT:    store i32 5, i32* [[TMP79]], align 4
+// CHECK4-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS15]], i32 0, i32 5
+// CHECK4-NEXT:    store i8* null, i8** [[TMP80]], align 4
+// CHECK4-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 6
 // CHECK4-NEXT:    [[TMP82:%.*]] = bitcast i8** [[TMP81]] to i32*
-// CHECK4-NEXT:    store i32 5, i32* [[TMP82]], align 4
-// CHECK4-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 5
-// CHECK4-NEXT:    store i64 4, i64* [[TMP83]], align 4
-// CHECK4-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS15]], i32 0, i32 5
-// CHECK4-NEXT:    store i8* null, i8** [[TMP84]], align 4
-// CHECK4-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 6
-// CHECK4-NEXT:    [[TMP86:%.*]] = bitcast i8** [[TMP85]] to i32*
-// CHECK4-NEXT:    store i32 [[TMP3]], i32* [[TMP86]], align 4
-// CHECK4-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 6
-// CHECK4-NEXT:    [[TMP88:%.*]] = bitcast i8** [[TMP87]] to i32*
-// CHECK4-NEXT:    store i32 [[TMP3]], i32* [[TMP88]], align 4
-// CHECK4-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 6
-// CHECK4-NEXT:    store i64 4, i64* [[TMP89]], align 4
-// CHECK4-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS15]], i32 0, i32 6
-// CHECK4-NEXT:    store i8* null, i8** [[TMP90]], align 4
-// CHECK4-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 7
-// CHECK4-NEXT:    [[TMP92:%.*]] = bitcast i8** [[TMP91]] to double**
-// CHECK4-NEXT:    store double* [[VLA1]], double** [[TMP92]], align 4
-// CHECK4-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 7
-// CHECK4-NEXT:    [[TMP94:%.*]] = bitcast i8** [[TMP93]] to double**
-// CHECK4-NEXT:    store double* [[VLA1]], double** [[TMP94]], align 4
-// CHECK4-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
-// CHECK4-NEXT:    store i64 [[TMP48]], i64* [[TMP95]], align 4
-// CHECK4-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS15]], i32 0, i32 7
+// CHECK4-NEXT:    store i32 [[TMP3]], i32* [[TMP82]], align 4
+// CHECK4-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 6
+// CHECK4-NEXT:    [[TMP84:%.*]] = bitcast i8** [[TMP83]] to i32*
+// CHECK4-NEXT:    store i32 [[TMP3]], i32* [[TMP84]], align 4
+// CHECK4-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS15]], i32 0, i32 6
+// CHECK4-NEXT:    store i8* null, i8** [[TMP85]], align 4
+// CHECK4-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 7
+// CHECK4-NEXT:    [[TMP87:%.*]] = bitcast i8** [[TMP86]] to double**
+// CHECK4-NEXT:    store double* [[VLA1]], double** [[TMP87]], align 4
+// CHECK4-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 7
+// CHECK4-NEXT:    [[TMP89:%.*]] = bitcast i8** [[TMP88]] to double**
+// CHECK4-NEXT:    store double* [[VLA1]], double** [[TMP89]], align 4
+// CHECK4-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
+// CHECK4-NEXT:    store i64 [[TMP48]], i64* [[TMP90]], align 4
+// CHECK4-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS15]], i32 0, i32 7
+// CHECK4-NEXT:    store i8* null, i8** [[TMP91]], align 4
+// CHECK4-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 8
+// CHECK4-NEXT:    [[TMP93:%.*]] = bitcast i8** [[TMP92]] to %struct.TT**
+// CHECK4-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP93]], align 4
+// CHECK4-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 8
+// CHECK4-NEXT:    [[TMP95:%.*]] = bitcast i8** [[TMP94]] to %struct.TT**
+// CHECK4-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP95]], align 4
+// CHECK4-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS15]], i32 0, i32 8
 // CHECK4-NEXT:    store i8* null, i8** [[TMP96]], align 4
-// CHECK4-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 8
-// CHECK4-NEXT:    [[TMP98:%.*]] = bitcast i8** [[TMP97]] to %struct.TT**
-// CHECK4-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP98]], align 4
-// CHECK4-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 8
-// CHECK4-NEXT:    [[TMP100:%.*]] = bitcast i8** [[TMP99]] to %struct.TT**
-// CHECK4-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP100]], align 4
-// CHECK4-NEXT:    [[TMP101:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 8
-// CHECK4-NEXT:    store i64 12, i64* [[TMP101]], align 4
-// CHECK4-NEXT:    [[TMP102:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS15]], i32 0, i32 8
-// CHECK4-NEXT:    store i8* null, i8** [[TMP102]], align 4
-// CHECK4-NEXT:    [[TMP103:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0
-// CHECK4-NEXT:    [[TMP104:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 0
-// CHECK4-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK4-NEXT:    [[TMP106:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l144.region_id, i32 9, i8** [[TMP103]], i8** [[TMP104]], i64* [[TMP105]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.8, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
-// CHECK4-NEXT:    [[TMP107:%.*]] = icmp ne i32 [[TMP106]], 0
-// CHECK4-NEXT:    br i1 [[TMP107]], label [[OMP_OFFLOAD_FAILED16:%.*]], label [[OMP_OFFLOAD_CONT17:%.*]]
+// CHECK4-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0
+// CHECK4-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 0
+// CHECK4-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK4-NEXT:    [[TMP100:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l144.region_id, i32 9, i8** [[TMP97]], i8** [[TMP98]], i64* [[TMP99]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
+// CHECK4-NEXT:    [[TMP101:%.*]] = icmp ne i32 [[TMP100]], 0
+// CHECK4-NEXT:    br i1 [[TMP101]], label [[OMP_OFFLOAD_FAILED16:%.*]], label [[OMP_OFFLOAD_CONT17:%.*]]
 // CHECK4:       omp_offload.failed16:
 // CHECK4-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l144(i32 [[TMP42]], [10 x float]* [[B]], i32 [[TMP1]], float* [[VLA]], [5 x [10 x double]]* [[C]], i32 5, i32 [[TMP3]], double* [[VLA1]], %struct.TT* [[D]]) #[[ATTR4]]
 // CHECK4-NEXT:    br label [[OMP_OFFLOAD_CONT17]]
@@ -3585,10 +3519,10 @@ int bar(int n){
 // CHECK4-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l144(i32 [[TMP42]], [10 x float]* [[B]], i32 [[TMP1]], float* [[VLA]], [5 x [10 x double]]* [[C]], i32 5, i32 [[TMP3]], double* [[VLA1]], %struct.TT* [[D]]) #[[ATTR4]]
 // CHECK4-NEXT:    br label [[OMP_IF_END19]]
 // CHECK4:       omp_if.end19:
-// CHECK4-NEXT:    [[TMP108:%.*]] = load i32, i32* [[A]], align 4
-// CHECK4-NEXT:    [[TMP109:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK4-NEXT:    call void @llvm.stackrestore(i8* [[TMP109]])
-// CHECK4-NEXT:    ret i32 [[TMP108]]
+// CHECK4-NEXT:    [[TMP102:%.*]] = load i32, i32* [[A]], align 4
+// CHECK4-NEXT:    [[TMP103:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK4-NEXT:    call void @llvm.stackrestore(i8* [[TMP103]])
+// CHECK4-NEXT:    ret i32 [[TMP102]]
 //
 //
 // CHECK4-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l100
@@ -3941,14 +3875,14 @@ int bar(int n){
 // CHECK4-NEXT:    [[TMP7:%.*]] = mul nuw i32 2, [[TMP1]]
 // CHECK4-NEXT:    [[TMP8:%.*]] = mul nuw i32 [[TMP7]], 2
 // CHECK4-NEXT:    [[TMP9:%.*]] = sext i32 [[TMP8]] to i64
-// CHECK4-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK4-NEXT:    [[TMP11:%.*]] = bitcast i8** [[TMP10]] to %struct.S1**
-// CHECK4-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP11]], align 4
-// CHECK4-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK4-NEXT:    [[TMP13:%.*]] = bitcast i8** [[TMP12]] to double**
-// CHECK4-NEXT:    store double* [[A]], double** [[TMP13]], align 4
-// CHECK4-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK4-NEXT:    store i64 8, i64* [[TMP14]], align 4
+// CHECK4-NEXT:    [[TMP10:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK4-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP10]], i8* align 4 bitcast ([5 x i64]* @.offload_sizes.11 to i8*), i32 40, i1 false)
+// CHECK4-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK4-NEXT:    [[TMP12:%.*]] = bitcast i8** [[TMP11]] to %struct.S1**
+// CHECK4-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP12]], align 4
+// CHECK4-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK4-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to double**
+// CHECK4-NEXT:    store double* [[A]], double** [[TMP14]], align 4
 // CHECK4-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
 // CHECK4-NEXT:    store i8* null, i8** [[TMP15]], align 4
 // CHECK4-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -3957,46 +3891,40 @@ int bar(int n){
 // CHECK4-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK4-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32*
 // CHECK4-NEXT:    store i32 [[TMP5]], i32* [[TMP19]], align 4
-// CHECK4-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK4-NEXT:    store i64 4, i64* [[TMP20]], align 4
-// CHECK4-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK4-NEXT:    store i8* null, i8** [[TMP21]], align 4
-// CHECK4-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK4-NEXT:    [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i32*
-// CHECK4-NEXT:    store i32 2, i32* [[TMP23]], align 4
-// CHECK4-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK4-NEXT:    [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i32*
-// CHECK4-NEXT:    store i32 2, i32* [[TMP25]], align 4
-// CHECK4-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK4-NEXT:    store i64 4, i64* [[TMP26]], align 4
-// CHECK4-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK4-NEXT:    store i8* null, i8** [[TMP27]], align 4
-// CHECK4-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK4-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK4-NEXT:    store i8* null, i8** [[TMP20]], align 4
+// CHECK4-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK4-NEXT:    [[TMP22:%.*]] = bitcast i8** [[TMP21]] to i32*
+// CHECK4-NEXT:    store i32 2, i32* [[TMP22]], align 4
+// CHECK4-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK4-NEXT:    [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i32*
+// CHECK4-NEXT:    store i32 2, i32* [[TMP24]], align 4
+// CHECK4-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK4-NEXT:    store i8* null, i8** [[TMP25]], align 4
+// CHECK4-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK4-NEXT:    [[TMP27:%.*]] = bitcast i8** [[TMP26]] to i32*
+// CHECK4-NEXT:    store i32 [[TMP1]], i32* [[TMP27]], align 4
+// CHECK4-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
 // CHECK4-NEXT:    [[TMP29:%.*]] = bitcast i8** [[TMP28]] to i32*
 // CHECK4-NEXT:    store i32 [[TMP1]], i32* [[TMP29]], align 4
-// CHECK4-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK4-NEXT:    [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i32*
-// CHECK4-NEXT:    store i32 [[TMP1]], i32* [[TMP31]], align 4
-// CHECK4-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK4-NEXT:    store i64 4, i64* [[TMP32]], align 4
-// CHECK4-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
-// CHECK4-NEXT:    store i8* null, i8** [[TMP33]], align 4
-// CHECK4-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK4-NEXT:    [[TMP35:%.*]] = bitcast i8** [[TMP34]] to i16**
-// CHECK4-NEXT:    store i16* [[VLA]], i16** [[TMP35]], align 4
-// CHECK4-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
-// CHECK4-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i16**
-// CHECK4-NEXT:    store i16* [[VLA]], i16** [[TMP37]], align 4
-// CHECK4-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK4-NEXT:    store i64 [[TMP9]], i64* [[TMP38]], align 4
-// CHECK4-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
-// CHECK4-NEXT:    store i8* null, i8** [[TMP39]], align 4
-// CHECK4-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK4-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK4-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK4-NEXT:    [[TMP43:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l216.region_id, i32 5, i8** [[TMP40]], i8** [[TMP41]], i64* [[TMP42]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.10, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
-// CHECK4-NEXT:    [[TMP44:%.*]] = icmp ne i32 [[TMP43]], 0
-// CHECK4-NEXT:    br i1 [[TMP44]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK4-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
+// CHECK4-NEXT:    store i8* null, i8** [[TMP30]], align 4
+// CHECK4-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK4-NEXT:    [[TMP32:%.*]] = bitcast i8** [[TMP31]] to i16**
+// CHECK4-NEXT:    store i16* [[VLA]], i16** [[TMP32]], align 4
+// CHECK4-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK4-NEXT:    [[TMP34:%.*]] = bitcast i8** [[TMP33]] to i16**
+// CHECK4-NEXT:    store i16* [[VLA]], i16** [[TMP34]], align 4
+// CHECK4-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
+// CHECK4-NEXT:    store i64 [[TMP9]], i64* [[TMP35]], align 4
+// CHECK4-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
+// CHECK4-NEXT:    store i8* null, i8** [[TMP36]], align 4
+// CHECK4-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK4-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK4-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK4-NEXT:    [[TMP40:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l216.region_id, i32 5, i8** [[TMP37]], i8** [[TMP38]], i64* [[TMP39]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.12, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
+// CHECK4-NEXT:    [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0
+// CHECK4-NEXT:    br i1 [[TMP41]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK4:       omp_offload.failed:
 // CHECK4-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l216(%struct.S1* [[THIS1]], i32 [[TMP5]], i32 2, i32 [[TMP1]], i16* [[VLA]]) #[[ATTR4]]
 // CHECK4-NEXT:    br label [[OMP_OFFLOAD_CONT]]
@@ -4006,15 +3934,15 @@ int bar(int n){
 // CHECK4-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l216(%struct.S1* [[THIS1]], i32 [[TMP5]], i32 2, i32 [[TMP1]], i16* [[VLA]]) #[[ATTR4]]
 // CHECK4-NEXT:    br label [[OMP_IF_END]]
 // CHECK4:       omp_if.end:
-// CHECK4-NEXT:    [[TMP45:%.*]] = mul nsw i32 1, [[TMP1]]
-// CHECK4-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i32 [[TMP45]]
+// CHECK4-NEXT:    [[TMP42:%.*]] = mul nsw i32 1, [[TMP1]]
+// CHECK4-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i32 [[TMP42]]
 // CHECK4-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1
-// CHECK4-NEXT:    [[TMP46:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
-// CHECK4-NEXT:    [[CONV:%.*]] = sext i16 [[TMP46]] to i32
-// CHECK4-NEXT:    [[TMP47:%.*]] = load i32, i32* [[B]], align 4
-// CHECK4-NEXT:    [[ADD3:%.*]] = add nsw i32 [[CONV]], [[TMP47]]
-// CHECK4-NEXT:    [[TMP48:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK4-NEXT:    call void @llvm.stackrestore(i8* [[TMP48]])
+// CHECK4-NEXT:    [[TMP43:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
+// CHECK4-NEXT:    [[CONV:%.*]] = sext i16 [[TMP43]] to i32
+// CHECK4-NEXT:    [[TMP44:%.*]] = load i32, i32* [[B]], align 4
+// CHECK4-NEXT:    [[ADD3:%.*]] = add nsw i32 [[CONV]], [[TMP44]]
+// CHECK4-NEXT:    [[TMP45:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK4-NEXT:    call void @llvm.stackrestore(i8* [[TMP45]])
 // CHECK4-NEXT:    ret i32 [[ADD3]]
 //
 //
@@ -4085,7 +4013,7 @@ int bar(int n){
 // CHECK4-NEXT:    store i8* null, i8** [[TMP26]], align 4
 // CHECK4-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK4-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK4-NEXT:    [[TMP29:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l198.region_id, i32 4, i8** [[TMP27]], i8** [[TMP28]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.12, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
+// CHECK4-NEXT:    [[TMP29:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l198.region_id, i32 4, i8** [[TMP27]], i8** [[TMP28]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.14, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.15, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
 // CHECK4-NEXT:    [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0
 // CHECK4-NEXT:    br i1 [[TMP30]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK4:       omp_offload.failed:
@@ -4153,7 +4081,7 @@ int bar(int n){
 // CHECK4-NEXT:    store i8* null, i8** [[TMP19]], align 4
 // CHECK4-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK4-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK4-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l181.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.15, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.16, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
+// CHECK4-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l181.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.17, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.18, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
 // CHECK4-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK4-NEXT:    br i1 [[TMP23]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK4:       omp_offload.failed:
@@ -4190,11 +4118,11 @@ int bar(int n){
 // CHECK4-NEXT:    [[TMP4:%.*]] = load i32, i32* [[B_ADDR]], align 4
 // CHECK4-NEXT:    store i32 [[TMP4]], i32* [[B_CASTED]], align 4
 // CHECK4-NEXT:    [[TMP5:%.*]] = load i32, i32* [[B_CASTED]], align 4
-// CHECK4-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i32, i32, i32, i16*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i32 [[TMP5]], i32 [[TMP1]], i32 [[TMP2]], i16* [[TMP3]])
+// CHECK4-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i32, i32, i32, i16*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i32 [[TMP5]], i32 [[TMP1]], i32 [[TMP2]], i16* [[TMP3]])
 // CHECK4-NEXT:    ret void
 //
 //
-// CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..9
+// CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..10
 // CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR3]] {
 // CHECK4-NEXT:  entry:
 // CHECK4-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -4260,11 +4188,11 @@ int bar(int n){
 // CHECK4-NEXT:    [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8*
 // CHECK4-NEXT:    store i8 [[TMP5]], i8* [[CONV3]], align 1
 // CHECK4-NEXT:    [[TMP6:%.*]] = load i32, i32* [[AAA_CASTED]], align 4
-// CHECK4-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, [10 x i32]*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], [10 x i32]* [[TMP0]])
+// CHECK4-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, [10 x i32]*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], [10 x i32]* [[TMP0]])
 // CHECK4-NEXT:    ret void
 //
 //
-// CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..11
+// CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..13
 // CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] {
 // CHECK4-NEXT:  entry:
 // CHECK4-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -4322,11 +4250,11 @@ int bar(int n){
 // CHECK4-NEXT:    [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16*
 // CHECK4-NEXT:    store i16 [[TMP3]], i16* [[CONV1]], align 2
 // CHECK4-NEXT:    [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4
-// CHECK4-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], [10 x i32]* [[TMP0]])
+// CHECK4-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], [10 x i32]* [[TMP0]])
 // CHECK4-NEXT:    ret void
 //
 //
-// CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..14
+// CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..16
 // CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] {
 // CHECK4-NEXT:  entry:
 // CHECK4-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -4357,7 +4285,7 @@ int bar(int n){
 //
 //
 // CHECK4-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK4-SAME: () #[[ATTR7:[0-9]+]] {
+// CHECK4-SAME: () #[[ATTR8:[0-9]+]] {
 // CHECK4-NEXT:  entry:
 // CHECK4-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK4-NEXT:    ret void
@@ -6115,14 +6043,14 @@ int bar(int n){
 // CHECK17-NEXT:    [[TMP46:%.*]] = mul nuw i64 [[TMP2]], 4
 // CHECK17-NEXT:    [[TMP47:%.*]] = mul nuw i64 5, [[TMP5]]
 // CHECK17-NEXT:    [[TMP48:%.*]] = mul nuw i64 [[TMP47]], 8
-// CHECK17-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS16]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP50:%.*]] = bitcast i8** [[TMP49]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP44]], i64* [[TMP50]], align 8
-// CHECK17-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS17]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP52:%.*]] = bitcast i8** [[TMP51]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP44]], i64* [[TMP52]], align 8
-// CHECK17-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK17-NEXT:    store i64 4, i64* [[TMP53]], align 8
+// CHECK17-NEXT:    [[TMP49:%.*]] = bitcast [9 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK17-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP49]], i8* align 8 bitcast ([9 x i64]* @.offload_sizes.8 to i8*), i64 72, i1 false)
+// CHECK17-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS16]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP51:%.*]] = bitcast i8** [[TMP50]] to i64*
+// CHECK17-NEXT:    store i64 [[TMP44]], i64* [[TMP51]], align 8
+// CHECK17-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS17]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP53:%.*]] = bitcast i8** [[TMP52]] to i64*
+// CHECK17-NEXT:    store i64 [[TMP44]], i64* [[TMP53]], align 8
 // CHECK17-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS18]], i64 0, i64 0
 // CHECK17-NEXT:    store i8* null, i8** [[TMP54]], align 8
 // CHECK17-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS16]], i32 0, i32 1
@@ -6131,86 +6059,74 @@ int bar(int n){
 // CHECK17-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS17]], i32 0, i32 1
 // CHECK17-NEXT:    [[TMP58:%.*]] = bitcast i8** [[TMP57]] to [10 x float]**
 // CHECK17-NEXT:    store [10 x float]* [[B]], [10 x float]** [[TMP58]], align 8
-// CHECK17-NEXT:    [[TMP59:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK17-NEXT:    store i64 40, i64* [[TMP59]], align 8
-// CHECK17-NEXT:    [[TMP60:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS18]], i64 0, i64 1
-// CHECK17-NEXT:    store i8* null, i8** [[TMP60]], align 8
-// CHECK17-NEXT:    [[TMP61:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS16]], i32 0, i32 2
-// CHECK17-NEXT:    [[TMP62:%.*]] = bitcast i8** [[TMP61]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP2]], i64* [[TMP62]], align 8
-// CHECK17-NEXT:    [[TMP63:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS17]], i32 0, i32 2
-// CHECK17-NEXT:    [[TMP64:%.*]] = bitcast i8** [[TMP63]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP2]], i64* [[TMP64]], align 8
-// CHECK17-NEXT:    [[TMP65:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK17-NEXT:    store i64 8, i64* [[TMP65]], align 8
-// CHECK17-NEXT:    [[TMP66:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS18]], i64 0, i64 2
-// CHECK17-NEXT:    store i8* null, i8** [[TMP66]], align 8
-// CHECK17-NEXT:    [[TMP67:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS16]], i32 0, i32 3
+// CHECK17-NEXT:    [[TMP59:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS18]], i64 0, i64 1
+// CHECK17-NEXT:    store i8* null, i8** [[TMP59]], align 8
+// CHECK17-NEXT:    [[TMP60:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS16]], i32 0, i32 2
+// CHECK17-NEXT:    [[TMP61:%.*]] = bitcast i8** [[TMP60]] to i64*
+// CHECK17-NEXT:    store i64 [[TMP2]], i64* [[TMP61]], align 8
+// CHECK17-NEXT:    [[TMP62:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS17]], i32 0, i32 2
+// CHECK17-NEXT:    [[TMP63:%.*]] = bitcast i8** [[TMP62]] to i64*
+// CHECK17-NEXT:    store i64 [[TMP2]], i64* [[TMP63]], align 8
+// CHECK17-NEXT:    [[TMP64:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS18]], i64 0, i64 2
+// CHECK17-NEXT:    store i8* null, i8** [[TMP64]], align 8
+// CHECK17-NEXT:    [[TMP65:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS16]], i32 0, i32 3
+// CHECK17-NEXT:    [[TMP66:%.*]] = bitcast i8** [[TMP65]] to float**
+// CHECK17-NEXT:    store float* [[VLA]], float** [[TMP66]], align 8
+// CHECK17-NEXT:    [[TMP67:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS17]], i32 0, i32 3
 // CHECK17-NEXT:    [[TMP68:%.*]] = bitcast i8** [[TMP67]] to float**
 // CHECK17-NEXT:    store float* [[VLA]], float** [[TMP68]], align 8
-// CHECK17-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS17]], i32 0, i32 3
-// CHECK17-NEXT:    [[TMP70:%.*]] = bitcast i8** [[TMP69]] to float**
-// CHECK17-NEXT:    store float* [[VLA]], float** [[TMP70]], align 8
-// CHECK17-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK17-NEXT:    store i64 [[TMP46]], i64* [[TMP71]], align 8
-// CHECK17-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS18]], i64 0, i64 3
-// CHECK17-NEXT:    store i8* null, i8** [[TMP72]], align 8
-// CHECK17-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS16]], i32 0, i32 4
+// CHECK17-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
+// CHECK17-NEXT:    store i64 [[TMP46]], i64* [[TMP69]], align 8
+// CHECK17-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS18]], i64 0, i64 3
+// CHECK17-NEXT:    store i8* null, i8** [[TMP70]], align 8
+// CHECK17-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS16]], i32 0, i32 4
+// CHECK17-NEXT:    [[TMP72:%.*]] = bitcast i8** [[TMP71]] to [5 x [10 x double]]**
+// CHECK17-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP72]], align 8
+// CHECK17-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS17]], i32 0, i32 4
 // CHECK17-NEXT:    [[TMP74:%.*]] = bitcast i8** [[TMP73]] to [5 x [10 x double]]**
 // CHECK17-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP74]], align 8
-// CHECK17-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS17]], i32 0, i32 4
-// CHECK17-NEXT:    [[TMP76:%.*]] = bitcast i8** [[TMP75]] to [5 x [10 x double]]**
-// CHECK17-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP76]], align 8
-// CHECK17-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK17-NEXT:    store i64 400, i64* [[TMP77]], align 8
-// CHECK17-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS18]], i64 0, i64 4
-// CHECK17-NEXT:    store i8* null, i8** [[TMP78]], align 8
-// CHECK17-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS16]], i32 0, i32 5
-// CHECK17-NEXT:    [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i64*
-// CHECK17-NEXT:    store i64 5, i64* [[TMP80]], align 8
-// CHECK17-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS17]], i32 0, i32 5
+// CHECK17-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS18]], i64 0, i64 4
+// CHECK17-NEXT:    store i8* null, i8** [[TMP75]], align 8
+// CHECK17-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS16]], i32 0, i32 5
+// CHECK17-NEXT:    [[TMP77:%.*]] = bitcast i8** [[TMP76]] to i64*
+// CHECK17-NEXT:    store i64 5, i64* [[TMP77]], align 8
+// CHECK17-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS17]], i32 0, i32 5
+// CHECK17-NEXT:    [[TMP79:%.*]] = bitcast i8** [[TMP78]] to i64*
+// CHECK17-NEXT:    store i64 5, i64* [[TMP79]], align 8
+// CHECK17-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS18]], i64 0, i64 5
+// CHECK17-NEXT:    store i8* null, i8** [[TMP80]], align 8
+// CHECK17-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS16]], i32 0, i32 6
 // CHECK17-NEXT:    [[TMP82:%.*]] = bitcast i8** [[TMP81]] to i64*
-// CHECK17-NEXT:    store i64 5, i64* [[TMP82]], align 8
-// CHECK17-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 5
-// CHECK17-NEXT:    store i64 8, i64* [[TMP83]], align 8
-// CHECK17-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS18]], i64 0, i64 5
-// CHECK17-NEXT:    store i8* null, i8** [[TMP84]], align 8
-// CHECK17-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS16]], i32 0, i32 6
-// CHECK17-NEXT:    [[TMP86:%.*]] = bitcast i8** [[TMP85]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP5]], i64* [[TMP86]], align 8
-// CHECK17-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS17]], i32 0, i32 6
-// CHECK17-NEXT:    [[TMP88:%.*]] = bitcast i8** [[TMP87]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP5]], i64* [[TMP88]], align 8
-// CHECK17-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 6
-// CHECK17-NEXT:    store i64 8, i64* [[TMP89]], align 8
-// CHECK17-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS18]], i64 0, i64 6
-// CHECK17-NEXT:    store i8* null, i8** [[TMP90]], align 8
-// CHECK17-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS16]], i32 0, i32 7
-// CHECK17-NEXT:    [[TMP92:%.*]] = bitcast i8** [[TMP91]] to double**
-// CHECK17-NEXT:    store double* [[VLA1]], double** [[TMP92]], align 8
-// CHECK17-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS17]], i32 0, i32 7
-// CHECK17-NEXT:    [[TMP94:%.*]] = bitcast i8** [[TMP93]] to double**
-// CHECK17-NEXT:    store double* [[VLA1]], double** [[TMP94]], align 8
-// CHECK17-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
-// CHECK17-NEXT:    store i64 [[TMP48]], i64* [[TMP95]], align 8
-// CHECK17-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS18]], i64 0, i64 7
+// CHECK17-NEXT:    store i64 [[TMP5]], i64* [[TMP82]], align 8
+// CHECK17-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS17]], i32 0, i32 6
+// CHECK17-NEXT:    [[TMP84:%.*]] = bitcast i8** [[TMP83]] to i64*
+// CHECK17-NEXT:    store i64 [[TMP5]], i64* [[TMP84]], align 8
+// CHECK17-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS18]], i64 0, i64 6
+// CHECK17-NEXT:    store i8* null, i8** [[TMP85]], align 8
+// CHECK17-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS16]], i32 0, i32 7
+// CHECK17-NEXT:    [[TMP87:%.*]] = bitcast i8** [[TMP86]] to double**
+// CHECK17-NEXT:    store double* [[VLA1]], double** [[TMP87]], align 8
+// CHECK17-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS17]], i32 0, i32 7
+// CHECK17-NEXT:    [[TMP89:%.*]] = bitcast i8** [[TMP88]] to double**
+// CHECK17-NEXT:    store double* [[VLA1]], double** [[TMP89]], align 8
+// CHECK17-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
+// CHECK17-NEXT:    store i64 [[TMP48]], i64* [[TMP90]], align 8
+// CHECK17-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS18]], i64 0, i64 7
+// CHECK17-NEXT:    store i8* null, i8** [[TMP91]], align 8
+// CHECK17-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS16]], i32 0, i32 8
+// CHECK17-NEXT:    [[TMP93:%.*]] = bitcast i8** [[TMP92]] to %struct.TT**
+// CHECK17-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP93]], align 8
+// CHECK17-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS17]], i32 0, i32 8
+// CHECK17-NEXT:    [[TMP95:%.*]] = bitcast i8** [[TMP94]] to %struct.TT**
+// CHECK17-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP95]], align 8
+// CHECK17-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS18]], i64 0, i64 8
 // CHECK17-NEXT:    store i8* null, i8** [[TMP96]], align 8
-// CHECK17-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS16]], i32 0, i32 8
-// CHECK17-NEXT:    [[TMP98:%.*]] = bitcast i8** [[TMP97]] to %struct.TT**
-// CHECK17-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP98]], align 8
-// CHECK17-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS17]], i32 0, i32 8
-// CHECK17-NEXT:    [[TMP100:%.*]] = bitcast i8** [[TMP99]] to %struct.TT**
-// CHECK17-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP100]], align 8
-// CHECK17-NEXT:    [[TMP101:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 8
-// CHECK17-NEXT:    store i64 16, i64* [[TMP101]], align 8
-// CHECK17-NEXT:    [[TMP102:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS18]], i64 0, i64 8
-// CHECK17-NEXT:    store i8* null, i8** [[TMP102]], align 8
-// CHECK17-NEXT:    [[TMP103:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS16]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP104:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS17]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP106:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l144.region_id, i32 9, i8** [[TMP103]], i8** [[TMP104]], i64* [[TMP105]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.8, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
-// CHECK17-NEXT:    [[TMP107:%.*]] = icmp ne i32 [[TMP106]], 0
-// CHECK17-NEXT:    br i1 [[TMP107]], label [[OMP_OFFLOAD_FAILED19:%.*]], label [[OMP_OFFLOAD_CONT20:%.*]]
+// CHECK17-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS16]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS17]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP100:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l144.region_id, i32 9, i8** [[TMP97]], i8** [[TMP98]], i64* [[TMP99]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
+// CHECK17-NEXT:    [[TMP101:%.*]] = icmp ne i32 [[TMP100]], 0
+// CHECK17-NEXT:    br i1 [[TMP101]], label [[OMP_OFFLOAD_FAILED19:%.*]], label [[OMP_OFFLOAD_CONT20:%.*]]
 // CHECK17:       omp_offload.failed19:
 // CHECK17-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l144(i64 [[TMP44]], [10 x float]* [[B]], i64 [[TMP2]], float* [[VLA]], [5 x [10 x double]]* [[C]], i64 5, i64 [[TMP5]], double* [[VLA1]], %struct.TT* [[D]]) #[[ATTR4]]
 // CHECK17-NEXT:    br label [[OMP_OFFLOAD_CONT20]]
@@ -6220,10 +6136,10 @@ int bar(int n){
 // CHECK17-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l144(i64 [[TMP44]], [10 x float]* [[B]], i64 [[TMP2]], float* [[VLA]], [5 x [10 x double]]* [[C]], i64 5, i64 [[TMP5]], double* [[VLA1]], %struct.TT* [[D]]) #[[ATTR4]]
 // CHECK17-NEXT:    br label [[OMP_IF_END22]]
 // CHECK17:       omp_if.end22:
-// CHECK17-NEXT:    [[TMP108:%.*]] = load i32, i32* [[A]], align 4
-// CHECK17-NEXT:    [[TMP109:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK17-NEXT:    call void @llvm.stackrestore(i8* [[TMP109]])
-// CHECK17-NEXT:    ret i32 [[TMP108]]
+// CHECK17-NEXT:    [[TMP102:%.*]] = load i32, i32* [[A]], align 4
+// CHECK17-NEXT:    [[TMP103:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK17-NEXT:    call void @llvm.stackrestore(i8* [[TMP103]])
+// CHECK17-NEXT:    ret i32 [[TMP102]]
 //
 //
 // CHECK17-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l100
@@ -6586,14 +6502,14 @@ int bar(int n){
 // CHECK17-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0
 // CHECK17-NEXT:    [[TMP8:%.*]] = mul nuw i64 2, [[TMP2]]
 // CHECK17-NEXT:    [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 2
-// CHECK17-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP11:%.*]] = bitcast i8** [[TMP10]] to %struct.S1**
-// CHECK17-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP11]], align 8
-// CHECK17-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP13:%.*]] = bitcast i8** [[TMP12]] to double**
-// CHECK17-NEXT:    store double* [[A]], double** [[TMP13]], align 8
-// CHECK17-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK17-NEXT:    store i64 8, i64* [[TMP14]], align 8
+// CHECK17-NEXT:    [[TMP10:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK17-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP10]], i8* align 8 bitcast ([5 x i64]* @.offload_sizes.11 to i8*), i64 40, i1 false)
+// CHECK17-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP12:%.*]] = bitcast i8** [[TMP11]] to %struct.S1**
+// CHECK17-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP12]], align 8
+// CHECK17-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to double**
+// CHECK17-NEXT:    store double* [[A]], double** [[TMP14]], align 8
 // CHECK17-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
 // CHECK17-NEXT:    store i8* null, i8** [[TMP15]], align 8
 // CHECK17-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -6602,46 +6518,40 @@ int bar(int n){
 // CHECK17-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK17-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i64*
 // CHECK17-NEXT:    store i64 [[TMP6]], i64* [[TMP19]], align 8
-// CHECK17-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK17-NEXT:    store i64 4, i64* [[TMP20]], align 8
-// CHECK17-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK17-NEXT:    store i8* null, i8** [[TMP21]], align 8
-// CHECK17-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK17-NEXT:    [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i64*
-// CHECK17-NEXT:    store i64 2, i64* [[TMP23]], align 8
-// CHECK17-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK17-NEXT:    [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i64*
-// CHECK17-NEXT:    store i64 2, i64* [[TMP25]], align 8
-// CHECK17-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK17-NEXT:    store i64 8, i64* [[TMP26]], align 8
-// CHECK17-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK17-NEXT:    store i8* null, i8** [[TMP27]], align 8
-// CHECK17-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK17-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK17-NEXT:    store i8* null, i8** [[TMP20]], align 8
+// CHECK17-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK17-NEXT:    [[TMP22:%.*]] = bitcast i8** [[TMP21]] to i64*
+// CHECK17-NEXT:    store i64 2, i64* [[TMP22]], align 8
+// CHECK17-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK17-NEXT:    [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i64*
+// CHECK17-NEXT:    store i64 2, i64* [[TMP24]], align 8
+// CHECK17-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK17-NEXT:    store i8* null, i8** [[TMP25]], align 8
+// CHECK17-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK17-NEXT:    [[TMP27:%.*]] = bitcast i8** [[TMP26]] to i64*
+// CHECK17-NEXT:    store i64 [[TMP2]], i64* [[TMP27]], align 8
+// CHECK17-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
 // CHECK17-NEXT:    [[TMP29:%.*]] = bitcast i8** [[TMP28]] to i64*
 // CHECK17-NEXT:    store i64 [[TMP2]], i64* [[TMP29]], align 8
-// CHECK17-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK17-NEXT:    [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP2]], i64* [[TMP31]], align 8
-// CHECK17-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK17-NEXT:    store i64 8, i64* [[TMP32]], align 8
-// CHECK17-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
-// CHECK17-NEXT:    store i8* null, i8** [[TMP33]], align 8
-// CHECK17-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK17-NEXT:    [[TMP35:%.*]] = bitcast i8** [[TMP34]] to i16**
-// CHECK17-NEXT:    store i16* [[VLA]], i16** [[TMP35]], align 8
-// CHECK17-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
-// CHECK17-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i16**
-// CHECK17-NEXT:    store i16* [[VLA]], i16** [[TMP37]], align 8
-// CHECK17-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK17-NEXT:    store i64 [[TMP9]], i64* [[TMP38]], align 8
-// CHECK17-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
-// CHECK17-NEXT:    store i8* null, i8** [[TMP39]], align 8
-// CHECK17-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP43:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l216.region_id, i32 5, i8** [[TMP40]], i8** [[TMP41]], i64* [[TMP42]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.10, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
-// CHECK17-NEXT:    [[TMP44:%.*]] = icmp ne i32 [[TMP43]], 0
-// CHECK17-NEXT:    br i1 [[TMP44]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK17-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
+// CHECK17-NEXT:    store i8* null, i8** [[TMP30]], align 8
+// CHECK17-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK17-NEXT:    [[TMP32:%.*]] = bitcast i8** [[TMP31]] to i16**
+// CHECK17-NEXT:    store i16* [[VLA]], i16** [[TMP32]], align 8
+// CHECK17-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK17-NEXT:    [[TMP34:%.*]] = bitcast i8** [[TMP33]] to i16**
+// CHECK17-NEXT:    store i16* [[VLA]], i16** [[TMP34]], align 8
+// CHECK17-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
+// CHECK17-NEXT:    store i64 [[TMP9]], i64* [[TMP35]], align 8
+// CHECK17-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
+// CHECK17-NEXT:    store i8* null, i8** [[TMP36]], align 8
+// CHECK17-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP40:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l216.region_id, i32 5, i8** [[TMP37]], i8** [[TMP38]], i64* [[TMP39]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.12, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
+// CHECK17-NEXT:    [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0
+// CHECK17-NEXT:    br i1 [[TMP41]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK17:       omp_offload.failed:
 // CHECK17-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l216(%struct.S1* [[THIS1]], i64 [[TMP6]], i64 2, i64 [[TMP2]], i16* [[VLA]]) #[[ATTR4]]
 // CHECK17-NEXT:    br label [[OMP_OFFLOAD_CONT]]
@@ -6651,15 +6561,15 @@ int bar(int n){
 // CHECK17-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l216(%struct.S1* [[THIS1]], i64 [[TMP6]], i64 2, i64 [[TMP2]], i16* [[VLA]]) #[[ATTR4]]
 // CHECK17-NEXT:    br label [[OMP_IF_END]]
 // CHECK17:       omp_if.end:
-// CHECK17-NEXT:    [[TMP45:%.*]] = mul nsw i64 1, [[TMP2]]
-// CHECK17-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i64 [[TMP45]]
+// CHECK17-NEXT:    [[TMP42:%.*]] = mul nsw i64 1, [[TMP2]]
+// CHECK17-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i64 [[TMP42]]
 // CHECK17-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1
-// CHECK17-NEXT:    [[TMP46:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
-// CHECK17-NEXT:    [[CONV3:%.*]] = sext i16 [[TMP46]] to i32
-// CHECK17-NEXT:    [[TMP47:%.*]] = load i32, i32* [[B]], align 4
-// CHECK17-NEXT:    [[ADD4:%.*]] = add nsw i32 [[CONV3]], [[TMP47]]
-// CHECK17-NEXT:    [[TMP48:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK17-NEXT:    call void @llvm.stackrestore(i8* [[TMP48]])
+// CHECK17-NEXT:    [[TMP43:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
+// CHECK17-NEXT:    [[CONV3:%.*]] = sext i16 [[TMP43]] to i32
+// CHECK17-NEXT:    [[TMP44:%.*]] = load i32, i32* [[B]], align 4
+// CHECK17-NEXT:    [[ADD4:%.*]] = add nsw i32 [[CONV3]], [[TMP44]]
+// CHECK17-NEXT:    [[TMP45:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK17-NEXT:    call void @llvm.stackrestore(i8* [[TMP45]])
 // CHECK17-NEXT:    ret i32 [[ADD4]]
 //
 //
@@ -6731,7 +6641,7 @@ int bar(int n){
 // CHECK17-NEXT:    store i8* null, i8** [[TMP26]], align 8
 // CHECK17-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK17-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP29:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l198.region_id, i32 4, i8** [[TMP27]], i8** [[TMP28]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.12, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
+// CHECK17-NEXT:    [[TMP29:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l198.region_id, i32 4, i8** [[TMP27]], i8** [[TMP28]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.14, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.15, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
 // CHECK17-NEXT:    [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0
 // CHECK17-NEXT:    br i1 [[TMP30]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK17:       omp_offload.failed:
@@ -6800,7 +6710,7 @@ int bar(int n){
 // CHECK17-NEXT:    store i8* null, i8** [[TMP19]], align 8
 // CHECK17-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK17-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l181.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.15, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.16, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
+// CHECK17-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l181.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.17, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.18, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
 // CHECK17-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK17-NEXT:    br i1 [[TMP23]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK17:       omp_offload.failed:
@@ -6839,11 +6749,11 @@ int bar(int n){
 // CHECK17-NEXT:    [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32*
 // CHECK17-NEXT:    store i32 [[TMP4]], i32* [[CONV3]], align 4
 // CHECK17-NEXT:    [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8
-// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i64, i64, i64, i16*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i64 [[TMP5]], i64 [[TMP1]], i64 [[TMP2]], i16* [[TMP3]])
+// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i64, i64, i64, i16*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i64 [[TMP5]], i64 [[TMP1]], i64 [[TMP2]], i16* [[TMP3]])
 // CHECK17-NEXT:    ret void
 //
 //
-// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..9
+// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..10
 // CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR3]] {
 // CHECK17-NEXT:  entry:
 // CHECK17-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -6912,11 +6822,11 @@ int bar(int n){
 // CHECK17-NEXT:    [[CONV5:%.*]] = bitcast i64* [[AAA_CASTED]] to i8*
 // CHECK17-NEXT:    store i8 [[TMP5]], i8* [[CONV5]], align 1
 // CHECK17-NEXT:    [[TMP6:%.*]] = load i64, i64* [[AAA_CASTED]], align 8
-// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, [10 x i32]*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], [10 x i32]* [[TMP0]])
+// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, [10 x i32]*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], [10 x i32]* [[TMP0]])
 // CHECK17-NEXT:    ret void
 //
 //
-// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..11
+// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..13
 // CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] {
 // CHECK17-NEXT:  entry:
 // CHECK17-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -6977,11 +6887,11 @@ int bar(int n){
 // CHECK17-NEXT:    [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16*
 // CHECK17-NEXT:    store i16 [[TMP3]], i16* [[CONV3]], align 2
 // CHECK17-NEXT:    [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8
-// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], [10 x i32]* [[TMP0]])
+// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], [10 x i32]* [[TMP0]])
 // CHECK17-NEXT:    ret void
 //
 //
-// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..14
+// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..16
 // CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] {
 // CHECK17-NEXT:  entry:
 // CHECK17-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -7013,7 +6923,7 @@ int bar(int n){
 //
 //
 // CHECK17-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK17-SAME: () #[[ATTR7:[0-9]+]] {
+// CHECK17-SAME: () #[[ATTR8:[0-9]+]] {
 // CHECK17-NEXT:  entry:
 // CHECK17-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK17-NEXT:    ret void
@@ -7145,14 +7055,14 @@ int bar(int n){
 // CHECK18-NEXT:    [[TMP46:%.*]] = mul nuw i64 [[TMP2]], 4
 // CHECK18-NEXT:    [[TMP47:%.*]] = mul nuw i64 5, [[TMP5]]
 // CHECK18-NEXT:    [[TMP48:%.*]] = mul nuw i64 [[TMP47]], 8
-// CHECK18-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS16]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP50:%.*]] = bitcast i8** [[TMP49]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP44]], i64* [[TMP50]], align 8
-// CHECK18-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS17]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP52:%.*]] = bitcast i8** [[TMP51]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP44]], i64* [[TMP52]], align 8
-// CHECK18-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK18-NEXT:    store i64 4, i64* [[TMP53]], align 8
+// CHECK18-NEXT:    [[TMP49:%.*]] = bitcast [9 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK18-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP49]], i8* align 8 bitcast ([9 x i64]* @.offload_sizes.8 to i8*), i64 72, i1 false)
+// CHECK18-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS16]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP51:%.*]] = bitcast i8** [[TMP50]] to i64*
+// CHECK18-NEXT:    store i64 [[TMP44]], i64* [[TMP51]], align 8
+// CHECK18-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS17]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP53:%.*]] = bitcast i8** [[TMP52]] to i64*
+// CHECK18-NEXT:    store i64 [[TMP44]], i64* [[TMP53]], align 8
 // CHECK18-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS18]], i64 0, i64 0
 // CHECK18-NEXT:    store i8* null, i8** [[TMP54]], align 8
 // CHECK18-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS16]], i32 0, i32 1
@@ -7161,86 +7071,74 @@ int bar(int n){
 // CHECK18-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS17]], i32 0, i32 1
 // CHECK18-NEXT:    [[TMP58:%.*]] = bitcast i8** [[TMP57]] to [10 x float]**
 // CHECK18-NEXT:    store [10 x float]* [[B]], [10 x float]** [[TMP58]], align 8
-// CHECK18-NEXT:    [[TMP59:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK18-NEXT:    store i64 40, i64* [[TMP59]], align 8
-// CHECK18-NEXT:    [[TMP60:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS18]], i64 0, i64 1
-// CHECK18-NEXT:    store i8* null, i8** [[TMP60]], align 8
-// CHECK18-NEXT:    [[TMP61:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS16]], i32 0, i32 2
-// CHECK18-NEXT:    [[TMP62:%.*]] = bitcast i8** [[TMP61]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP2]], i64* [[TMP62]], align 8
-// CHECK18-NEXT:    [[TMP63:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS17]], i32 0, i32 2
-// CHECK18-NEXT:    [[TMP64:%.*]] = bitcast i8** [[TMP63]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP2]], i64* [[TMP64]], align 8
-// CHECK18-NEXT:    [[TMP65:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK18-NEXT:    store i64 8, i64* [[TMP65]], align 8
-// CHECK18-NEXT:    [[TMP66:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS18]], i64 0, i64 2
-// CHECK18-NEXT:    store i8* null, i8** [[TMP66]], align 8
-// CHECK18-NEXT:    [[TMP67:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS16]], i32 0, i32 3
+// CHECK18-NEXT:    [[TMP59:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS18]], i64 0, i64 1
+// CHECK18-NEXT:    store i8* null, i8** [[TMP59]], align 8
+// CHECK18-NEXT:    [[TMP60:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS16]], i32 0, i32 2
+// CHECK18-NEXT:    [[TMP61:%.*]] = bitcast i8** [[TMP60]] to i64*
+// CHECK18-NEXT:    store i64 [[TMP2]], i64* [[TMP61]], align 8
+// CHECK18-NEXT:    [[TMP62:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS17]], i32 0, i32 2
+// CHECK18-NEXT:    [[TMP63:%.*]] = bitcast i8** [[TMP62]] to i64*
+// CHECK18-NEXT:    store i64 [[TMP2]], i64* [[TMP63]], align 8
+// CHECK18-NEXT:    [[TMP64:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS18]], i64 0, i64 2
+// CHECK18-NEXT:    store i8* null, i8** [[TMP64]], align 8
+// CHECK18-NEXT:    [[TMP65:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS16]], i32 0, i32 3
+// CHECK18-NEXT:    [[TMP66:%.*]] = bitcast i8** [[TMP65]] to float**
+// CHECK18-NEXT:    store float* [[VLA]], float** [[TMP66]], align 8
+// CHECK18-NEXT:    [[TMP67:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS17]], i32 0, i32 3
 // CHECK18-NEXT:    [[TMP68:%.*]] = bitcast i8** [[TMP67]] to float**
 // CHECK18-NEXT:    store float* [[VLA]], float** [[TMP68]], align 8
-// CHECK18-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS17]], i32 0, i32 3
-// CHECK18-NEXT:    [[TMP70:%.*]] = bitcast i8** [[TMP69]] to float**
-// CHECK18-NEXT:    store float* [[VLA]], float** [[TMP70]], align 8
-// CHECK18-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK18-NEXT:    store i64 [[TMP46]], i64* [[TMP71]], align 8
-// CHECK18-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS18]], i64 0, i64 3
-// CHECK18-NEXT:    store i8* null, i8** [[TMP72]], align 8
-// CHECK18-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS16]], i32 0, i32 4
+// CHECK18-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
+// CHECK18-NEXT:    store i64 [[TMP46]], i64* [[TMP69]], align 8
+// CHECK18-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS18]], i64 0, i64 3
+// CHECK18-NEXT:    store i8* null, i8** [[TMP70]], align 8
+// CHECK18-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS16]], i32 0, i32 4
+// CHECK18-NEXT:    [[TMP72:%.*]] = bitcast i8** [[TMP71]] to [5 x [10 x double]]**
+// CHECK18-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP72]], align 8
+// CHECK18-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS17]], i32 0, i32 4
 // CHECK18-NEXT:    [[TMP74:%.*]] = bitcast i8** [[TMP73]] to [5 x [10 x double]]**
 // CHECK18-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP74]], align 8
-// CHECK18-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS17]], i32 0, i32 4
-// CHECK18-NEXT:    [[TMP76:%.*]] = bitcast i8** [[TMP75]] to [5 x [10 x double]]**
-// CHECK18-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP76]], align 8
-// CHECK18-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK18-NEXT:    store i64 400, i64* [[TMP77]], align 8
-// CHECK18-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS18]], i64 0, i64 4
-// CHECK18-NEXT:    store i8* null, i8** [[TMP78]], align 8
-// CHECK18-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS16]], i32 0, i32 5
-// CHECK18-NEXT:    [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i64*
-// CHECK18-NEXT:    store i64 5, i64* [[TMP80]], align 8
-// CHECK18-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS17]], i32 0, i32 5
+// CHECK18-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS18]], i64 0, i64 4
+// CHECK18-NEXT:    store i8* null, i8** [[TMP75]], align 8
+// CHECK18-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS16]], i32 0, i32 5
+// CHECK18-NEXT:    [[TMP77:%.*]] = bitcast i8** [[TMP76]] to i64*
+// CHECK18-NEXT:    store i64 5, i64* [[TMP77]], align 8
+// CHECK18-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS17]], i32 0, i32 5
+// CHECK18-NEXT:    [[TMP79:%.*]] = bitcast i8** [[TMP78]] to i64*
+// CHECK18-NEXT:    store i64 5, i64* [[TMP79]], align 8
+// CHECK18-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS18]], i64 0, i64 5
+// CHECK18-NEXT:    store i8* null, i8** [[TMP80]], align 8
+// CHECK18-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS16]], i32 0, i32 6
 // CHECK18-NEXT:    [[TMP82:%.*]] = bitcast i8** [[TMP81]] to i64*
-// CHECK18-NEXT:    store i64 5, i64* [[TMP82]], align 8
-// CHECK18-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 5
-// CHECK18-NEXT:    store i64 8, i64* [[TMP83]], align 8
-// CHECK18-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS18]], i64 0, i64 5
-// CHECK18-NEXT:    store i8* null, i8** [[TMP84]], align 8
-// CHECK18-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS16]], i32 0, i32 6
-// CHECK18-NEXT:    [[TMP86:%.*]] = bitcast i8** [[TMP85]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP5]], i64* [[TMP86]], align 8
-// CHECK18-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS17]], i32 0, i32 6
-// CHECK18-NEXT:    [[TMP88:%.*]] = bitcast i8** [[TMP87]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP5]], i64* [[TMP88]], align 8
-// CHECK18-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 6
-// CHECK18-NEXT:    store i64 8, i64* [[TMP89]], align 8
-// CHECK18-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS18]], i64 0, i64 6
-// CHECK18-NEXT:    store i8* null, i8** [[TMP90]], align 8
-// CHECK18-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS16]], i32 0, i32 7
-// CHECK18-NEXT:    [[TMP92:%.*]] = bitcast i8** [[TMP91]] to double**
-// CHECK18-NEXT:    store double* [[VLA1]], double** [[TMP92]], align 8
-// CHECK18-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS17]], i32 0, i32 7
-// CHECK18-NEXT:    [[TMP94:%.*]] = bitcast i8** [[TMP93]] to double**
-// CHECK18-NEXT:    store double* [[VLA1]], double** [[TMP94]], align 8
-// CHECK18-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
-// CHECK18-NEXT:    store i64 [[TMP48]], i64* [[TMP95]], align 8
-// CHECK18-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS18]], i64 0, i64 7
+// CHECK18-NEXT:    store i64 [[TMP5]], i64* [[TMP82]], align 8
+// CHECK18-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS17]], i32 0, i32 6
+// CHECK18-NEXT:    [[TMP84:%.*]] = bitcast i8** [[TMP83]] to i64*
+// CHECK18-NEXT:    store i64 [[TMP5]], i64* [[TMP84]], align 8
+// CHECK18-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS18]], i64 0, i64 6
+// CHECK18-NEXT:    store i8* null, i8** [[TMP85]], align 8
+// CHECK18-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS16]], i32 0, i32 7
+// CHECK18-NEXT:    [[TMP87:%.*]] = bitcast i8** [[TMP86]] to double**
+// CHECK18-NEXT:    store double* [[VLA1]], double** [[TMP87]], align 8
+// CHECK18-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS17]], i32 0, i32 7
+// CHECK18-NEXT:    [[TMP89:%.*]] = bitcast i8** [[TMP88]] to double**
+// CHECK18-NEXT:    store double* [[VLA1]], double** [[TMP89]], align 8
+// CHECK18-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
+// CHECK18-NEXT:    store i64 [[TMP48]], i64* [[TMP90]], align 8
+// CHECK18-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS18]], i64 0, i64 7
+// CHECK18-NEXT:    store i8* null, i8** [[TMP91]], align 8
+// CHECK18-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS16]], i32 0, i32 8
+// CHECK18-NEXT:    [[TMP93:%.*]] = bitcast i8** [[TMP92]] to %struct.TT**
+// CHECK18-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP93]], align 8
+// CHECK18-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS17]], i32 0, i32 8
+// CHECK18-NEXT:    [[TMP95:%.*]] = bitcast i8** [[TMP94]] to %struct.TT**
+// CHECK18-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP95]], align 8
+// CHECK18-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS18]], i64 0, i64 8
 // CHECK18-NEXT:    store i8* null, i8** [[TMP96]], align 8
-// CHECK18-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS16]], i32 0, i32 8
-// CHECK18-NEXT:    [[TMP98:%.*]] = bitcast i8** [[TMP97]] to %struct.TT**
-// CHECK18-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP98]], align 8
-// CHECK18-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS17]], i32 0, i32 8
-// CHECK18-NEXT:    [[TMP100:%.*]] = bitcast i8** [[TMP99]] to %struct.TT**
-// CHECK18-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP100]], align 8
-// CHECK18-NEXT:    [[TMP101:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 8
-// CHECK18-NEXT:    store i64 16, i64* [[TMP101]], align 8
-// CHECK18-NEXT:    [[TMP102:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS18]], i64 0, i64 8
-// CHECK18-NEXT:    store i8* null, i8** [[TMP102]], align 8
-// CHECK18-NEXT:    [[TMP103:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS16]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP104:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS17]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP106:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l144.region_id, i32 9, i8** [[TMP103]], i8** [[TMP104]], i64* [[TMP105]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.8, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
-// CHECK18-NEXT:    [[TMP107:%.*]] = icmp ne i32 [[TMP106]], 0
-// CHECK18-NEXT:    br i1 [[TMP107]], label [[OMP_OFFLOAD_FAILED19:%.*]], label [[OMP_OFFLOAD_CONT20:%.*]]
+// CHECK18-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS16]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS17]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP100:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l144.region_id, i32 9, i8** [[TMP97]], i8** [[TMP98]], i64* [[TMP99]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
+// CHECK18-NEXT:    [[TMP101:%.*]] = icmp ne i32 [[TMP100]], 0
+// CHECK18-NEXT:    br i1 [[TMP101]], label [[OMP_OFFLOAD_FAILED19:%.*]], label [[OMP_OFFLOAD_CONT20:%.*]]
 // CHECK18:       omp_offload.failed19:
 // CHECK18-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l144(i64 [[TMP44]], [10 x float]* [[B]], i64 [[TMP2]], float* [[VLA]], [5 x [10 x double]]* [[C]], i64 5, i64 [[TMP5]], double* [[VLA1]], %struct.TT* [[D]]) #[[ATTR4]]
 // CHECK18-NEXT:    br label [[OMP_OFFLOAD_CONT20]]
@@ -7250,10 +7148,10 @@ int bar(int n){
 // CHECK18-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l144(i64 [[TMP44]], [10 x float]* [[B]], i64 [[TMP2]], float* [[VLA]], [5 x [10 x double]]* [[C]], i64 5, i64 [[TMP5]], double* [[VLA1]], %struct.TT* [[D]]) #[[ATTR4]]
 // CHECK18-NEXT:    br label [[OMP_IF_END22]]
 // CHECK18:       omp_if.end22:
-// CHECK18-NEXT:    [[TMP108:%.*]] = load i32, i32* [[A]], align 4
-// CHECK18-NEXT:    [[TMP109:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK18-NEXT:    call void @llvm.stackrestore(i8* [[TMP109]])
-// CHECK18-NEXT:    ret i32 [[TMP108]]
+// CHECK18-NEXT:    [[TMP102:%.*]] = load i32, i32* [[A]], align 4
+// CHECK18-NEXT:    [[TMP103:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK18-NEXT:    call void @llvm.stackrestore(i8* [[TMP103]])
+// CHECK18-NEXT:    ret i32 [[TMP102]]
 //
 //
 // CHECK18-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l100
@@ -7616,14 +7514,14 @@ int bar(int n){
 // CHECK18-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0
 // CHECK18-NEXT:    [[TMP8:%.*]] = mul nuw i64 2, [[TMP2]]
 // CHECK18-NEXT:    [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 2
-// CHECK18-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP11:%.*]] = bitcast i8** [[TMP10]] to %struct.S1**
-// CHECK18-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP11]], align 8
-// CHECK18-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP13:%.*]] = bitcast i8** [[TMP12]] to double**
-// CHECK18-NEXT:    store double* [[A]], double** [[TMP13]], align 8
-// CHECK18-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK18-NEXT:    store i64 8, i64* [[TMP14]], align 8
+// CHECK18-NEXT:    [[TMP10:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK18-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP10]], i8* align 8 bitcast ([5 x i64]* @.offload_sizes.11 to i8*), i64 40, i1 false)
+// CHECK18-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP12:%.*]] = bitcast i8** [[TMP11]] to %struct.S1**
+// CHECK18-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP12]], align 8
+// CHECK18-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to double**
+// CHECK18-NEXT:    store double* [[A]], double** [[TMP14]], align 8
 // CHECK18-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
 // CHECK18-NEXT:    store i8* null, i8** [[TMP15]], align 8
 // CHECK18-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -7632,46 +7530,40 @@ int bar(int n){
 // CHECK18-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK18-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i64*
 // CHECK18-NEXT:    store i64 [[TMP6]], i64* [[TMP19]], align 8
-// CHECK18-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK18-NEXT:    store i64 4, i64* [[TMP20]], align 8
-// CHECK18-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK18-NEXT:    store i8* null, i8** [[TMP21]], align 8
-// CHECK18-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK18-NEXT:    [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i64*
-// CHECK18-NEXT:    store i64 2, i64* [[TMP23]], align 8
-// CHECK18-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK18-NEXT:    [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i64*
-// CHECK18-NEXT:    store i64 2, i64* [[TMP25]], align 8
-// CHECK18-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK18-NEXT:    store i64 8, i64* [[TMP26]], align 8
-// CHECK18-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK18-NEXT:    store i8* null, i8** [[TMP27]], align 8
-// CHECK18-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK18-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK18-NEXT:    store i8* null, i8** [[TMP20]], align 8
+// CHECK18-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK18-NEXT:    [[TMP22:%.*]] = bitcast i8** [[TMP21]] to i64*
+// CHECK18-NEXT:    store i64 2, i64* [[TMP22]], align 8
+// CHECK18-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK18-NEXT:    [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i64*
+// CHECK18-NEXT:    store i64 2, i64* [[TMP24]], align 8
+// CHECK18-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK18-NEXT:    store i8* null, i8** [[TMP25]], align 8
+// CHECK18-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK18-NEXT:    [[TMP27:%.*]] = bitcast i8** [[TMP26]] to i64*
+// CHECK18-NEXT:    store i64 [[TMP2]], i64* [[TMP27]], align 8
+// CHECK18-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
 // CHECK18-NEXT:    [[TMP29:%.*]] = bitcast i8** [[TMP28]] to i64*
 // CHECK18-NEXT:    store i64 [[TMP2]], i64* [[TMP29]], align 8
-// CHECK18-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK18-NEXT:    [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP2]], i64* [[TMP31]], align 8
-// CHECK18-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK18-NEXT:    store i64 8, i64* [[TMP32]], align 8
-// CHECK18-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
-// CHECK18-NEXT:    store i8* null, i8** [[TMP33]], align 8
-// CHECK18-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK18-NEXT:    [[TMP35:%.*]] = bitcast i8** [[TMP34]] to i16**
-// CHECK18-NEXT:    store i16* [[VLA]], i16** [[TMP35]], align 8
-// CHECK18-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
-// CHECK18-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i16**
-// CHECK18-NEXT:    store i16* [[VLA]], i16** [[TMP37]], align 8
-// CHECK18-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK18-NEXT:    store i64 [[TMP9]], i64* [[TMP38]], align 8
-// CHECK18-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
-// CHECK18-NEXT:    store i8* null, i8** [[TMP39]], align 8
-// CHECK18-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP43:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l216.region_id, i32 5, i8** [[TMP40]], i8** [[TMP41]], i64* [[TMP42]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.10, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
-// CHECK18-NEXT:    [[TMP44:%.*]] = icmp ne i32 [[TMP43]], 0
-// CHECK18-NEXT:    br i1 [[TMP44]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK18-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
+// CHECK18-NEXT:    store i8* null, i8** [[TMP30]], align 8
+// CHECK18-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK18-NEXT:    [[TMP32:%.*]] = bitcast i8** [[TMP31]] to i16**
+// CHECK18-NEXT:    store i16* [[VLA]], i16** [[TMP32]], align 8
+// CHECK18-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK18-NEXT:    [[TMP34:%.*]] = bitcast i8** [[TMP33]] to i16**
+// CHECK18-NEXT:    store i16* [[VLA]], i16** [[TMP34]], align 8
+// CHECK18-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
+// CHECK18-NEXT:    store i64 [[TMP9]], i64* [[TMP35]], align 8
+// CHECK18-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
+// CHECK18-NEXT:    store i8* null, i8** [[TMP36]], align 8
+// CHECK18-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP40:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l216.region_id, i32 5, i8** [[TMP37]], i8** [[TMP38]], i64* [[TMP39]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.12, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
+// CHECK18-NEXT:    [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0
+// CHECK18-NEXT:    br i1 [[TMP41]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK18:       omp_offload.failed:
 // CHECK18-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l216(%struct.S1* [[THIS1]], i64 [[TMP6]], i64 2, i64 [[TMP2]], i16* [[VLA]]) #[[ATTR4]]
 // CHECK18-NEXT:    br label [[OMP_OFFLOAD_CONT]]
@@ -7681,15 +7573,15 @@ int bar(int n){
 // CHECK18-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l216(%struct.S1* [[THIS1]], i64 [[TMP6]], i64 2, i64 [[TMP2]], i16* [[VLA]]) #[[ATTR4]]
 // CHECK18-NEXT:    br label [[OMP_IF_END]]
 // CHECK18:       omp_if.end:
-// CHECK18-NEXT:    [[TMP45:%.*]] = mul nsw i64 1, [[TMP2]]
-// CHECK18-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i64 [[TMP45]]
+// CHECK18-NEXT:    [[TMP42:%.*]] = mul nsw i64 1, [[TMP2]]
+// CHECK18-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i64 [[TMP42]]
 // CHECK18-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1
-// CHECK18-NEXT:    [[TMP46:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
-// CHECK18-NEXT:    [[CONV3:%.*]] = sext i16 [[TMP46]] to i32
-// CHECK18-NEXT:    [[TMP47:%.*]] = load i32, i32* [[B]], align 4
-// CHECK18-NEXT:    [[ADD4:%.*]] = add nsw i32 [[CONV3]], [[TMP47]]
-// CHECK18-NEXT:    [[TMP48:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK18-NEXT:    call void @llvm.stackrestore(i8* [[TMP48]])
+// CHECK18-NEXT:    [[TMP43:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
+// CHECK18-NEXT:    [[CONV3:%.*]] = sext i16 [[TMP43]] to i32
+// CHECK18-NEXT:    [[TMP44:%.*]] = load i32, i32* [[B]], align 4
+// CHECK18-NEXT:    [[ADD4:%.*]] = add nsw i32 [[CONV3]], [[TMP44]]
+// CHECK18-NEXT:    [[TMP45:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK18-NEXT:    call void @llvm.stackrestore(i8* [[TMP45]])
 // CHECK18-NEXT:    ret i32 [[ADD4]]
 //
 //
@@ -7761,7 +7653,7 @@ int bar(int n){
 // CHECK18-NEXT:    store i8* null, i8** [[TMP26]], align 8
 // CHECK18-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK18-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP29:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l198.region_id, i32 4, i8** [[TMP27]], i8** [[TMP28]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.12, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
+// CHECK18-NEXT:    [[TMP29:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l198.region_id, i32 4, i8** [[TMP27]], i8** [[TMP28]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.14, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.15, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
 // CHECK18-NEXT:    [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0
 // CHECK18-NEXT:    br i1 [[TMP30]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK18:       omp_offload.failed:
@@ -7830,7 +7722,7 @@ int bar(int n){
 // CHECK18-NEXT:    store i8* null, i8** [[TMP19]], align 8
 // CHECK18-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK18-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l181.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.15, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.16, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
+// CHECK18-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l181.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.17, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.18, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
 // CHECK18-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK18-NEXT:    br i1 [[TMP23]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK18:       omp_offload.failed:
@@ -7869,11 +7761,11 @@ int bar(int n){
 // CHECK18-NEXT:    [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32*
 // CHECK18-NEXT:    store i32 [[TMP4]], i32* [[CONV3]], align 4
 // CHECK18-NEXT:    [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8
-// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i64, i64, i64, i16*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i64 [[TMP5]], i64 [[TMP1]], i64 [[TMP2]], i16* [[TMP3]])
+// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i64, i64, i64, i16*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i64 [[TMP5]], i64 [[TMP1]], i64 [[TMP2]], i16* [[TMP3]])
 // CHECK18-NEXT:    ret void
 //
 //
-// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..9
+// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..10
 // CHECK18-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR3]] {
 // CHECK18-NEXT:  entry:
 // CHECK18-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -7942,11 +7834,11 @@ int bar(int n){
 // CHECK18-NEXT:    [[CONV5:%.*]] = bitcast i64* [[AAA_CASTED]] to i8*
 // CHECK18-NEXT:    store i8 [[TMP5]], i8* [[CONV5]], align 1
 // CHECK18-NEXT:    [[TMP6:%.*]] = load i64, i64* [[AAA_CASTED]], align 8
-// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, [10 x i32]*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], [10 x i32]* [[TMP0]])
+// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, [10 x i32]*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], [10 x i32]* [[TMP0]])
 // CHECK18-NEXT:    ret void
 //
 //
-// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..11
+// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..13
 // CHECK18-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] {
 // CHECK18-NEXT:  entry:
 // CHECK18-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -8007,11 +7899,11 @@ int bar(int n){
 // CHECK18-NEXT:    [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16*
 // CHECK18-NEXT:    store i16 [[TMP3]], i16* [[CONV3]], align 2
 // CHECK18-NEXT:    [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8
-// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], [10 x i32]* [[TMP0]])
+// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], [10 x i32]* [[TMP0]])
 // CHECK18-NEXT:    ret void
 //
 //
-// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..14
+// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..16
 // CHECK18-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] {
 // CHECK18-NEXT:  entry:
 // CHECK18-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -8043,7 +7935,7 @@ int bar(int n){
 //
 //
 // CHECK18-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK18-SAME: () #[[ATTR7:[0-9]+]] {
+// CHECK18-SAME: () #[[ATTR8:[0-9]+]] {
 // CHECK18-NEXT:  entry:
 // CHECK18-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK18-NEXT:    ret void
@@ -8172,14 +8064,14 @@ int bar(int n){
 // CHECK19-NEXT:    [[TMP46:%.*]] = mul nuw i32 5, [[TMP3]]
 // CHECK19-NEXT:    [[TMP47:%.*]] = mul nuw i32 [[TMP46]], 8
 // CHECK19-NEXT:    [[TMP48:%.*]] = sext i32 [[TMP47]] to i64
-// CHECK19-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP50:%.*]] = bitcast i8** [[TMP49]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP42]], i32* [[TMP50]], align 4
-// CHECK19-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP52:%.*]] = bitcast i8** [[TMP51]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP42]], i32* [[TMP52]], align 4
-// CHECK19-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK19-NEXT:    store i64 4, i64* [[TMP53]], align 4
+// CHECK19-NEXT:    [[TMP49:%.*]] = bitcast [9 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK19-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP49]], i8* align 4 bitcast ([9 x i64]* @.offload_sizes.8 to i8*), i32 72, i1 false)
+// CHECK19-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP51:%.*]] = bitcast i8** [[TMP50]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP42]], i32* [[TMP51]], align 4
+// CHECK19-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP53:%.*]] = bitcast i8** [[TMP52]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP42]], i32* [[TMP53]], align 4
 // CHECK19-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS15]], i32 0, i32 0
 // CHECK19-NEXT:    store i8* null, i8** [[TMP54]], align 4
 // CHECK19-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 1
@@ -8188,86 +8080,74 @@ int bar(int n){
 // CHECK19-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 1
 // CHECK19-NEXT:    [[TMP58:%.*]] = bitcast i8** [[TMP57]] to [10 x float]**
 // CHECK19-NEXT:    store [10 x float]* [[B]], [10 x float]** [[TMP58]], align 4
-// CHECK19-NEXT:    [[TMP59:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK19-NEXT:    store i64 40, i64* [[TMP59]], align 4
-// CHECK19-NEXT:    [[TMP60:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS15]], i32 0, i32 1
-// CHECK19-NEXT:    store i8* null, i8** [[TMP60]], align 4
-// CHECK19-NEXT:    [[TMP61:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 2
-// CHECK19-NEXT:    [[TMP62:%.*]] = bitcast i8** [[TMP61]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP1]], i32* [[TMP62]], align 4
-// CHECK19-NEXT:    [[TMP63:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 2
-// CHECK19-NEXT:    [[TMP64:%.*]] = bitcast i8** [[TMP63]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP1]], i32* [[TMP64]], align 4
-// CHECK19-NEXT:    [[TMP65:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK19-NEXT:    store i64 4, i64* [[TMP65]], align 4
-// CHECK19-NEXT:    [[TMP66:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS15]], i32 0, i32 2
-// CHECK19-NEXT:    store i8* null, i8** [[TMP66]], align 4
-// CHECK19-NEXT:    [[TMP67:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 3
+// CHECK19-NEXT:    [[TMP59:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS15]], i32 0, i32 1
+// CHECK19-NEXT:    store i8* null, i8** [[TMP59]], align 4
+// CHECK19-NEXT:    [[TMP60:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 2
+// CHECK19-NEXT:    [[TMP61:%.*]] = bitcast i8** [[TMP60]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP1]], i32* [[TMP61]], align 4
+// CHECK19-NEXT:    [[TMP62:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 2
+// CHECK19-NEXT:    [[TMP63:%.*]] = bitcast i8** [[TMP62]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP1]], i32* [[TMP63]], align 4
+// CHECK19-NEXT:    [[TMP64:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS15]], i32 0, i32 2
+// CHECK19-NEXT:    store i8* null, i8** [[TMP64]], align 4
+// CHECK19-NEXT:    [[TMP65:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 3
+// CHECK19-NEXT:    [[TMP66:%.*]] = bitcast i8** [[TMP65]] to float**
+// CHECK19-NEXT:    store float* [[VLA]], float** [[TMP66]], align 4
+// CHECK19-NEXT:    [[TMP67:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 3
 // CHECK19-NEXT:    [[TMP68:%.*]] = bitcast i8** [[TMP67]] to float**
 // CHECK19-NEXT:    store float* [[VLA]], float** [[TMP68]], align 4
-// CHECK19-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 3
-// CHECK19-NEXT:    [[TMP70:%.*]] = bitcast i8** [[TMP69]] to float**
-// CHECK19-NEXT:    store float* [[VLA]], float** [[TMP70]], align 4
-// CHECK19-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK19-NEXT:    store i64 [[TMP45]], i64* [[TMP71]], align 4
-// CHECK19-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS15]], i32 0, i32 3
-// CHECK19-NEXT:    store i8* null, i8** [[TMP72]], align 4
-// CHECK19-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 4
+// CHECK19-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
+// CHECK19-NEXT:    store i64 [[TMP45]], i64* [[TMP69]], align 4
+// CHECK19-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS15]], i32 0, i32 3
+// CHECK19-NEXT:    store i8* null, i8** [[TMP70]], align 4
+// CHECK19-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 4
+// CHECK19-NEXT:    [[TMP72:%.*]] = bitcast i8** [[TMP71]] to [5 x [10 x double]]**
+// CHECK19-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP72]], align 4
+// CHECK19-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 4
 // CHECK19-NEXT:    [[TMP74:%.*]] = bitcast i8** [[TMP73]] to [5 x [10 x double]]**
 // CHECK19-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP74]], align 4
-// CHECK19-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 4
-// CHECK19-NEXT:    [[TMP76:%.*]] = bitcast i8** [[TMP75]] to [5 x [10 x double]]**
-// CHECK19-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP76]], align 4
-// CHECK19-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK19-NEXT:    store i64 400, i64* [[TMP77]], align 4
-// CHECK19-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS15]], i32 0, i32 4
-// CHECK19-NEXT:    store i8* null, i8** [[TMP78]], align 4
-// CHECK19-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 5
-// CHECK19-NEXT:    [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i32*
-// CHECK19-NEXT:    store i32 5, i32* [[TMP80]], align 4
-// CHECK19-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 5
+// CHECK19-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS15]], i32 0, i32 4
+// CHECK19-NEXT:    store i8* null, i8** [[TMP75]], align 4
+// CHECK19-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 5
+// CHECK19-NEXT:    [[TMP77:%.*]] = bitcast i8** [[TMP76]] to i32*
+// CHECK19-NEXT:    store i32 5, i32* [[TMP77]], align 4
+// CHECK19-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 5
+// CHECK19-NEXT:    [[TMP79:%.*]] = bitcast i8** [[TMP78]] to i32*
+// CHECK19-NEXT:    store i32 5, i32* [[TMP79]], align 4
+// CHECK19-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS15]], i32 0, i32 5
+// CHECK19-NEXT:    store i8* null, i8** [[TMP80]], align 4
+// CHECK19-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 6
 // CHECK19-NEXT:    [[TMP82:%.*]] = bitcast i8** [[TMP81]] to i32*
-// CHECK19-NEXT:    store i32 5, i32* [[TMP82]], align 4
-// CHECK19-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 5
-// CHECK19-NEXT:    store i64 4, i64* [[TMP83]], align 4
-// CHECK19-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS15]], i32 0, i32 5
-// CHECK19-NEXT:    store i8* null, i8** [[TMP84]], align 4
-// CHECK19-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 6
-// CHECK19-NEXT:    [[TMP86:%.*]] = bitcast i8** [[TMP85]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP3]], i32* [[TMP86]], align 4
-// CHECK19-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 6
-// CHECK19-NEXT:    [[TMP88:%.*]] = bitcast i8** [[TMP87]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP3]], i32* [[TMP88]], align 4
-// CHECK19-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 6
-// CHECK19-NEXT:    store i64 4, i64* [[TMP89]], align 4
-// CHECK19-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS15]], i32 0, i32 6
-// CHECK19-NEXT:    store i8* null, i8** [[TMP90]], align 4
-// CHECK19-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 7
-// CHECK19-NEXT:    [[TMP92:%.*]] = bitcast i8** [[TMP91]] to double**
-// CHECK19-NEXT:    store double* [[VLA1]], double** [[TMP92]], align 4
-// CHECK19-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 7
-// CHECK19-NEXT:    [[TMP94:%.*]] = bitcast i8** [[TMP93]] to double**
-// CHECK19-NEXT:    store double* [[VLA1]], double** [[TMP94]], align 4
-// CHECK19-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
-// CHECK19-NEXT:    store i64 [[TMP48]], i64* [[TMP95]], align 4
-// CHECK19-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS15]], i32 0, i32 7
+// CHECK19-NEXT:    store i32 [[TMP3]], i32* [[TMP82]], align 4
+// CHECK19-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 6
+// CHECK19-NEXT:    [[TMP84:%.*]] = bitcast i8** [[TMP83]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP3]], i32* [[TMP84]], align 4
+// CHECK19-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS15]], i32 0, i32 6
+// CHECK19-NEXT:    store i8* null, i8** [[TMP85]], align 4
+// CHECK19-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 7
+// CHECK19-NEXT:    [[TMP87:%.*]] = bitcast i8** [[TMP86]] to double**
+// CHECK19-NEXT:    store double* [[VLA1]], double** [[TMP87]], align 4
+// CHECK19-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 7
+// CHECK19-NEXT:    [[TMP89:%.*]] = bitcast i8** [[TMP88]] to double**
+// CHECK19-NEXT:    store double* [[VLA1]], double** [[TMP89]], align 4
+// CHECK19-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
+// CHECK19-NEXT:    store i64 [[TMP48]], i64* [[TMP90]], align 4
+// CHECK19-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS15]], i32 0, i32 7
+// CHECK19-NEXT:    store i8* null, i8** [[TMP91]], align 4
+// CHECK19-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 8
+// CHECK19-NEXT:    [[TMP93:%.*]] = bitcast i8** [[TMP92]] to %struct.TT**
+// CHECK19-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP93]], align 4
+// CHECK19-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 8
+// CHECK19-NEXT:    [[TMP95:%.*]] = bitcast i8** [[TMP94]] to %struct.TT**
+// CHECK19-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP95]], align 4
+// CHECK19-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS15]], i32 0, i32 8
 // CHECK19-NEXT:    store i8* null, i8** [[TMP96]], align 4
-// CHECK19-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 8
-// CHECK19-NEXT:    [[TMP98:%.*]] = bitcast i8** [[TMP97]] to %struct.TT**
-// CHECK19-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP98]], align 4
-// CHECK19-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 8
-// CHECK19-NEXT:    [[TMP100:%.*]] = bitcast i8** [[TMP99]] to %struct.TT**
-// CHECK19-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP100]], align 4
-// CHECK19-NEXT:    [[TMP101:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 8
-// CHECK19-NEXT:    store i64 12, i64* [[TMP101]], align 4
-// CHECK19-NEXT:    [[TMP102:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS15]], i32 0, i32 8
-// CHECK19-NEXT:    store i8* null, i8** [[TMP102]], align 4
-// CHECK19-NEXT:    [[TMP103:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP104:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP106:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l144.region_id, i32 9, i8** [[TMP103]], i8** [[TMP104]], i64* [[TMP105]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.8, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
-// CHECK19-NEXT:    [[TMP107:%.*]] = icmp ne i32 [[TMP106]], 0
-// CHECK19-NEXT:    br i1 [[TMP107]], label [[OMP_OFFLOAD_FAILED16:%.*]], label [[OMP_OFFLOAD_CONT17:%.*]]
+// CHECK19-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP100:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l144.region_id, i32 9, i8** [[TMP97]], i8** [[TMP98]], i64* [[TMP99]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
+// CHECK19-NEXT:    [[TMP101:%.*]] = icmp ne i32 [[TMP100]], 0
+// CHECK19-NEXT:    br i1 [[TMP101]], label [[OMP_OFFLOAD_FAILED16:%.*]], label [[OMP_OFFLOAD_CONT17:%.*]]
 // CHECK19:       omp_offload.failed16:
 // CHECK19-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l144(i32 [[TMP42]], [10 x float]* [[B]], i32 [[TMP1]], float* [[VLA]], [5 x [10 x double]]* [[C]], i32 5, i32 [[TMP3]], double* [[VLA1]], %struct.TT* [[D]]) #[[ATTR4]]
 // CHECK19-NEXT:    br label [[OMP_OFFLOAD_CONT17]]
@@ -8277,10 +8157,10 @@ int bar(int n){
 // CHECK19-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l144(i32 [[TMP42]], [10 x float]* [[B]], i32 [[TMP1]], float* [[VLA]], [5 x [10 x double]]* [[C]], i32 5, i32 [[TMP3]], double* [[VLA1]], %struct.TT* [[D]]) #[[ATTR4]]
 // CHECK19-NEXT:    br label [[OMP_IF_END19]]
 // CHECK19:       omp_if.end19:
-// CHECK19-NEXT:    [[TMP108:%.*]] = load i32, i32* [[A]], align 4
-// CHECK19-NEXT:    [[TMP109:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK19-NEXT:    call void @llvm.stackrestore(i8* [[TMP109]])
-// CHECK19-NEXT:    ret i32 [[TMP108]]
+// CHECK19-NEXT:    [[TMP102:%.*]] = load i32, i32* [[A]], align 4
+// CHECK19-NEXT:    [[TMP103:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK19-NEXT:    call void @llvm.stackrestore(i8* [[TMP103]])
+// CHECK19-NEXT:    ret i32 [[TMP102]]
 //
 //
 // CHECK19-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l100
@@ -8633,14 +8513,14 @@ int bar(int n){
 // CHECK19-NEXT:    [[TMP7:%.*]] = mul nuw i32 2, [[TMP1]]
 // CHECK19-NEXT:    [[TMP8:%.*]] = mul nuw i32 [[TMP7]], 2
 // CHECK19-NEXT:    [[TMP9:%.*]] = sext i32 [[TMP8]] to i64
-// CHECK19-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP11:%.*]] = bitcast i8** [[TMP10]] to %struct.S1**
-// CHECK19-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP11]], align 4
-// CHECK19-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP13:%.*]] = bitcast i8** [[TMP12]] to double**
-// CHECK19-NEXT:    store double* [[A]], double** [[TMP13]], align 4
-// CHECK19-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK19-NEXT:    store i64 8, i64* [[TMP14]], align 4
+// CHECK19-NEXT:    [[TMP10:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK19-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP10]], i8* align 4 bitcast ([5 x i64]* @.offload_sizes.11 to i8*), i32 40, i1 false)
+// CHECK19-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP12:%.*]] = bitcast i8** [[TMP11]] to %struct.S1**
+// CHECK19-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP12]], align 4
+// CHECK19-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to double**
+// CHECK19-NEXT:    store double* [[A]], double** [[TMP14]], align 4
 // CHECK19-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
 // CHECK19-NEXT:    store i8* null, i8** [[TMP15]], align 4
 // CHECK19-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -8649,46 +8529,40 @@ int bar(int n){
 // CHECK19-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK19-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32*
 // CHECK19-NEXT:    store i32 [[TMP5]], i32* [[TMP19]], align 4
-// CHECK19-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK19-NEXT:    store i64 4, i64* [[TMP20]], align 4
-// CHECK19-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK19-NEXT:    store i8* null, i8** [[TMP21]], align 4
-// CHECK19-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK19-NEXT:    [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i32*
-// CHECK19-NEXT:    store i32 2, i32* [[TMP23]], align 4
-// CHECK19-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK19-NEXT:    [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i32*
-// CHECK19-NEXT:    store i32 2, i32* [[TMP25]], align 4
-// CHECK19-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK19-NEXT:    store i64 4, i64* [[TMP26]], align 4
-// CHECK19-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK19-NEXT:    store i8* null, i8** [[TMP27]], align 4
-// CHECK19-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK19-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK19-NEXT:    store i8* null, i8** [[TMP20]], align 4
+// CHECK19-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK19-NEXT:    [[TMP22:%.*]] = bitcast i8** [[TMP21]] to i32*
+// CHECK19-NEXT:    store i32 2, i32* [[TMP22]], align 4
+// CHECK19-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK19-NEXT:    [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i32*
+// CHECK19-NEXT:    store i32 2, i32* [[TMP24]], align 4
+// CHECK19-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK19-NEXT:    store i8* null, i8** [[TMP25]], align 4
+// CHECK19-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK19-NEXT:    [[TMP27:%.*]] = bitcast i8** [[TMP26]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP1]], i32* [[TMP27]], align 4
+// CHECK19-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
 // CHECK19-NEXT:    [[TMP29:%.*]] = bitcast i8** [[TMP28]] to i32*
 // CHECK19-NEXT:    store i32 [[TMP1]], i32* [[TMP29]], align 4
-// CHECK19-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK19-NEXT:    [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP1]], i32* [[TMP31]], align 4
-// CHECK19-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK19-NEXT:    store i64 4, i64* [[TMP32]], align 4
-// CHECK19-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
-// CHECK19-NEXT:    store i8* null, i8** [[TMP33]], align 4
-// CHECK19-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK19-NEXT:    [[TMP35:%.*]] = bitcast i8** [[TMP34]] to i16**
-// CHECK19-NEXT:    store i16* [[VLA]], i16** [[TMP35]], align 4
-// CHECK19-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
-// CHECK19-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i16**
-// CHECK19-NEXT:    store i16* [[VLA]], i16** [[TMP37]], align 4
-// CHECK19-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK19-NEXT:    store i64 [[TMP9]], i64* [[TMP38]], align 4
-// CHECK19-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
-// CHECK19-NEXT:    store i8* null, i8** [[TMP39]], align 4
-// CHECK19-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP43:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l216.region_id, i32 5, i8** [[TMP40]], i8** [[TMP41]], i64* [[TMP42]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.10, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
-// CHECK19-NEXT:    [[TMP44:%.*]] = icmp ne i32 [[TMP43]], 0
-// CHECK19-NEXT:    br i1 [[TMP44]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK19-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
+// CHECK19-NEXT:    store i8* null, i8** [[TMP30]], align 4
+// CHECK19-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK19-NEXT:    [[TMP32:%.*]] = bitcast i8** [[TMP31]] to i16**
+// CHECK19-NEXT:    store i16* [[VLA]], i16** [[TMP32]], align 4
+// CHECK19-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK19-NEXT:    [[TMP34:%.*]] = bitcast i8** [[TMP33]] to i16**
+// CHECK19-NEXT:    store i16* [[VLA]], i16** [[TMP34]], align 4
+// CHECK19-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
+// CHECK19-NEXT:    store i64 [[TMP9]], i64* [[TMP35]], align 4
+// CHECK19-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
+// CHECK19-NEXT:    store i8* null, i8** [[TMP36]], align 4
+// CHECK19-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP40:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l216.region_id, i32 5, i8** [[TMP37]], i8** [[TMP38]], i64* [[TMP39]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.12, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
+// CHECK19-NEXT:    [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0
+// CHECK19-NEXT:    br i1 [[TMP41]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK19:       omp_offload.failed:
 // CHECK19-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l216(%struct.S1* [[THIS1]], i32 [[TMP5]], i32 2, i32 [[TMP1]], i16* [[VLA]]) #[[ATTR4]]
 // CHECK19-NEXT:    br label [[OMP_OFFLOAD_CONT]]
@@ -8698,15 +8572,15 @@ int bar(int n){
 // CHECK19-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l216(%struct.S1* [[THIS1]], i32 [[TMP5]], i32 2, i32 [[TMP1]], i16* [[VLA]]) #[[ATTR4]]
 // CHECK19-NEXT:    br label [[OMP_IF_END]]
 // CHECK19:       omp_if.end:
-// CHECK19-NEXT:    [[TMP45:%.*]] = mul nsw i32 1, [[TMP1]]
-// CHECK19-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i32 [[TMP45]]
+// CHECK19-NEXT:    [[TMP42:%.*]] = mul nsw i32 1, [[TMP1]]
+// CHECK19-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i32 [[TMP42]]
 // CHECK19-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1
-// CHECK19-NEXT:    [[TMP46:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
-// CHECK19-NEXT:    [[CONV:%.*]] = sext i16 [[TMP46]] to i32
-// CHECK19-NEXT:    [[TMP47:%.*]] = load i32, i32* [[B]], align 4
-// CHECK19-NEXT:    [[ADD3:%.*]] = add nsw i32 [[CONV]], [[TMP47]]
-// CHECK19-NEXT:    [[TMP48:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK19-NEXT:    call void @llvm.stackrestore(i8* [[TMP48]])
+// CHECK19-NEXT:    [[TMP43:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
+// CHECK19-NEXT:    [[CONV:%.*]] = sext i16 [[TMP43]] to i32
+// CHECK19-NEXT:    [[TMP44:%.*]] = load i32, i32* [[B]], align 4
+// CHECK19-NEXT:    [[ADD3:%.*]] = add nsw i32 [[CONV]], [[TMP44]]
+// CHECK19-NEXT:    [[TMP45:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK19-NEXT:    call void @llvm.stackrestore(i8* [[TMP45]])
 // CHECK19-NEXT:    ret i32 [[ADD3]]
 //
 //
@@ -8777,7 +8651,7 @@ int bar(int n){
 // CHECK19-NEXT:    store i8* null, i8** [[TMP26]], align 4
 // CHECK19-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK19-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP29:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l198.region_id, i32 4, i8** [[TMP27]], i8** [[TMP28]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.12, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
+// CHECK19-NEXT:    [[TMP29:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l198.region_id, i32 4, i8** [[TMP27]], i8** [[TMP28]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.14, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.15, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
 // CHECK19-NEXT:    [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0
 // CHECK19-NEXT:    br i1 [[TMP30]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK19:       omp_offload.failed:
@@ -8845,7 +8719,7 @@ int bar(int n){
 // CHECK19-NEXT:    store i8* null, i8** [[TMP19]], align 4
 // CHECK19-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK19-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l181.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.15, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.16, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
+// CHECK19-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l181.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.17, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.18, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
 // CHECK19-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK19-NEXT:    br i1 [[TMP23]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK19:       omp_offload.failed:
@@ -8882,11 +8756,11 @@ int bar(int n){
 // CHECK19-NEXT:    [[TMP4:%.*]] = load i32, i32* [[B_ADDR]], align 4
 // CHECK19-NEXT:    store i32 [[TMP4]], i32* [[B_CASTED]], align 4
 // CHECK19-NEXT:    [[TMP5:%.*]] = load i32, i32* [[B_CASTED]], align 4
-// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i32, i32, i32, i16*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i32 [[TMP5]], i32 [[TMP1]], i32 [[TMP2]], i16* [[TMP3]])
+// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i32, i32, i32, i16*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i32 [[TMP5]], i32 [[TMP1]], i32 [[TMP2]], i16* [[TMP3]])
 // CHECK19-NEXT:    ret void
 //
 //
-// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..9
+// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..10
 // CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR3]] {
 // CHECK19-NEXT:  entry:
 // CHECK19-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -8952,11 +8826,11 @@ int bar(int n){
 // CHECK19-NEXT:    [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8*
 // CHECK19-NEXT:    store i8 [[TMP5]], i8* [[CONV3]], align 1
 // CHECK19-NEXT:    [[TMP6:%.*]] = load i32, i32* [[AAA_CASTED]], align 4
-// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, [10 x i32]*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], [10 x i32]* [[TMP0]])
+// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, [10 x i32]*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], [10 x i32]* [[TMP0]])
 // CHECK19-NEXT:    ret void
 //
 //
-// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..11
+// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..13
 // CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] {
 // CHECK19-NEXT:  entry:
 // CHECK19-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -9014,11 +8888,11 @@ int bar(int n){
 // CHECK19-NEXT:    [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16*
 // CHECK19-NEXT:    store i16 [[TMP3]], i16* [[CONV1]], align 2
 // CHECK19-NEXT:    [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4
-// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], [10 x i32]* [[TMP0]])
+// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], [10 x i32]* [[TMP0]])
 // CHECK19-NEXT:    ret void
 //
 //
-// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..14
+// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..16
 // CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] {
 // CHECK19-NEXT:  entry:
 // CHECK19-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -9049,7 +8923,7 @@ int bar(int n){
 //
 //
 // CHECK19-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK19-SAME: () #[[ATTR7:[0-9]+]] {
+// CHECK19-SAME: () #[[ATTR8:[0-9]+]] {
 // CHECK19-NEXT:  entry:
 // CHECK19-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK19-NEXT:    ret void
@@ -9178,14 +9052,14 @@ int bar(int n){
 // CHECK20-NEXT:    [[TMP46:%.*]] = mul nuw i32 5, [[TMP3]]
 // CHECK20-NEXT:    [[TMP47:%.*]] = mul nuw i32 [[TMP46]], 8
 // CHECK20-NEXT:    [[TMP48:%.*]] = sext i32 [[TMP47]] to i64
-// CHECK20-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP50:%.*]] = bitcast i8** [[TMP49]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP42]], i32* [[TMP50]], align 4
-// CHECK20-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP52:%.*]] = bitcast i8** [[TMP51]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP42]], i32* [[TMP52]], align 4
-// CHECK20-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK20-NEXT:    store i64 4, i64* [[TMP53]], align 4
+// CHECK20-NEXT:    [[TMP49:%.*]] = bitcast [9 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK20-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP49]], i8* align 4 bitcast ([9 x i64]* @.offload_sizes.8 to i8*), i32 72, i1 false)
+// CHECK20-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP51:%.*]] = bitcast i8** [[TMP50]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP42]], i32* [[TMP51]], align 4
+// CHECK20-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP53:%.*]] = bitcast i8** [[TMP52]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP42]], i32* [[TMP53]], align 4
 // CHECK20-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS15]], i32 0, i32 0
 // CHECK20-NEXT:    store i8* null, i8** [[TMP54]], align 4
 // CHECK20-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 1
@@ -9194,86 +9068,74 @@ int bar(int n){
 // CHECK20-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 1
 // CHECK20-NEXT:    [[TMP58:%.*]] = bitcast i8** [[TMP57]] to [10 x float]**
 // CHECK20-NEXT:    store [10 x float]* [[B]], [10 x float]** [[TMP58]], align 4
-// CHECK20-NEXT:    [[TMP59:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK20-NEXT:    store i64 40, i64* [[TMP59]], align 4
-// CHECK20-NEXT:    [[TMP60:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS15]], i32 0, i32 1
-// CHECK20-NEXT:    store i8* null, i8** [[TMP60]], align 4
-// CHECK20-NEXT:    [[TMP61:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 2
-// CHECK20-NEXT:    [[TMP62:%.*]] = bitcast i8** [[TMP61]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP1]], i32* [[TMP62]], align 4
-// CHECK20-NEXT:    [[TMP63:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 2
-// CHECK20-NEXT:    [[TMP64:%.*]] = bitcast i8** [[TMP63]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP1]], i32* [[TMP64]], align 4
-// CHECK20-NEXT:    [[TMP65:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK20-NEXT:    store i64 4, i64* [[TMP65]], align 4
-// CHECK20-NEXT:    [[TMP66:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS15]], i32 0, i32 2
-// CHECK20-NEXT:    store i8* null, i8** [[TMP66]], align 4
-// CHECK20-NEXT:    [[TMP67:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 3
+// CHECK20-NEXT:    [[TMP59:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS15]], i32 0, i32 1
+// CHECK20-NEXT:    store i8* null, i8** [[TMP59]], align 4
+// CHECK20-NEXT:    [[TMP60:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 2
+// CHECK20-NEXT:    [[TMP61:%.*]] = bitcast i8** [[TMP60]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP1]], i32* [[TMP61]], align 4
+// CHECK20-NEXT:    [[TMP62:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 2
+// CHECK20-NEXT:    [[TMP63:%.*]] = bitcast i8** [[TMP62]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP1]], i32* [[TMP63]], align 4
+// CHECK20-NEXT:    [[TMP64:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS15]], i32 0, i32 2
+// CHECK20-NEXT:    store i8* null, i8** [[TMP64]], align 4
+// CHECK20-NEXT:    [[TMP65:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 3
+// CHECK20-NEXT:    [[TMP66:%.*]] = bitcast i8** [[TMP65]] to float**
+// CHECK20-NEXT:    store float* [[VLA]], float** [[TMP66]], align 4
+// CHECK20-NEXT:    [[TMP67:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 3
 // CHECK20-NEXT:    [[TMP68:%.*]] = bitcast i8** [[TMP67]] to float**
 // CHECK20-NEXT:    store float* [[VLA]], float** [[TMP68]], align 4
-// CHECK20-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 3
-// CHECK20-NEXT:    [[TMP70:%.*]] = bitcast i8** [[TMP69]] to float**
-// CHECK20-NEXT:    store float* [[VLA]], float** [[TMP70]], align 4
-// CHECK20-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK20-NEXT:    store i64 [[TMP45]], i64* [[TMP71]], align 4
-// CHECK20-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS15]], i32 0, i32 3
-// CHECK20-NEXT:    store i8* null, i8** [[TMP72]], align 4
-// CHECK20-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 4
+// CHECK20-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
+// CHECK20-NEXT:    store i64 [[TMP45]], i64* [[TMP69]], align 4
+// CHECK20-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS15]], i32 0, i32 3
+// CHECK20-NEXT:    store i8* null, i8** [[TMP70]], align 4
+// CHECK20-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 4
+// CHECK20-NEXT:    [[TMP72:%.*]] = bitcast i8** [[TMP71]] to [5 x [10 x double]]**
+// CHECK20-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP72]], align 4
+// CHECK20-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 4
 // CHECK20-NEXT:    [[TMP74:%.*]] = bitcast i8** [[TMP73]] to [5 x [10 x double]]**
 // CHECK20-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP74]], align 4
-// CHECK20-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 4
-// CHECK20-NEXT:    [[TMP76:%.*]] = bitcast i8** [[TMP75]] to [5 x [10 x double]]**
-// CHECK20-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP76]], align 4
-// CHECK20-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK20-NEXT:    store i64 400, i64* [[TMP77]], align 4
-// CHECK20-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS15]], i32 0, i32 4
-// CHECK20-NEXT:    store i8* null, i8** [[TMP78]], align 4
-// CHECK20-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 5
-// CHECK20-NEXT:    [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i32*
-// CHECK20-NEXT:    store i32 5, i32* [[TMP80]], align 4
-// CHECK20-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 5
+// CHECK20-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS15]], i32 0, i32 4
+// CHECK20-NEXT:    store i8* null, i8** [[TMP75]], align 4
+// CHECK20-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 5
+// CHECK20-NEXT:    [[TMP77:%.*]] = bitcast i8** [[TMP76]] to i32*
+// CHECK20-NEXT:    store i32 5, i32* [[TMP77]], align 4
+// CHECK20-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 5
+// CHECK20-NEXT:    [[TMP79:%.*]] = bitcast i8** [[TMP78]] to i32*
+// CHECK20-NEXT:    store i32 5, i32* [[TMP79]], align 4
+// CHECK20-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS15]], i32 0, i32 5
+// CHECK20-NEXT:    store i8* null, i8** [[TMP80]], align 4
+// CHECK20-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 6
 // CHECK20-NEXT:    [[TMP82:%.*]] = bitcast i8** [[TMP81]] to i32*
-// CHECK20-NEXT:    store i32 5, i32* [[TMP82]], align 4
-// CHECK20-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 5
-// CHECK20-NEXT:    store i64 4, i64* [[TMP83]], align 4
-// CHECK20-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS15]], i32 0, i32 5
-// CHECK20-NEXT:    store i8* null, i8** [[TMP84]], align 4
-// CHECK20-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 6
-// CHECK20-NEXT:    [[TMP86:%.*]] = bitcast i8** [[TMP85]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP3]], i32* [[TMP86]], align 4
-// CHECK20-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 6
-// CHECK20-NEXT:    [[TMP88:%.*]] = bitcast i8** [[TMP87]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP3]], i32* [[TMP88]], align 4
-// CHECK20-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 6
-// CHECK20-NEXT:    store i64 4, i64* [[TMP89]], align 4
-// CHECK20-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS15]], i32 0, i32 6
-// CHECK20-NEXT:    store i8* null, i8** [[TMP90]], align 4
-// CHECK20-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 7
-// CHECK20-NEXT:    [[TMP92:%.*]] = bitcast i8** [[TMP91]] to double**
-// CHECK20-NEXT:    store double* [[VLA1]], double** [[TMP92]], align 4
-// CHECK20-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 7
-// CHECK20-NEXT:    [[TMP94:%.*]] = bitcast i8** [[TMP93]] to double**
-// CHECK20-NEXT:    store double* [[VLA1]], double** [[TMP94]], align 4
-// CHECK20-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
-// CHECK20-NEXT:    store i64 [[TMP48]], i64* [[TMP95]], align 4
-// CHECK20-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS15]], i32 0, i32 7
+// CHECK20-NEXT:    store i32 [[TMP3]], i32* [[TMP82]], align 4
+// CHECK20-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 6
+// CHECK20-NEXT:    [[TMP84:%.*]] = bitcast i8** [[TMP83]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP3]], i32* [[TMP84]], align 4
+// CHECK20-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS15]], i32 0, i32 6
+// CHECK20-NEXT:    store i8* null, i8** [[TMP85]], align 4
+// CHECK20-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 7
+// CHECK20-NEXT:    [[TMP87:%.*]] = bitcast i8** [[TMP86]] to double**
+// CHECK20-NEXT:    store double* [[VLA1]], double** [[TMP87]], align 4
+// CHECK20-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 7
+// CHECK20-NEXT:    [[TMP89:%.*]] = bitcast i8** [[TMP88]] to double**
+// CHECK20-NEXT:    store double* [[VLA1]], double** [[TMP89]], align 4
+// CHECK20-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
+// CHECK20-NEXT:    store i64 [[TMP48]], i64* [[TMP90]], align 4
+// CHECK20-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS15]], i32 0, i32 7
+// CHECK20-NEXT:    store i8* null, i8** [[TMP91]], align 4
+// CHECK20-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 8
+// CHECK20-NEXT:    [[TMP93:%.*]] = bitcast i8** [[TMP92]] to %struct.TT**
+// CHECK20-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP93]], align 4
+// CHECK20-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 8
+// CHECK20-NEXT:    [[TMP95:%.*]] = bitcast i8** [[TMP94]] to %struct.TT**
+// CHECK20-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP95]], align 4
+// CHECK20-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS15]], i32 0, i32 8
 // CHECK20-NEXT:    store i8* null, i8** [[TMP96]], align 4
-// CHECK20-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 8
-// CHECK20-NEXT:    [[TMP98:%.*]] = bitcast i8** [[TMP97]] to %struct.TT**
-// CHECK20-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP98]], align 4
-// CHECK20-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 8
-// CHECK20-NEXT:    [[TMP100:%.*]] = bitcast i8** [[TMP99]] to %struct.TT**
-// CHECK20-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP100]], align 4
-// CHECK20-NEXT:    [[TMP101:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 8
-// CHECK20-NEXT:    store i64 12, i64* [[TMP101]], align 4
-// CHECK20-NEXT:    [[TMP102:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS15]], i32 0, i32 8
-// CHECK20-NEXT:    store i8* null, i8** [[TMP102]], align 4
-// CHECK20-NEXT:    [[TMP103:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP104:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP106:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l144.region_id, i32 9, i8** [[TMP103]], i8** [[TMP104]], i64* [[TMP105]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.8, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
-// CHECK20-NEXT:    [[TMP107:%.*]] = icmp ne i32 [[TMP106]], 0
-// CHECK20-NEXT:    br i1 [[TMP107]], label [[OMP_OFFLOAD_FAILED16:%.*]], label [[OMP_OFFLOAD_CONT17:%.*]]
+// CHECK20-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP100:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l144.region_id, i32 9, i8** [[TMP97]], i8** [[TMP98]], i64* [[TMP99]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
+// CHECK20-NEXT:    [[TMP101:%.*]] = icmp ne i32 [[TMP100]], 0
+// CHECK20-NEXT:    br i1 [[TMP101]], label [[OMP_OFFLOAD_FAILED16:%.*]], label [[OMP_OFFLOAD_CONT17:%.*]]
 // CHECK20:       omp_offload.failed16:
 // CHECK20-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l144(i32 [[TMP42]], [10 x float]* [[B]], i32 [[TMP1]], float* [[VLA]], [5 x [10 x double]]* [[C]], i32 5, i32 [[TMP3]], double* [[VLA1]], %struct.TT* [[D]]) #[[ATTR4]]
 // CHECK20-NEXT:    br label [[OMP_OFFLOAD_CONT17]]
@@ -9283,10 +9145,10 @@ int bar(int n){
 // CHECK20-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l144(i32 [[TMP42]], [10 x float]* [[B]], i32 [[TMP1]], float* [[VLA]], [5 x [10 x double]]* [[C]], i32 5, i32 [[TMP3]], double* [[VLA1]], %struct.TT* [[D]]) #[[ATTR4]]
 // CHECK20-NEXT:    br label [[OMP_IF_END19]]
 // CHECK20:       omp_if.end19:
-// CHECK20-NEXT:    [[TMP108:%.*]] = load i32, i32* [[A]], align 4
-// CHECK20-NEXT:    [[TMP109:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK20-NEXT:    call void @llvm.stackrestore(i8* [[TMP109]])
-// CHECK20-NEXT:    ret i32 [[TMP108]]
+// CHECK20-NEXT:    [[TMP102:%.*]] = load i32, i32* [[A]], align 4
+// CHECK20-NEXT:    [[TMP103:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK20-NEXT:    call void @llvm.stackrestore(i8* [[TMP103]])
+// CHECK20-NEXT:    ret i32 [[TMP102]]
 //
 //
 // CHECK20-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l100
@@ -9639,14 +9501,14 @@ int bar(int n){
 // CHECK20-NEXT:    [[TMP7:%.*]] = mul nuw i32 2, [[TMP1]]
 // CHECK20-NEXT:    [[TMP8:%.*]] = mul nuw i32 [[TMP7]], 2
 // CHECK20-NEXT:    [[TMP9:%.*]] = sext i32 [[TMP8]] to i64
-// CHECK20-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP11:%.*]] = bitcast i8** [[TMP10]] to %struct.S1**
-// CHECK20-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP11]], align 4
-// CHECK20-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP13:%.*]] = bitcast i8** [[TMP12]] to double**
-// CHECK20-NEXT:    store double* [[A]], double** [[TMP13]], align 4
-// CHECK20-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK20-NEXT:    store i64 8, i64* [[TMP14]], align 4
+// CHECK20-NEXT:    [[TMP10:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK20-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP10]], i8* align 4 bitcast ([5 x i64]* @.offload_sizes.11 to i8*), i32 40, i1 false)
+// CHECK20-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP12:%.*]] = bitcast i8** [[TMP11]] to %struct.S1**
+// CHECK20-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP12]], align 4
+// CHECK20-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to double**
+// CHECK20-NEXT:    store double* [[A]], double** [[TMP14]], align 4
 // CHECK20-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
 // CHECK20-NEXT:    store i8* null, i8** [[TMP15]], align 4
 // CHECK20-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -9655,46 +9517,40 @@ int bar(int n){
 // CHECK20-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK20-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32*
 // CHECK20-NEXT:    store i32 [[TMP5]], i32* [[TMP19]], align 4
-// CHECK20-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK20-NEXT:    store i64 4, i64* [[TMP20]], align 4
-// CHECK20-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK20-NEXT:    store i8* null, i8** [[TMP21]], align 4
-// CHECK20-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK20-NEXT:    [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i32*
-// CHECK20-NEXT:    store i32 2, i32* [[TMP23]], align 4
-// CHECK20-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK20-NEXT:    [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i32*
-// CHECK20-NEXT:    store i32 2, i32* [[TMP25]], align 4
-// CHECK20-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK20-NEXT:    store i64 4, i64* [[TMP26]], align 4
-// CHECK20-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK20-NEXT:    store i8* null, i8** [[TMP27]], align 4
-// CHECK20-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK20-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK20-NEXT:    store i8* null, i8** [[TMP20]], align 4
+// CHECK20-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK20-NEXT:    [[TMP22:%.*]] = bitcast i8** [[TMP21]] to i32*
+// CHECK20-NEXT:    store i32 2, i32* [[TMP22]], align 4
+// CHECK20-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK20-NEXT:    [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i32*
+// CHECK20-NEXT:    store i32 2, i32* [[TMP24]], align 4
+// CHECK20-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK20-NEXT:    store i8* null, i8** [[TMP25]], align 4
+// CHECK20-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK20-NEXT:    [[TMP27:%.*]] = bitcast i8** [[TMP26]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP1]], i32* [[TMP27]], align 4
+// CHECK20-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
 // CHECK20-NEXT:    [[TMP29:%.*]] = bitcast i8** [[TMP28]] to i32*
 // CHECK20-NEXT:    store i32 [[TMP1]], i32* [[TMP29]], align 4
-// CHECK20-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK20-NEXT:    [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP1]], i32* [[TMP31]], align 4
-// CHECK20-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK20-NEXT:    store i64 4, i64* [[TMP32]], align 4
-// CHECK20-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
-// CHECK20-NEXT:    store i8* null, i8** [[TMP33]], align 4
-// CHECK20-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK20-NEXT:    [[TMP35:%.*]] = bitcast i8** [[TMP34]] to i16**
-// CHECK20-NEXT:    store i16* [[VLA]], i16** [[TMP35]], align 4
-// CHECK20-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
-// CHECK20-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i16**
-// CHECK20-NEXT:    store i16* [[VLA]], i16** [[TMP37]], align 4
-// CHECK20-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK20-NEXT:    store i64 [[TMP9]], i64* [[TMP38]], align 4
-// CHECK20-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
-// CHECK20-NEXT:    store i8* null, i8** [[TMP39]], align 4
-// CHECK20-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP43:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l216.region_id, i32 5, i8** [[TMP40]], i8** [[TMP41]], i64* [[TMP42]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.10, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
-// CHECK20-NEXT:    [[TMP44:%.*]] = icmp ne i32 [[TMP43]], 0
-// CHECK20-NEXT:    br i1 [[TMP44]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK20-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
+// CHECK20-NEXT:    store i8* null, i8** [[TMP30]], align 4
+// CHECK20-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK20-NEXT:    [[TMP32:%.*]] = bitcast i8** [[TMP31]] to i16**
+// CHECK20-NEXT:    store i16* [[VLA]], i16** [[TMP32]], align 4
+// CHECK20-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK20-NEXT:    [[TMP34:%.*]] = bitcast i8** [[TMP33]] to i16**
+// CHECK20-NEXT:    store i16* [[VLA]], i16** [[TMP34]], align 4
+// CHECK20-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
+// CHECK20-NEXT:    store i64 [[TMP9]], i64* [[TMP35]], align 4
+// CHECK20-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
+// CHECK20-NEXT:    store i8* null, i8** [[TMP36]], align 4
+// CHECK20-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP40:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l216.region_id, i32 5, i8** [[TMP37]], i8** [[TMP38]], i64* [[TMP39]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.12, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
+// CHECK20-NEXT:    [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0
+// CHECK20-NEXT:    br i1 [[TMP41]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK20:       omp_offload.failed:
 // CHECK20-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l216(%struct.S1* [[THIS1]], i32 [[TMP5]], i32 2, i32 [[TMP1]], i16* [[VLA]]) #[[ATTR4]]
 // CHECK20-NEXT:    br label [[OMP_OFFLOAD_CONT]]
@@ -9704,15 +9560,15 @@ int bar(int n){
 // CHECK20-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l216(%struct.S1* [[THIS1]], i32 [[TMP5]], i32 2, i32 [[TMP1]], i16* [[VLA]]) #[[ATTR4]]
 // CHECK20-NEXT:    br label [[OMP_IF_END]]
 // CHECK20:       omp_if.end:
-// CHECK20-NEXT:    [[TMP45:%.*]] = mul nsw i32 1, [[TMP1]]
-// CHECK20-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i32 [[TMP45]]
+// CHECK20-NEXT:    [[TMP42:%.*]] = mul nsw i32 1, [[TMP1]]
+// CHECK20-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i32 [[TMP42]]
 // CHECK20-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1
-// CHECK20-NEXT:    [[TMP46:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
-// CHECK20-NEXT:    [[CONV:%.*]] = sext i16 [[TMP46]] to i32
-// CHECK20-NEXT:    [[TMP47:%.*]] = load i32, i32* [[B]], align 4
-// CHECK20-NEXT:    [[ADD3:%.*]] = add nsw i32 [[CONV]], [[TMP47]]
-// CHECK20-NEXT:    [[TMP48:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK20-NEXT:    call void @llvm.stackrestore(i8* [[TMP48]])
+// CHECK20-NEXT:    [[TMP43:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
+// CHECK20-NEXT:    [[CONV:%.*]] = sext i16 [[TMP43]] to i32
+// CHECK20-NEXT:    [[TMP44:%.*]] = load i32, i32* [[B]], align 4
+// CHECK20-NEXT:    [[ADD3:%.*]] = add nsw i32 [[CONV]], [[TMP44]]
+// CHECK20-NEXT:    [[TMP45:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK20-NEXT:    call void @llvm.stackrestore(i8* [[TMP45]])
 // CHECK20-NEXT:    ret i32 [[ADD3]]
 //
 //
@@ -9783,7 +9639,7 @@ int bar(int n){
 // CHECK20-NEXT:    store i8* null, i8** [[TMP26]], align 4
 // CHECK20-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK20-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP29:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l198.region_id, i32 4, i8** [[TMP27]], i8** [[TMP28]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.12, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
+// CHECK20-NEXT:    [[TMP29:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l198.region_id, i32 4, i8** [[TMP27]], i8** [[TMP28]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.14, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.15, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
 // CHECK20-NEXT:    [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0
 // CHECK20-NEXT:    br i1 [[TMP30]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK20:       omp_offload.failed:
@@ -9851,7 +9707,7 @@ int bar(int n){
 // CHECK20-NEXT:    store i8* null, i8** [[TMP19]], align 4
 // CHECK20-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK20-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l181.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.15, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.16, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
+// CHECK20-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l181.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.17, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.18, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
 // CHECK20-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK20-NEXT:    br i1 [[TMP23]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK20:       omp_offload.failed:
@@ -9888,11 +9744,11 @@ int bar(int n){
 // CHECK20-NEXT:    [[TMP4:%.*]] = load i32, i32* [[B_ADDR]], align 4
 // CHECK20-NEXT:    store i32 [[TMP4]], i32* [[B_CASTED]], align 4
 // CHECK20-NEXT:    [[TMP5:%.*]] = load i32, i32* [[B_CASTED]], align 4
-// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i32, i32, i32, i16*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i32 [[TMP5]], i32 [[TMP1]], i32 [[TMP2]], i16* [[TMP3]])
+// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i32, i32, i32, i16*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i32 [[TMP5]], i32 [[TMP1]], i32 [[TMP2]], i16* [[TMP3]])
 // CHECK20-NEXT:    ret void
 //
 //
-// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..9
+// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..10
 // CHECK20-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR3]] {
 // CHECK20-NEXT:  entry:
 // CHECK20-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -9958,11 +9814,11 @@ int bar(int n){
 // CHECK20-NEXT:    [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8*
 // CHECK20-NEXT:    store i8 [[TMP5]], i8* [[CONV3]], align 1
 // CHECK20-NEXT:    [[TMP6:%.*]] = load i32, i32* [[AAA_CASTED]], align 4
-// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, [10 x i32]*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], [10 x i32]* [[TMP0]])
+// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, [10 x i32]*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], [10 x i32]* [[TMP0]])
 // CHECK20-NEXT:    ret void
 //
 //
-// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..11
+// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..13
 // CHECK20-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] {
 // CHECK20-NEXT:  entry:
 // CHECK20-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -10020,11 +9876,11 @@ int bar(int n){
 // CHECK20-NEXT:    [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16*
 // CHECK20-NEXT:    store i16 [[TMP3]], i16* [[CONV1]], align 2
 // CHECK20-NEXT:    [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4
-// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], [10 x i32]* [[TMP0]])
+// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], [10 x i32]* [[TMP0]])
 // CHECK20-NEXT:    ret void
 //
 //
-// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..14
+// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..16
 // CHECK20-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] {
 // CHECK20-NEXT:  entry:
 // CHECK20-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -10055,7 +9911,7 @@ int bar(int n){
 //
 //
 // CHECK20-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK20-SAME: () #[[ATTR7:[0-9]+]] {
+// CHECK20-SAME: () #[[ATTR8:[0-9]+]] {
 // CHECK20-NEXT:  entry:
 // CHECK20-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK20-NEXT:    ret void

diff  --git a/clang/test/OpenMP/target_parallel_for_codegen.cpp b/clang/test/OpenMP/target_parallel_for_codegen.cpp
index da93feb753e40..0bb1911227bb3 100644
--- a/clang/test/OpenMP/target_parallel_for_codegen.cpp
+++ b/clang/test/OpenMP/target_parallel_for_codegen.cpp
@@ -520,14 +520,14 @@ int bar(int n){
 // CHECK1-NEXT:    [[TMP86:%.*]] = mul nuw i64 [[TMP2]], 4
 // CHECK1-NEXT:    [[TMP87:%.*]] = mul nuw i64 5, [[TMP5]]
 // CHECK1-NEXT:    [[TMP88:%.*]] = mul nuw i64 [[TMP87]], 8
-// CHECK1-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0
-// CHECK1-NEXT:    [[TMP90:%.*]] = bitcast i8** [[TMP89]] to i64*
-// CHECK1-NEXT:    store i64 [[TMP82]], i64* [[TMP90]], align 8
-// CHECK1-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 0
-// CHECK1-NEXT:    [[TMP92:%.*]] = bitcast i8** [[TMP91]] to i64*
-// CHECK1-NEXT:    store i64 [[TMP82]], i64* [[TMP92]], align 8
-// CHECK1-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK1-NEXT:    store i64 4, i64* [[TMP93]], align 8
+// CHECK1-NEXT:    [[TMP89:%.*]] = bitcast [10 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK1-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP89]], i8* align 8 bitcast ([10 x i64]* @.offload_sizes.8 to i8*), i64 80, i1 false)
+// CHECK1-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP91:%.*]] = bitcast i8** [[TMP90]] to i64*
+// CHECK1-NEXT:    store i64 [[TMP82]], i64* [[TMP91]], align 8
+// CHECK1-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP93:%.*]] = bitcast i8** [[TMP92]] to i64*
+// CHECK1-NEXT:    store i64 [[TMP82]], i64* [[TMP93]], align 8
 // CHECK1-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 0
 // CHECK1-NEXT:    store i8* null, i8** [[TMP94]], align 8
 // CHECK1-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 1
@@ -536,96 +536,82 @@ int bar(int n){
 // CHECK1-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 1
 // CHECK1-NEXT:    [[TMP98:%.*]] = bitcast i8** [[TMP97]] to [10 x float]**
 // CHECK1-NEXT:    store [10 x float]* [[B]], [10 x float]** [[TMP98]], align 8
-// CHECK1-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK1-NEXT:    store i64 40, i64* [[TMP99]], align 8
-// CHECK1-NEXT:    [[TMP100:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 1
-// CHECK1-NEXT:    store i8* null, i8** [[TMP100]], align 8
-// CHECK1-NEXT:    [[TMP101:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 2
-// CHECK1-NEXT:    [[TMP102:%.*]] = bitcast i8** [[TMP101]] to i64*
-// CHECK1-NEXT:    store i64 [[TMP2]], i64* [[TMP102]], align 8
-// CHECK1-NEXT:    [[TMP103:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 2
-// CHECK1-NEXT:    [[TMP104:%.*]] = bitcast i8** [[TMP103]] to i64*
-// CHECK1-NEXT:    store i64 [[TMP2]], i64* [[TMP104]], align 8
-// CHECK1-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK1-NEXT:    store i64 8, i64* [[TMP105]], align 8
-// CHECK1-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 2
-// CHECK1-NEXT:    store i8* null, i8** [[TMP106]], align 8
-// CHECK1-NEXT:    [[TMP107:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 3
+// CHECK1-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 1
+// CHECK1-NEXT:    store i8* null, i8** [[TMP99]], align 8
+// CHECK1-NEXT:    [[TMP100:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 2
+// CHECK1-NEXT:    [[TMP101:%.*]] = bitcast i8** [[TMP100]] to i64*
+// CHECK1-NEXT:    store i64 [[TMP2]], i64* [[TMP101]], align 8
+// CHECK1-NEXT:    [[TMP102:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 2
+// CHECK1-NEXT:    [[TMP103:%.*]] = bitcast i8** [[TMP102]] to i64*
+// CHECK1-NEXT:    store i64 [[TMP2]], i64* [[TMP103]], align 8
+// CHECK1-NEXT:    [[TMP104:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 2
+// CHECK1-NEXT:    store i8* null, i8** [[TMP104]], align 8
+// CHECK1-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 3
+// CHECK1-NEXT:    [[TMP106:%.*]] = bitcast i8** [[TMP105]] to float**
+// CHECK1-NEXT:    store float* [[VLA]], float** [[TMP106]], align 8
+// CHECK1-NEXT:    [[TMP107:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 3
 // CHECK1-NEXT:    [[TMP108:%.*]] = bitcast i8** [[TMP107]] to float**
 // CHECK1-NEXT:    store float* [[VLA]], float** [[TMP108]], align 8
-// CHECK1-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 3
-// CHECK1-NEXT:    [[TMP110:%.*]] = bitcast i8** [[TMP109]] to float**
-// CHECK1-NEXT:    store float* [[VLA]], float** [[TMP110]], align 8
-// CHECK1-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK1-NEXT:    store i64 [[TMP86]], i64* [[TMP111]], align 8
-// CHECK1-NEXT:    [[TMP112:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 3
-// CHECK1-NEXT:    store i8* null, i8** [[TMP112]], align 8
-// CHECK1-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 4
+// CHECK1-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
+// CHECK1-NEXT:    store i64 [[TMP86]], i64* [[TMP109]], align 8
+// CHECK1-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 3
+// CHECK1-NEXT:    store i8* null, i8** [[TMP110]], align 8
+// CHECK1-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 4
+// CHECK1-NEXT:    [[TMP112:%.*]] = bitcast i8** [[TMP111]] to [5 x [10 x double]]**
+// CHECK1-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP112]], align 8
+// CHECK1-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 4
 // CHECK1-NEXT:    [[TMP114:%.*]] = bitcast i8** [[TMP113]] to [5 x [10 x double]]**
 // CHECK1-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP114]], align 8
-// CHECK1-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 4
-// CHECK1-NEXT:    [[TMP116:%.*]] = bitcast i8** [[TMP115]] to [5 x [10 x double]]**
-// CHECK1-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP116]], align 8
-// CHECK1-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK1-NEXT:    store i64 400, i64* [[TMP117]], align 8
-// CHECK1-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 4
-// CHECK1-NEXT:    store i8* null, i8** [[TMP118]], align 8
-// CHECK1-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 5
-// CHECK1-NEXT:    [[TMP120:%.*]] = bitcast i8** [[TMP119]] to i64*
-// CHECK1-NEXT:    store i64 5, i64* [[TMP120]], align 8
-// CHECK1-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 5
+// CHECK1-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 4
+// CHECK1-NEXT:    store i8* null, i8** [[TMP115]], align 8
+// CHECK1-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 5
+// CHECK1-NEXT:    [[TMP117:%.*]] = bitcast i8** [[TMP116]] to i64*
+// CHECK1-NEXT:    store i64 5, i64* [[TMP117]], align 8
+// CHECK1-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 5
+// CHECK1-NEXT:    [[TMP119:%.*]] = bitcast i8** [[TMP118]] to i64*
+// CHECK1-NEXT:    store i64 5, i64* [[TMP119]], align 8
+// CHECK1-NEXT:    [[TMP120:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 5
+// CHECK1-NEXT:    store i8* null, i8** [[TMP120]], align 8
+// CHECK1-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 6
 // CHECK1-NEXT:    [[TMP122:%.*]] = bitcast i8** [[TMP121]] to i64*
-// CHECK1-NEXT:    store i64 5, i64* [[TMP122]], align 8
-// CHECK1-NEXT:    [[TMP123:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 5
-// CHECK1-NEXT:    store i64 8, i64* [[TMP123]], align 8
-// CHECK1-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 5
-// CHECK1-NEXT:    store i8* null, i8** [[TMP124]], align 8
-// CHECK1-NEXT:    [[TMP125:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 6
-// CHECK1-NEXT:    [[TMP126:%.*]] = bitcast i8** [[TMP125]] to i64*
-// CHECK1-NEXT:    store i64 [[TMP5]], i64* [[TMP126]], align 8
-// CHECK1-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 6
-// CHECK1-NEXT:    [[TMP128:%.*]] = bitcast i8** [[TMP127]] to i64*
-// CHECK1-NEXT:    store i64 [[TMP5]], i64* [[TMP128]], align 8
-// CHECK1-NEXT:    [[TMP129:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 6
-// CHECK1-NEXT:    store i64 8, i64* [[TMP129]], align 8
-// CHECK1-NEXT:    [[TMP130:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 6
-// CHECK1-NEXT:    store i8* null, i8** [[TMP130]], align 8
-// CHECK1-NEXT:    [[TMP131:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 7
-// CHECK1-NEXT:    [[TMP132:%.*]] = bitcast i8** [[TMP131]] to double**
-// CHECK1-NEXT:    store double* [[VLA1]], double** [[TMP132]], align 8
-// CHECK1-NEXT:    [[TMP133:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 7
-// CHECK1-NEXT:    [[TMP134:%.*]] = bitcast i8** [[TMP133]] to double**
-// CHECK1-NEXT:    store double* [[VLA1]], double** [[TMP134]], align 8
-// CHECK1-NEXT:    [[TMP135:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
-// CHECK1-NEXT:    store i64 [[TMP88]], i64* [[TMP135]], align 8
-// CHECK1-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 7
+// CHECK1-NEXT:    store i64 [[TMP5]], i64* [[TMP122]], align 8
+// CHECK1-NEXT:    [[TMP123:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 6
+// CHECK1-NEXT:    [[TMP124:%.*]] = bitcast i8** [[TMP123]] to i64*
+// CHECK1-NEXT:    store i64 [[TMP5]], i64* [[TMP124]], align 8
+// CHECK1-NEXT:    [[TMP125:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 6
+// CHECK1-NEXT:    store i8* null, i8** [[TMP125]], align 8
+// CHECK1-NEXT:    [[TMP126:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 7
+// CHECK1-NEXT:    [[TMP127:%.*]] = bitcast i8** [[TMP126]] to double**
+// CHECK1-NEXT:    store double* [[VLA1]], double** [[TMP127]], align 8
+// CHECK1-NEXT:    [[TMP128:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 7
+// CHECK1-NEXT:    [[TMP129:%.*]] = bitcast i8** [[TMP128]] to double**
+// CHECK1-NEXT:    store double* [[VLA1]], double** [[TMP129]], align 8
+// CHECK1-NEXT:    [[TMP130:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
+// CHECK1-NEXT:    store i64 [[TMP88]], i64* [[TMP130]], align 8
+// CHECK1-NEXT:    [[TMP131:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 7
+// CHECK1-NEXT:    store i8* null, i8** [[TMP131]], align 8
+// CHECK1-NEXT:    [[TMP132:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 8
+// CHECK1-NEXT:    [[TMP133:%.*]] = bitcast i8** [[TMP132]] to %struct.TT**
+// CHECK1-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP133]], align 8
+// CHECK1-NEXT:    [[TMP134:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 8
+// CHECK1-NEXT:    [[TMP135:%.*]] = bitcast i8** [[TMP134]] to %struct.TT**
+// CHECK1-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP135]], align 8
+// CHECK1-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 8
 // CHECK1-NEXT:    store i8* null, i8** [[TMP136]], align 8
-// CHECK1-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 8
-// CHECK1-NEXT:    [[TMP138:%.*]] = bitcast i8** [[TMP137]] to %struct.TT**
-// CHECK1-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP138]], align 8
-// CHECK1-NEXT:    [[TMP139:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 8
-// CHECK1-NEXT:    [[TMP140:%.*]] = bitcast i8** [[TMP139]] to %struct.TT**
-// CHECK1-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP140]], align 8
-// CHECK1-NEXT:    [[TMP141:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 8
-// CHECK1-NEXT:    store i64 16, i64* [[TMP141]], align 8
-// CHECK1-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 8
-// CHECK1-NEXT:    store i8* null, i8** [[TMP142]], align 8
-// CHECK1-NEXT:    [[TMP143:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 9
-// CHECK1-NEXT:    [[TMP144:%.*]] = bitcast i8** [[TMP143]] to i64*
-// CHECK1-NEXT:    store i64 [[TMP84]], i64* [[TMP144]], align 8
-// CHECK1-NEXT:    [[TMP145:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 9
-// CHECK1-NEXT:    [[TMP146:%.*]] = bitcast i8** [[TMP145]] to i64*
-// CHECK1-NEXT:    store i64 [[TMP84]], i64* [[TMP146]], align 8
-// CHECK1-NEXT:    [[TMP147:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 9
-// CHECK1-NEXT:    store i64 4, i64* [[TMP147]], align 8
-// CHECK1-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 9
-// CHECK1-NEXT:    store i8* null, i8** [[TMP148]], align 8
-// CHECK1-NEXT:    [[TMP149:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0
-// CHECK1-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 0
-// CHECK1-NEXT:    [[TMP151:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK1-NEXT:    [[TMP152:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l170.region_id, i32 10, i8** [[TMP149]], i8** [[TMP150]], i64* [[TMP151]], i64* getelementptr inbounds ([10 x i64], [10 x i64]* @.offload_maptypes.8, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
-// CHECK1-NEXT:    [[TMP153:%.*]] = icmp ne i32 [[TMP152]], 0
-// CHECK1-NEXT:    br i1 [[TMP153]], label [[OMP_OFFLOAD_FAILED23:%.*]], label [[OMP_OFFLOAD_CONT24:%.*]]
+// CHECK1-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 9
+// CHECK1-NEXT:    [[TMP138:%.*]] = bitcast i8** [[TMP137]] to i64*
+// CHECK1-NEXT:    store i64 [[TMP84]], i64* [[TMP138]], align 8
+// CHECK1-NEXT:    [[TMP139:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 9
+// CHECK1-NEXT:    [[TMP140:%.*]] = bitcast i8** [[TMP139]] to i64*
+// CHECK1-NEXT:    store i64 [[TMP84]], i64* [[TMP140]], align 8
+// CHECK1-NEXT:    [[TMP141:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 9
+// CHECK1-NEXT:    store i8* null, i8** [[TMP141]], align 8
+// CHECK1-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP143:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP144:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP145:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l170.region_id, i32 10, i8** [[TMP142]], i8** [[TMP143]], i64* [[TMP144]], i64* getelementptr inbounds ([10 x i64], [10 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
+// CHECK1-NEXT:    [[TMP146:%.*]] = icmp ne i32 [[TMP145]], 0
+// CHECK1-NEXT:    br i1 [[TMP146]], label [[OMP_OFFLOAD_FAILED23:%.*]], label [[OMP_OFFLOAD_CONT24:%.*]]
 // CHECK1:       omp_offload.failed23:
 // CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l170(i64 [[TMP82]], [10 x float]* [[B]], i64 [[TMP2]], float* [[VLA]], [5 x [10 x double]]* [[C]], i64 5, i64 [[TMP5]], double* [[VLA1]], %struct.TT* [[D]], i64 [[TMP84]]) #[[ATTR4]]
 // CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT24]]
@@ -635,10 +621,10 @@ int bar(int n){
 // CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l170(i64 [[TMP82]], [10 x float]* [[B]], i64 [[TMP2]], float* [[VLA]], [5 x [10 x double]]* [[C]], i64 5, i64 [[TMP5]], double* [[VLA1]], %struct.TT* [[D]], i64 [[TMP84]]) #[[ATTR4]]
 // CHECK1-NEXT:    br label [[OMP_IF_END26]]
 // CHECK1:       omp_if.end26:
-// CHECK1-NEXT:    [[TMP154:%.*]] = load i32, i32* [[A]], align 4
-// CHECK1-NEXT:    [[TMP155:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK1-NEXT:    call void @llvm.stackrestore(i8* [[TMP155]])
-// CHECK1-NEXT:    ret i32 [[TMP154]]
+// CHECK1-NEXT:    [[TMP147:%.*]] = load i32, i32* [[A]], align 4
+// CHECK1-NEXT:    [[TMP148:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK1-NEXT:    call void @llvm.stackrestore(i8* [[TMP148]])
+// CHECK1-NEXT:    ret i32 [[TMP147]]
 //
 //
 // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l103
@@ -1436,14 +1422,14 @@ int bar(int n){
 // CHECK1-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP8:%.*]] = mul nuw i64 2, [[TMP2]]
 // CHECK1-NEXT:    [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 2
-// CHECK1-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[TMP11:%.*]] = bitcast i8** [[TMP10]] to %struct.S1**
-// CHECK1-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP11]], align 8
-// CHECK1-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[TMP13:%.*]] = bitcast i8** [[TMP12]] to double**
-// CHECK1-NEXT:    store double* [[A]], double** [[TMP13]], align 8
-// CHECK1-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK1-NEXT:    store i64 8, i64* [[TMP14]], align 8
+// CHECK1-NEXT:    [[TMP10:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK1-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP10]], i8* align 8 bitcast ([5 x i64]* @.offload_sizes.11 to i8*), i64 40, i1 false)
+// CHECK1-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP12:%.*]] = bitcast i8** [[TMP11]] to %struct.S1**
+// CHECK1-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP12]], align 8
+// CHECK1-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to double**
+// CHECK1-NEXT:    store double* [[A]], double** [[TMP14]], align 8
 // CHECK1-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
 // CHECK1-NEXT:    store i8* null, i8** [[TMP15]], align 8
 // CHECK1-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -1452,46 +1438,40 @@ int bar(int n){
 // CHECK1-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK1-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i64*
 // CHECK1-NEXT:    store i64 [[TMP6]], i64* [[TMP19]], align 8
-// CHECK1-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK1-NEXT:    store i64 4, i64* [[TMP20]], align 8
-// CHECK1-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK1-NEXT:    store i8* null, i8** [[TMP21]], align 8
-// CHECK1-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK1-NEXT:    [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i64*
-// CHECK1-NEXT:    store i64 2, i64* [[TMP23]], align 8
-// CHECK1-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK1-NEXT:    [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i64*
-// CHECK1-NEXT:    store i64 2, i64* [[TMP25]], align 8
-// CHECK1-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK1-NEXT:    store i64 8, i64* [[TMP26]], align 8
-// CHECK1-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK1-NEXT:    store i8* null, i8** [[TMP27]], align 8
-// CHECK1-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK1-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK1-NEXT:    store i8* null, i8** [[TMP20]], align 8
+// CHECK1-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK1-NEXT:    [[TMP22:%.*]] = bitcast i8** [[TMP21]] to i64*
+// CHECK1-NEXT:    store i64 2, i64* [[TMP22]], align 8
+// CHECK1-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK1-NEXT:    [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i64*
+// CHECK1-NEXT:    store i64 2, i64* [[TMP24]], align 8
+// CHECK1-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK1-NEXT:    store i8* null, i8** [[TMP25]], align 8
+// CHECK1-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK1-NEXT:    [[TMP27:%.*]] = bitcast i8** [[TMP26]] to i64*
+// CHECK1-NEXT:    store i64 [[TMP2]], i64* [[TMP27]], align 8
+// CHECK1-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
 // CHECK1-NEXT:    [[TMP29:%.*]] = bitcast i8** [[TMP28]] to i64*
 // CHECK1-NEXT:    store i64 [[TMP2]], i64* [[TMP29]], align 8
-// CHECK1-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK1-NEXT:    [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i64*
-// CHECK1-NEXT:    store i64 [[TMP2]], i64* [[TMP31]], align 8
-// CHECK1-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK1-NEXT:    store i64 8, i64* [[TMP32]], align 8
-// CHECK1-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
-// CHECK1-NEXT:    store i8* null, i8** [[TMP33]], align 8
-// CHECK1-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK1-NEXT:    [[TMP35:%.*]] = bitcast i8** [[TMP34]] to i16**
-// CHECK1-NEXT:    store i16* [[VLA]], i16** [[TMP35]], align 8
-// CHECK1-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
-// CHECK1-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i16**
-// CHECK1-NEXT:    store i16* [[VLA]], i16** [[TMP37]], align 8
-// CHECK1-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK1-NEXT:    store i64 [[TMP9]], i64* [[TMP38]], align 8
-// CHECK1-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
-// CHECK1-NEXT:    store i8* null, i8** [[TMP39]], align 8
-// CHECK1-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK1-NEXT:    [[TMP43:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l242.region_id, i32 5, i8** [[TMP40]], i8** [[TMP41]], i64* [[TMP42]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.10, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
-// CHECK1-NEXT:    [[TMP44:%.*]] = icmp ne i32 [[TMP43]], 0
-// CHECK1-NEXT:    br i1 [[TMP44]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK1-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
+// CHECK1-NEXT:    store i8* null, i8** [[TMP30]], align 8
+// CHECK1-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK1-NEXT:    [[TMP32:%.*]] = bitcast i8** [[TMP31]] to i16**
+// CHECK1-NEXT:    store i16* [[VLA]], i16** [[TMP32]], align 8
+// CHECK1-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK1-NEXT:    [[TMP34:%.*]] = bitcast i8** [[TMP33]] to i16**
+// CHECK1-NEXT:    store i16* [[VLA]], i16** [[TMP34]], align 8
+// CHECK1-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
+// CHECK1-NEXT:    store i64 [[TMP9]], i64* [[TMP35]], align 8
+// CHECK1-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
+// CHECK1-NEXT:    store i8* null, i8** [[TMP36]], align 8
+// CHECK1-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP40:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l242.region_id, i32 5, i8** [[TMP37]], i8** [[TMP38]], i64* [[TMP39]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.12, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
+// CHECK1-NEXT:    [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0
+// CHECK1-NEXT:    br i1 [[TMP41]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK1:       omp_offload.failed:
 // CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l242(%struct.S1* [[THIS1]], i64 [[TMP6]], i64 2, i64 [[TMP2]], i16* [[VLA]]) #[[ATTR4]]
 // CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT]]
@@ -1501,15 +1481,15 @@ int bar(int n){
 // CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l242(%struct.S1* [[THIS1]], i64 [[TMP6]], i64 2, i64 [[TMP2]], i16* [[VLA]]) #[[ATTR4]]
 // CHECK1-NEXT:    br label [[OMP_IF_END]]
 // CHECK1:       omp_if.end:
-// CHECK1-NEXT:    [[TMP45:%.*]] = mul nsw i64 1, [[TMP2]]
-// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i64 [[TMP45]]
+// CHECK1-NEXT:    [[TMP42:%.*]] = mul nsw i64 1, [[TMP2]]
+// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i64 [[TMP42]]
 // CHECK1-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1
-// CHECK1-NEXT:    [[TMP46:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
-// CHECK1-NEXT:    [[CONV3:%.*]] = sext i16 [[TMP46]] to i32
-// CHECK1-NEXT:    [[TMP47:%.*]] = load i32, i32* [[B]], align 4
-// CHECK1-NEXT:    [[ADD4:%.*]] = add nsw i32 [[CONV3]], [[TMP47]]
-// CHECK1-NEXT:    [[TMP48:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK1-NEXT:    call void @llvm.stackrestore(i8* [[TMP48]])
+// CHECK1-NEXT:    [[TMP43:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
+// CHECK1-NEXT:    [[CONV3:%.*]] = sext i16 [[TMP43]] to i32
+// CHECK1-NEXT:    [[TMP44:%.*]] = load i32, i32* [[B]], align 4
+// CHECK1-NEXT:    [[ADD4:%.*]] = add nsw i32 [[CONV3]], [[TMP44]]
+// CHECK1-NEXT:    [[TMP45:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK1-NEXT:    call void @llvm.stackrestore(i8* [[TMP45]])
 // CHECK1-NEXT:    ret i32 [[ADD4]]
 //
 //
@@ -1581,7 +1561,7 @@ int bar(int n){
 // CHECK1-NEXT:    store i8* null, i8** [[TMP26]], align 8
 // CHECK1-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[TMP29:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l224.region_id, i32 4, i8** [[TMP27]], i8** [[TMP28]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.12, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
+// CHECK1-NEXT:    [[TMP29:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l224.region_id, i32 4, i8** [[TMP27]], i8** [[TMP28]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.14, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.15, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
 // CHECK1-NEXT:    [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0
 // CHECK1-NEXT:    br i1 [[TMP30]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK1:       omp_offload.failed:
@@ -1650,7 +1630,7 @@ int bar(int n){
 // CHECK1-NEXT:    store i8* null, i8** [[TMP19]], align 8
 // CHECK1-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l207.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.15, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.16, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
+// CHECK1-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l207.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.17, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.18, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
 // CHECK1-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK1-NEXT:    br i1 [[TMP23]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK1:       omp_offload.failed:
@@ -1689,11 +1669,11 @@ int bar(int n){
 // CHECK1-NEXT:    [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32*
 // CHECK1-NEXT:    store i32 [[TMP4]], i32* [[CONV3]], align 4
 // CHECK1-NEXT:    [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8
-// CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i64, i64, i64, i16*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i64 [[TMP5]], i64 [[TMP1]], i64 [[TMP2]], i16* [[TMP3]])
+// CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i64, i64, i64, i16*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i64 [[TMP5]], i64 [[TMP1]], i64 [[TMP2]], i16* [[TMP3]])
 // CHECK1-NEXT:    ret void
 //
 //
-// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..9
+// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..10
 // CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR3]] {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -1812,11 +1792,11 @@ int bar(int n){
 // CHECK1-NEXT:    [[CONV5:%.*]] = bitcast i64* [[AAA_CASTED]] to i8*
 // CHECK1-NEXT:    store i8 [[TMP5]], i8* [[CONV5]], align 1
 // CHECK1-NEXT:    [[TMP6:%.*]] = load i64, i64* [[AAA_CASTED]], align 8
-// CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, [10 x i32]*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], [10 x i32]* [[TMP0]])
+// CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, [10 x i32]*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], [10 x i32]* [[TMP0]])
 // CHECK1-NEXT:    ret void
 //
 //
-// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..11
+// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..13
 // CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -1862,11 +1842,11 @@ int bar(int n){
 // CHECK1-NEXT:    [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16*
 // CHECK1-NEXT:    store i16 [[TMP3]], i16* [[CONV3]], align 2
 // CHECK1-NEXT:    [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8
-// CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], [10 x i32]* [[TMP0]])
+// CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], [10 x i32]* [[TMP0]])
 // CHECK1-NEXT:    ret void
 //
 //
-// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..14
+// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..16
 // CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -2158,14 +2138,14 @@ int bar(int n){
 // CHECK2-NEXT:    [[TMP86:%.*]] = mul nuw i64 [[TMP2]], 4
 // CHECK2-NEXT:    [[TMP87:%.*]] = mul nuw i64 5, [[TMP5]]
 // CHECK2-NEXT:    [[TMP88:%.*]] = mul nuw i64 [[TMP87]], 8
-// CHECK2-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0
-// CHECK2-NEXT:    [[TMP90:%.*]] = bitcast i8** [[TMP89]] to i64*
-// CHECK2-NEXT:    store i64 [[TMP82]], i64* [[TMP90]], align 8
-// CHECK2-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 0
-// CHECK2-NEXT:    [[TMP92:%.*]] = bitcast i8** [[TMP91]] to i64*
-// CHECK2-NEXT:    store i64 [[TMP82]], i64* [[TMP92]], align 8
-// CHECK2-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK2-NEXT:    store i64 4, i64* [[TMP93]], align 8
+// CHECK2-NEXT:    [[TMP89:%.*]] = bitcast [10 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK2-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP89]], i8* align 8 bitcast ([10 x i64]* @.offload_sizes.8 to i8*), i64 80, i1 false)
+// CHECK2-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP91:%.*]] = bitcast i8** [[TMP90]] to i64*
+// CHECK2-NEXT:    store i64 [[TMP82]], i64* [[TMP91]], align 8
+// CHECK2-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP93:%.*]] = bitcast i8** [[TMP92]] to i64*
+// CHECK2-NEXT:    store i64 [[TMP82]], i64* [[TMP93]], align 8
 // CHECK2-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 0
 // CHECK2-NEXT:    store i8* null, i8** [[TMP94]], align 8
 // CHECK2-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 1
@@ -2174,96 +2154,82 @@ int bar(int n){
 // CHECK2-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 1
 // CHECK2-NEXT:    [[TMP98:%.*]] = bitcast i8** [[TMP97]] to [10 x float]**
 // CHECK2-NEXT:    store [10 x float]* [[B]], [10 x float]** [[TMP98]], align 8
-// CHECK2-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK2-NEXT:    store i64 40, i64* [[TMP99]], align 8
-// CHECK2-NEXT:    [[TMP100:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 1
-// CHECK2-NEXT:    store i8* null, i8** [[TMP100]], align 8
-// CHECK2-NEXT:    [[TMP101:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 2
-// CHECK2-NEXT:    [[TMP102:%.*]] = bitcast i8** [[TMP101]] to i64*
-// CHECK2-NEXT:    store i64 [[TMP2]], i64* [[TMP102]], align 8
-// CHECK2-NEXT:    [[TMP103:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 2
-// CHECK2-NEXT:    [[TMP104:%.*]] = bitcast i8** [[TMP103]] to i64*
-// CHECK2-NEXT:    store i64 [[TMP2]], i64* [[TMP104]], align 8
-// CHECK2-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK2-NEXT:    store i64 8, i64* [[TMP105]], align 8
-// CHECK2-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 2
-// CHECK2-NEXT:    store i8* null, i8** [[TMP106]], align 8
-// CHECK2-NEXT:    [[TMP107:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 3
+// CHECK2-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 1
+// CHECK2-NEXT:    store i8* null, i8** [[TMP99]], align 8
+// CHECK2-NEXT:    [[TMP100:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 2
+// CHECK2-NEXT:    [[TMP101:%.*]] = bitcast i8** [[TMP100]] to i64*
+// CHECK2-NEXT:    store i64 [[TMP2]], i64* [[TMP101]], align 8
+// CHECK2-NEXT:    [[TMP102:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 2
+// CHECK2-NEXT:    [[TMP103:%.*]] = bitcast i8** [[TMP102]] to i64*
+// CHECK2-NEXT:    store i64 [[TMP2]], i64* [[TMP103]], align 8
+// CHECK2-NEXT:    [[TMP104:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 2
+// CHECK2-NEXT:    store i8* null, i8** [[TMP104]], align 8
+// CHECK2-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 3
+// CHECK2-NEXT:    [[TMP106:%.*]] = bitcast i8** [[TMP105]] to float**
+// CHECK2-NEXT:    store float* [[VLA]], float** [[TMP106]], align 8
+// CHECK2-NEXT:    [[TMP107:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 3
 // CHECK2-NEXT:    [[TMP108:%.*]] = bitcast i8** [[TMP107]] to float**
 // CHECK2-NEXT:    store float* [[VLA]], float** [[TMP108]], align 8
-// CHECK2-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 3
-// CHECK2-NEXT:    [[TMP110:%.*]] = bitcast i8** [[TMP109]] to float**
-// CHECK2-NEXT:    store float* [[VLA]], float** [[TMP110]], align 8
-// CHECK2-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK2-NEXT:    store i64 [[TMP86]], i64* [[TMP111]], align 8
-// CHECK2-NEXT:    [[TMP112:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 3
-// CHECK2-NEXT:    store i8* null, i8** [[TMP112]], align 8
-// CHECK2-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 4
+// CHECK2-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
+// CHECK2-NEXT:    store i64 [[TMP86]], i64* [[TMP109]], align 8
+// CHECK2-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 3
+// CHECK2-NEXT:    store i8* null, i8** [[TMP110]], align 8
+// CHECK2-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 4
+// CHECK2-NEXT:    [[TMP112:%.*]] = bitcast i8** [[TMP111]] to [5 x [10 x double]]**
+// CHECK2-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP112]], align 8
+// CHECK2-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 4
 // CHECK2-NEXT:    [[TMP114:%.*]] = bitcast i8** [[TMP113]] to [5 x [10 x double]]**
 // CHECK2-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP114]], align 8
-// CHECK2-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 4
-// CHECK2-NEXT:    [[TMP116:%.*]] = bitcast i8** [[TMP115]] to [5 x [10 x double]]**
-// CHECK2-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP116]], align 8
-// CHECK2-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK2-NEXT:    store i64 400, i64* [[TMP117]], align 8
-// CHECK2-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 4
-// CHECK2-NEXT:    store i8* null, i8** [[TMP118]], align 8
-// CHECK2-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 5
-// CHECK2-NEXT:    [[TMP120:%.*]] = bitcast i8** [[TMP119]] to i64*
-// CHECK2-NEXT:    store i64 5, i64* [[TMP120]], align 8
-// CHECK2-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 5
+// CHECK2-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 4
+// CHECK2-NEXT:    store i8* null, i8** [[TMP115]], align 8
+// CHECK2-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 5
+// CHECK2-NEXT:    [[TMP117:%.*]] = bitcast i8** [[TMP116]] to i64*
+// CHECK2-NEXT:    store i64 5, i64* [[TMP117]], align 8
+// CHECK2-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 5
+// CHECK2-NEXT:    [[TMP119:%.*]] = bitcast i8** [[TMP118]] to i64*
+// CHECK2-NEXT:    store i64 5, i64* [[TMP119]], align 8
+// CHECK2-NEXT:    [[TMP120:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 5
+// CHECK2-NEXT:    store i8* null, i8** [[TMP120]], align 8
+// CHECK2-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 6
 // CHECK2-NEXT:    [[TMP122:%.*]] = bitcast i8** [[TMP121]] to i64*
-// CHECK2-NEXT:    store i64 5, i64* [[TMP122]], align 8
-// CHECK2-NEXT:    [[TMP123:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 5
-// CHECK2-NEXT:    store i64 8, i64* [[TMP123]], align 8
-// CHECK2-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 5
-// CHECK2-NEXT:    store i8* null, i8** [[TMP124]], align 8
-// CHECK2-NEXT:    [[TMP125:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 6
-// CHECK2-NEXT:    [[TMP126:%.*]] = bitcast i8** [[TMP125]] to i64*
-// CHECK2-NEXT:    store i64 [[TMP5]], i64* [[TMP126]], align 8
-// CHECK2-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 6
-// CHECK2-NEXT:    [[TMP128:%.*]] = bitcast i8** [[TMP127]] to i64*
-// CHECK2-NEXT:    store i64 [[TMP5]], i64* [[TMP128]], align 8
-// CHECK2-NEXT:    [[TMP129:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 6
-// CHECK2-NEXT:    store i64 8, i64* [[TMP129]], align 8
-// CHECK2-NEXT:    [[TMP130:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 6
-// CHECK2-NEXT:    store i8* null, i8** [[TMP130]], align 8
-// CHECK2-NEXT:    [[TMP131:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 7
-// CHECK2-NEXT:    [[TMP132:%.*]] = bitcast i8** [[TMP131]] to double**
-// CHECK2-NEXT:    store double* [[VLA1]], double** [[TMP132]], align 8
-// CHECK2-NEXT:    [[TMP133:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 7
-// CHECK2-NEXT:    [[TMP134:%.*]] = bitcast i8** [[TMP133]] to double**
-// CHECK2-NEXT:    store double* [[VLA1]], double** [[TMP134]], align 8
-// CHECK2-NEXT:    [[TMP135:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
-// CHECK2-NEXT:    store i64 [[TMP88]], i64* [[TMP135]], align 8
-// CHECK2-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 7
+// CHECK2-NEXT:    store i64 [[TMP5]], i64* [[TMP122]], align 8
+// CHECK2-NEXT:    [[TMP123:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 6
+// CHECK2-NEXT:    [[TMP124:%.*]] = bitcast i8** [[TMP123]] to i64*
+// CHECK2-NEXT:    store i64 [[TMP5]], i64* [[TMP124]], align 8
+// CHECK2-NEXT:    [[TMP125:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 6
+// CHECK2-NEXT:    store i8* null, i8** [[TMP125]], align 8
+// CHECK2-NEXT:    [[TMP126:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 7
+// CHECK2-NEXT:    [[TMP127:%.*]] = bitcast i8** [[TMP126]] to double**
+// CHECK2-NEXT:    store double* [[VLA1]], double** [[TMP127]], align 8
+// CHECK2-NEXT:    [[TMP128:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 7
+// CHECK2-NEXT:    [[TMP129:%.*]] = bitcast i8** [[TMP128]] to double**
+// CHECK2-NEXT:    store double* [[VLA1]], double** [[TMP129]], align 8
+// CHECK2-NEXT:    [[TMP130:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
+// CHECK2-NEXT:    store i64 [[TMP88]], i64* [[TMP130]], align 8
+// CHECK2-NEXT:    [[TMP131:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 7
+// CHECK2-NEXT:    store i8* null, i8** [[TMP131]], align 8
+// CHECK2-NEXT:    [[TMP132:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 8
+// CHECK2-NEXT:    [[TMP133:%.*]] = bitcast i8** [[TMP132]] to %struct.TT**
+// CHECK2-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP133]], align 8
+// CHECK2-NEXT:    [[TMP134:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 8
+// CHECK2-NEXT:    [[TMP135:%.*]] = bitcast i8** [[TMP134]] to %struct.TT**
+// CHECK2-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP135]], align 8
+// CHECK2-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 8
 // CHECK2-NEXT:    store i8* null, i8** [[TMP136]], align 8
-// CHECK2-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 8
-// CHECK2-NEXT:    [[TMP138:%.*]] = bitcast i8** [[TMP137]] to %struct.TT**
-// CHECK2-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP138]], align 8
-// CHECK2-NEXT:    [[TMP139:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 8
-// CHECK2-NEXT:    [[TMP140:%.*]] = bitcast i8** [[TMP139]] to %struct.TT**
-// CHECK2-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP140]], align 8
-// CHECK2-NEXT:    [[TMP141:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 8
-// CHECK2-NEXT:    store i64 16, i64* [[TMP141]], align 8
-// CHECK2-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 8
-// CHECK2-NEXT:    store i8* null, i8** [[TMP142]], align 8
-// CHECK2-NEXT:    [[TMP143:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 9
-// CHECK2-NEXT:    [[TMP144:%.*]] = bitcast i8** [[TMP143]] to i64*
-// CHECK2-NEXT:    store i64 [[TMP84]], i64* [[TMP144]], align 8
-// CHECK2-NEXT:    [[TMP145:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 9
-// CHECK2-NEXT:    [[TMP146:%.*]] = bitcast i8** [[TMP145]] to i64*
-// CHECK2-NEXT:    store i64 [[TMP84]], i64* [[TMP146]], align 8
-// CHECK2-NEXT:    [[TMP147:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 9
-// CHECK2-NEXT:    store i64 4, i64* [[TMP147]], align 8
-// CHECK2-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 9
-// CHECK2-NEXT:    store i8* null, i8** [[TMP148]], align 8
-// CHECK2-NEXT:    [[TMP149:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0
-// CHECK2-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 0
-// CHECK2-NEXT:    [[TMP151:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK2-NEXT:    [[TMP152:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l170.region_id, i32 10, i8** [[TMP149]], i8** [[TMP150]], i64* [[TMP151]], i64* getelementptr inbounds ([10 x i64], [10 x i64]* @.offload_maptypes.8, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
-// CHECK2-NEXT:    [[TMP153:%.*]] = icmp ne i32 [[TMP152]], 0
-// CHECK2-NEXT:    br i1 [[TMP153]], label [[OMP_OFFLOAD_FAILED23:%.*]], label [[OMP_OFFLOAD_CONT24:%.*]]
+// CHECK2-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 9
+// CHECK2-NEXT:    [[TMP138:%.*]] = bitcast i8** [[TMP137]] to i64*
+// CHECK2-NEXT:    store i64 [[TMP84]], i64* [[TMP138]], align 8
+// CHECK2-NEXT:    [[TMP139:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 9
+// CHECK2-NEXT:    [[TMP140:%.*]] = bitcast i8** [[TMP139]] to i64*
+// CHECK2-NEXT:    store i64 [[TMP84]], i64* [[TMP140]], align 8
+// CHECK2-NEXT:    [[TMP141:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 9
+// CHECK2-NEXT:    store i8* null, i8** [[TMP141]], align 8
+// CHECK2-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP143:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP144:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP145:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l170.region_id, i32 10, i8** [[TMP142]], i8** [[TMP143]], i64* [[TMP144]], i64* getelementptr inbounds ([10 x i64], [10 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
+// CHECK2-NEXT:    [[TMP146:%.*]] = icmp ne i32 [[TMP145]], 0
+// CHECK2-NEXT:    br i1 [[TMP146]], label [[OMP_OFFLOAD_FAILED23:%.*]], label [[OMP_OFFLOAD_CONT24:%.*]]
 // CHECK2:       omp_offload.failed23:
 // CHECK2-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l170(i64 [[TMP82]], [10 x float]* [[B]], i64 [[TMP2]], float* [[VLA]], [5 x [10 x double]]* [[C]], i64 5, i64 [[TMP5]], double* [[VLA1]], %struct.TT* [[D]], i64 [[TMP84]]) #[[ATTR4]]
 // CHECK2-NEXT:    br label [[OMP_OFFLOAD_CONT24]]
@@ -2273,10 +2239,10 @@ int bar(int n){
 // CHECK2-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l170(i64 [[TMP82]], [10 x float]* [[B]], i64 [[TMP2]], float* [[VLA]], [5 x [10 x double]]* [[C]], i64 5, i64 [[TMP5]], double* [[VLA1]], %struct.TT* [[D]], i64 [[TMP84]]) #[[ATTR4]]
 // CHECK2-NEXT:    br label [[OMP_IF_END26]]
 // CHECK2:       omp_if.end26:
-// CHECK2-NEXT:    [[TMP154:%.*]] = load i32, i32* [[A]], align 4
-// CHECK2-NEXT:    [[TMP155:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK2-NEXT:    call void @llvm.stackrestore(i8* [[TMP155]])
-// CHECK2-NEXT:    ret i32 [[TMP154]]
+// CHECK2-NEXT:    [[TMP147:%.*]] = load i32, i32* [[A]], align 4
+// CHECK2-NEXT:    [[TMP148:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK2-NEXT:    call void @llvm.stackrestore(i8* [[TMP148]])
+// CHECK2-NEXT:    ret i32 [[TMP147]]
 //
 //
 // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l103
@@ -3074,14 +3040,14 @@ int bar(int n){
 // CHECK2-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0
 // CHECK2-NEXT:    [[TMP8:%.*]] = mul nuw i64 2, [[TMP2]]
 // CHECK2-NEXT:    [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 2
-// CHECK2-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK2-NEXT:    [[TMP11:%.*]] = bitcast i8** [[TMP10]] to %struct.S1**
-// CHECK2-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP11]], align 8
-// CHECK2-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK2-NEXT:    [[TMP13:%.*]] = bitcast i8** [[TMP12]] to double**
-// CHECK2-NEXT:    store double* [[A]], double** [[TMP13]], align 8
-// CHECK2-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK2-NEXT:    store i64 8, i64* [[TMP14]], align 8
+// CHECK2-NEXT:    [[TMP10:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK2-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP10]], i8* align 8 bitcast ([5 x i64]* @.offload_sizes.11 to i8*), i64 40, i1 false)
+// CHECK2-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP12:%.*]] = bitcast i8** [[TMP11]] to %struct.S1**
+// CHECK2-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP12]], align 8
+// CHECK2-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to double**
+// CHECK2-NEXT:    store double* [[A]], double** [[TMP14]], align 8
 // CHECK2-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
 // CHECK2-NEXT:    store i8* null, i8** [[TMP15]], align 8
 // CHECK2-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -3090,46 +3056,40 @@ int bar(int n){
 // CHECK2-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK2-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i64*
 // CHECK2-NEXT:    store i64 [[TMP6]], i64* [[TMP19]], align 8
-// CHECK2-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK2-NEXT:    store i64 4, i64* [[TMP20]], align 8
-// CHECK2-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK2-NEXT:    store i8* null, i8** [[TMP21]], align 8
-// CHECK2-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK2-NEXT:    [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i64*
-// CHECK2-NEXT:    store i64 2, i64* [[TMP23]], align 8
-// CHECK2-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK2-NEXT:    [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i64*
-// CHECK2-NEXT:    store i64 2, i64* [[TMP25]], align 8
-// CHECK2-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK2-NEXT:    store i64 8, i64* [[TMP26]], align 8
-// CHECK2-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK2-NEXT:    store i8* null, i8** [[TMP27]], align 8
-// CHECK2-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK2-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK2-NEXT:    store i8* null, i8** [[TMP20]], align 8
+// CHECK2-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK2-NEXT:    [[TMP22:%.*]] = bitcast i8** [[TMP21]] to i64*
+// CHECK2-NEXT:    store i64 2, i64* [[TMP22]], align 8
+// CHECK2-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK2-NEXT:    [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i64*
+// CHECK2-NEXT:    store i64 2, i64* [[TMP24]], align 8
+// CHECK2-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK2-NEXT:    store i8* null, i8** [[TMP25]], align 8
+// CHECK2-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK2-NEXT:    [[TMP27:%.*]] = bitcast i8** [[TMP26]] to i64*
+// CHECK2-NEXT:    store i64 [[TMP2]], i64* [[TMP27]], align 8
+// CHECK2-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
 // CHECK2-NEXT:    [[TMP29:%.*]] = bitcast i8** [[TMP28]] to i64*
 // CHECK2-NEXT:    store i64 [[TMP2]], i64* [[TMP29]], align 8
-// CHECK2-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK2-NEXT:    [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i64*
-// CHECK2-NEXT:    store i64 [[TMP2]], i64* [[TMP31]], align 8
-// CHECK2-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK2-NEXT:    store i64 8, i64* [[TMP32]], align 8
-// CHECK2-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
-// CHECK2-NEXT:    store i8* null, i8** [[TMP33]], align 8
-// CHECK2-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK2-NEXT:    [[TMP35:%.*]] = bitcast i8** [[TMP34]] to i16**
-// CHECK2-NEXT:    store i16* [[VLA]], i16** [[TMP35]], align 8
-// CHECK2-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
-// CHECK2-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i16**
-// CHECK2-NEXT:    store i16* [[VLA]], i16** [[TMP37]], align 8
-// CHECK2-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK2-NEXT:    store i64 [[TMP9]], i64* [[TMP38]], align 8
-// CHECK2-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
-// CHECK2-NEXT:    store i8* null, i8** [[TMP39]], align 8
-// CHECK2-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK2-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK2-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK2-NEXT:    [[TMP43:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l242.region_id, i32 5, i8** [[TMP40]], i8** [[TMP41]], i64* [[TMP42]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.10, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
-// CHECK2-NEXT:    [[TMP44:%.*]] = icmp ne i32 [[TMP43]], 0
-// CHECK2-NEXT:    br i1 [[TMP44]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK2-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
+// CHECK2-NEXT:    store i8* null, i8** [[TMP30]], align 8
+// CHECK2-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK2-NEXT:    [[TMP32:%.*]] = bitcast i8** [[TMP31]] to i16**
+// CHECK2-NEXT:    store i16* [[VLA]], i16** [[TMP32]], align 8
+// CHECK2-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK2-NEXT:    [[TMP34:%.*]] = bitcast i8** [[TMP33]] to i16**
+// CHECK2-NEXT:    store i16* [[VLA]], i16** [[TMP34]], align 8
+// CHECK2-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
+// CHECK2-NEXT:    store i64 [[TMP9]], i64* [[TMP35]], align 8
+// CHECK2-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
+// CHECK2-NEXT:    store i8* null, i8** [[TMP36]], align 8
+// CHECK2-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP40:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l242.region_id, i32 5, i8** [[TMP37]], i8** [[TMP38]], i64* [[TMP39]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.12, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
+// CHECK2-NEXT:    [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0
+// CHECK2-NEXT:    br i1 [[TMP41]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK2:       omp_offload.failed:
 // CHECK2-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l242(%struct.S1* [[THIS1]], i64 [[TMP6]], i64 2, i64 [[TMP2]], i16* [[VLA]]) #[[ATTR4]]
 // CHECK2-NEXT:    br label [[OMP_OFFLOAD_CONT]]
@@ -3139,15 +3099,15 @@ int bar(int n){
 // CHECK2-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l242(%struct.S1* [[THIS1]], i64 [[TMP6]], i64 2, i64 [[TMP2]], i16* [[VLA]]) #[[ATTR4]]
 // CHECK2-NEXT:    br label [[OMP_IF_END]]
 // CHECK2:       omp_if.end:
-// CHECK2-NEXT:    [[TMP45:%.*]] = mul nsw i64 1, [[TMP2]]
-// CHECK2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i64 [[TMP45]]
+// CHECK2-NEXT:    [[TMP42:%.*]] = mul nsw i64 1, [[TMP2]]
+// CHECK2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i64 [[TMP42]]
 // CHECK2-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1
-// CHECK2-NEXT:    [[TMP46:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
-// CHECK2-NEXT:    [[CONV3:%.*]] = sext i16 [[TMP46]] to i32
-// CHECK2-NEXT:    [[TMP47:%.*]] = load i32, i32* [[B]], align 4
-// CHECK2-NEXT:    [[ADD4:%.*]] = add nsw i32 [[CONV3]], [[TMP47]]
-// CHECK2-NEXT:    [[TMP48:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK2-NEXT:    call void @llvm.stackrestore(i8* [[TMP48]])
+// CHECK2-NEXT:    [[TMP43:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
+// CHECK2-NEXT:    [[CONV3:%.*]] = sext i16 [[TMP43]] to i32
+// CHECK2-NEXT:    [[TMP44:%.*]] = load i32, i32* [[B]], align 4
+// CHECK2-NEXT:    [[ADD4:%.*]] = add nsw i32 [[CONV3]], [[TMP44]]
+// CHECK2-NEXT:    [[TMP45:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK2-NEXT:    call void @llvm.stackrestore(i8* [[TMP45]])
 // CHECK2-NEXT:    ret i32 [[ADD4]]
 //
 //
@@ -3219,7 +3179,7 @@ int bar(int n){
 // CHECK2-NEXT:    store i8* null, i8** [[TMP26]], align 8
 // CHECK2-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK2-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK2-NEXT:    [[TMP29:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l224.region_id, i32 4, i8** [[TMP27]], i8** [[TMP28]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.12, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
+// CHECK2-NEXT:    [[TMP29:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l224.region_id, i32 4, i8** [[TMP27]], i8** [[TMP28]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.14, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.15, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
 // CHECK2-NEXT:    [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0
 // CHECK2-NEXT:    br i1 [[TMP30]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK2:       omp_offload.failed:
@@ -3288,7 +3248,7 @@ int bar(int n){
 // CHECK2-NEXT:    store i8* null, i8** [[TMP19]], align 8
 // CHECK2-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK2-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK2-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l207.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.15, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.16, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
+// CHECK2-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l207.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.17, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.18, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
 // CHECK2-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK2-NEXT:    br i1 [[TMP23]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK2:       omp_offload.failed:
@@ -3327,11 +3287,11 @@ int bar(int n){
 // CHECK2-NEXT:    [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32*
 // CHECK2-NEXT:    store i32 [[TMP4]], i32* [[CONV3]], align 4
 // CHECK2-NEXT:    [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8
-// CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i64, i64, i64, i16*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i64 [[TMP5]], i64 [[TMP1]], i64 [[TMP2]], i16* [[TMP3]])
+// CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i64, i64, i64, i16*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i64 [[TMP5]], i64 [[TMP1]], i64 [[TMP2]], i16* [[TMP3]])
 // CHECK2-NEXT:    ret void
 //
 //
-// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..9
+// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..10
 // CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR3]] {
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -3450,11 +3410,11 @@ int bar(int n){
 // CHECK2-NEXT:    [[CONV5:%.*]] = bitcast i64* [[AAA_CASTED]] to i8*
 // CHECK2-NEXT:    store i8 [[TMP5]], i8* [[CONV5]], align 1
 // CHECK2-NEXT:    [[TMP6:%.*]] = load i64, i64* [[AAA_CASTED]], align 8
-// CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, [10 x i32]*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], [10 x i32]* [[TMP0]])
+// CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, [10 x i32]*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], [10 x i32]* [[TMP0]])
 // CHECK2-NEXT:    ret void
 //
 //
-// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..11
+// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..13
 // CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] {
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -3500,11 +3460,11 @@ int bar(int n){
 // CHECK2-NEXT:    [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16*
 // CHECK2-NEXT:    store i16 [[TMP3]], i16* [[CONV3]], align 2
 // CHECK2-NEXT:    [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8
-// CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], [10 x i32]* [[TMP0]])
+// CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], [10 x i32]* [[TMP0]])
 // CHECK2-NEXT:    ret void
 //
 //
-// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..14
+// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..16
 // CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] {
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -3786,14 +3746,14 @@ int bar(int n){
 // CHECK3-NEXT:    [[TMP84:%.*]] = mul nuw i32 5, [[TMP3]]
 // CHECK3-NEXT:    [[TMP85:%.*]] = mul nuw i32 [[TMP84]], 8
 // CHECK3-NEXT:    [[TMP86:%.*]] = sext i32 [[TMP85]] to i64
-// CHECK3-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 0
-// CHECK3-NEXT:    [[TMP88:%.*]] = bitcast i8** [[TMP87]] to i32*
-// CHECK3-NEXT:    store i32 [[TMP78]], i32* [[TMP88]], align 4
-// CHECK3-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 0
-// CHECK3-NEXT:    [[TMP90:%.*]] = bitcast i8** [[TMP89]] to i32*
-// CHECK3-NEXT:    store i32 [[TMP78]], i32* [[TMP90]], align 4
-// CHECK3-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK3-NEXT:    store i64 4, i64* [[TMP91]], align 4
+// CHECK3-NEXT:    [[TMP87:%.*]] = bitcast [10 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK3-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP87]], i8* align 4 bitcast ([10 x i64]* @.offload_sizes.8 to i8*), i32 80, i1 false)
+// CHECK3-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 0
+// CHECK3-NEXT:    [[TMP89:%.*]] = bitcast i8** [[TMP88]] to i32*
+// CHECK3-NEXT:    store i32 [[TMP78]], i32* [[TMP89]], align 4
+// CHECK3-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 0
+// CHECK3-NEXT:    [[TMP91:%.*]] = bitcast i8** [[TMP90]] to i32*
+// CHECK3-NEXT:    store i32 [[TMP78]], i32* [[TMP91]], align 4
 // CHECK3-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 0
 // CHECK3-NEXT:    store i8* null, i8** [[TMP92]], align 4
 // CHECK3-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 1
@@ -3802,96 +3762,82 @@ int bar(int n){
 // CHECK3-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 1
 // CHECK3-NEXT:    [[TMP96:%.*]] = bitcast i8** [[TMP95]] to [10 x float]**
 // CHECK3-NEXT:    store [10 x float]* [[B]], [10 x float]** [[TMP96]], align 4
-// CHECK3-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK3-NEXT:    store i64 40, i64* [[TMP97]], align 4
-// CHECK3-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 1
-// CHECK3-NEXT:    store i8* null, i8** [[TMP98]], align 4
-// CHECK3-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 2
-// CHECK3-NEXT:    [[TMP100:%.*]] = bitcast i8** [[TMP99]] to i32*
-// CHECK3-NEXT:    store i32 [[TMP1]], i32* [[TMP100]], align 4
-// CHECK3-NEXT:    [[TMP101:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 2
-// CHECK3-NEXT:    [[TMP102:%.*]] = bitcast i8** [[TMP101]] to i32*
-// CHECK3-NEXT:    store i32 [[TMP1]], i32* [[TMP102]], align 4
-// CHECK3-NEXT:    [[TMP103:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK3-NEXT:    store i64 4, i64* [[TMP103]], align 4
-// CHECK3-NEXT:    [[TMP104:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 2
-// CHECK3-NEXT:    store i8* null, i8** [[TMP104]], align 4
-// CHECK3-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 3
+// CHECK3-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 1
+// CHECK3-NEXT:    store i8* null, i8** [[TMP97]], align 4
+// CHECK3-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 2
+// CHECK3-NEXT:    [[TMP99:%.*]] = bitcast i8** [[TMP98]] to i32*
+// CHECK3-NEXT:    store i32 [[TMP1]], i32* [[TMP99]], align 4
+// CHECK3-NEXT:    [[TMP100:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 2
+// CHECK3-NEXT:    [[TMP101:%.*]] = bitcast i8** [[TMP100]] to i32*
+// CHECK3-NEXT:    store i32 [[TMP1]], i32* [[TMP101]], align 4
+// CHECK3-NEXT:    [[TMP102:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 2
+// CHECK3-NEXT:    store i8* null, i8** [[TMP102]], align 4
+// CHECK3-NEXT:    [[TMP103:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 3
+// CHECK3-NEXT:    [[TMP104:%.*]] = bitcast i8** [[TMP103]] to float**
+// CHECK3-NEXT:    store float* [[VLA]], float** [[TMP104]], align 4
+// CHECK3-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 3
 // CHECK3-NEXT:    [[TMP106:%.*]] = bitcast i8** [[TMP105]] to float**
 // CHECK3-NEXT:    store float* [[VLA]], float** [[TMP106]], align 4
-// CHECK3-NEXT:    [[TMP107:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 3
-// CHECK3-NEXT:    [[TMP108:%.*]] = bitcast i8** [[TMP107]] to float**
-// CHECK3-NEXT:    store float* [[VLA]], float** [[TMP108]], align 4
-// CHECK3-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK3-NEXT:    store i64 [[TMP83]], i64* [[TMP109]], align 4
-// CHECK3-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 3
-// CHECK3-NEXT:    store i8* null, i8** [[TMP110]], align 4
-// CHECK3-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 4
+// CHECK3-NEXT:    [[TMP107:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
+// CHECK3-NEXT:    store i64 [[TMP83]], i64* [[TMP107]], align 4
+// CHECK3-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 3
+// CHECK3-NEXT:    store i8* null, i8** [[TMP108]], align 4
+// CHECK3-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 4
+// CHECK3-NEXT:    [[TMP110:%.*]] = bitcast i8** [[TMP109]] to [5 x [10 x double]]**
+// CHECK3-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP110]], align 4
+// CHECK3-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 4
 // CHECK3-NEXT:    [[TMP112:%.*]] = bitcast i8** [[TMP111]] to [5 x [10 x double]]**
 // CHECK3-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP112]], align 4
-// CHECK3-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 4
-// CHECK3-NEXT:    [[TMP114:%.*]] = bitcast i8** [[TMP113]] to [5 x [10 x double]]**
-// CHECK3-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP114]], align 4
-// CHECK3-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK3-NEXT:    store i64 400, i64* [[TMP115]], align 4
-// CHECK3-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 4
-// CHECK3-NEXT:    store i8* null, i8** [[TMP116]], align 4
-// CHECK3-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 5
-// CHECK3-NEXT:    [[TMP118:%.*]] = bitcast i8** [[TMP117]] to i32*
-// CHECK3-NEXT:    store i32 5, i32* [[TMP118]], align 4
-// CHECK3-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 5
+// CHECK3-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 4
+// CHECK3-NEXT:    store i8* null, i8** [[TMP113]], align 4
+// CHECK3-NEXT:    [[TMP114:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 5
+// CHECK3-NEXT:    [[TMP115:%.*]] = bitcast i8** [[TMP114]] to i32*
+// CHECK3-NEXT:    store i32 5, i32* [[TMP115]], align 4
+// CHECK3-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 5
+// CHECK3-NEXT:    [[TMP117:%.*]] = bitcast i8** [[TMP116]] to i32*
+// CHECK3-NEXT:    store i32 5, i32* [[TMP117]], align 4
+// CHECK3-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 5
+// CHECK3-NEXT:    store i8* null, i8** [[TMP118]], align 4
+// CHECK3-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 6
 // CHECK3-NEXT:    [[TMP120:%.*]] = bitcast i8** [[TMP119]] to i32*
-// CHECK3-NEXT:    store i32 5, i32* [[TMP120]], align 4
-// CHECK3-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 5
-// CHECK3-NEXT:    store i64 4, i64* [[TMP121]], align 4
-// CHECK3-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 5
-// CHECK3-NEXT:    store i8* null, i8** [[TMP122]], align 4
-// CHECK3-NEXT:    [[TMP123:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 6
-// CHECK3-NEXT:    [[TMP124:%.*]] = bitcast i8** [[TMP123]] to i32*
-// CHECK3-NEXT:    store i32 [[TMP3]], i32* [[TMP124]], align 4
-// CHECK3-NEXT:    [[TMP125:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 6
-// CHECK3-NEXT:    [[TMP126:%.*]] = bitcast i8** [[TMP125]] to i32*
-// CHECK3-NEXT:    store i32 [[TMP3]], i32* [[TMP126]], align 4
-// CHECK3-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 6
-// CHECK3-NEXT:    store i64 4, i64* [[TMP127]], align 4
-// CHECK3-NEXT:    [[TMP128:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 6
-// CHECK3-NEXT:    store i8* null, i8** [[TMP128]], align 4
-// CHECK3-NEXT:    [[TMP129:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 7
-// CHECK3-NEXT:    [[TMP130:%.*]] = bitcast i8** [[TMP129]] to double**
-// CHECK3-NEXT:    store double* [[VLA1]], double** [[TMP130]], align 4
-// CHECK3-NEXT:    [[TMP131:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 7
-// CHECK3-NEXT:    [[TMP132:%.*]] = bitcast i8** [[TMP131]] to double**
-// CHECK3-NEXT:    store double* [[VLA1]], double** [[TMP132]], align 4
-// CHECK3-NEXT:    [[TMP133:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
-// CHECK3-NEXT:    store i64 [[TMP86]], i64* [[TMP133]], align 4
-// CHECK3-NEXT:    [[TMP134:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 7
+// CHECK3-NEXT:    store i32 [[TMP3]], i32* [[TMP120]], align 4
+// CHECK3-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 6
+// CHECK3-NEXT:    [[TMP122:%.*]] = bitcast i8** [[TMP121]] to i32*
+// CHECK3-NEXT:    store i32 [[TMP3]], i32* [[TMP122]], align 4
+// CHECK3-NEXT:    [[TMP123:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 6
+// CHECK3-NEXT:    store i8* null, i8** [[TMP123]], align 4
+// CHECK3-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 7
+// CHECK3-NEXT:    [[TMP125:%.*]] = bitcast i8** [[TMP124]] to double**
+// CHECK3-NEXT:    store double* [[VLA1]], double** [[TMP125]], align 4
+// CHECK3-NEXT:    [[TMP126:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 7
+// CHECK3-NEXT:    [[TMP127:%.*]] = bitcast i8** [[TMP126]] to double**
+// CHECK3-NEXT:    store double* [[VLA1]], double** [[TMP127]], align 4
+// CHECK3-NEXT:    [[TMP128:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
+// CHECK3-NEXT:    store i64 [[TMP86]], i64* [[TMP128]], align 4
+// CHECK3-NEXT:    [[TMP129:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 7
+// CHECK3-NEXT:    store i8* null, i8** [[TMP129]], align 4
+// CHECK3-NEXT:    [[TMP130:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 8
+// CHECK3-NEXT:    [[TMP131:%.*]] = bitcast i8** [[TMP130]] to %struct.TT**
+// CHECK3-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP131]], align 4
+// CHECK3-NEXT:    [[TMP132:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 8
+// CHECK3-NEXT:    [[TMP133:%.*]] = bitcast i8** [[TMP132]] to %struct.TT**
+// CHECK3-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP133]], align 4
+// CHECK3-NEXT:    [[TMP134:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 8
 // CHECK3-NEXT:    store i8* null, i8** [[TMP134]], align 4
-// CHECK3-NEXT:    [[TMP135:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 8
-// CHECK3-NEXT:    [[TMP136:%.*]] = bitcast i8** [[TMP135]] to %struct.TT**
-// CHECK3-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP136]], align 4
-// CHECK3-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 8
-// CHECK3-NEXT:    [[TMP138:%.*]] = bitcast i8** [[TMP137]] to %struct.TT**
-// CHECK3-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP138]], align 4
-// CHECK3-NEXT:    [[TMP139:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 8
-// CHECK3-NEXT:    store i64 12, i64* [[TMP139]], align 4
-// CHECK3-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 8
-// CHECK3-NEXT:    store i8* null, i8** [[TMP140]], align 4
-// CHECK3-NEXT:    [[TMP141:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 9
-// CHECK3-NEXT:    [[TMP142:%.*]] = bitcast i8** [[TMP141]] to i32*
-// CHECK3-NEXT:    store i32 [[TMP80]], i32* [[TMP142]], align 4
-// CHECK3-NEXT:    [[TMP143:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 9
-// CHECK3-NEXT:    [[TMP144:%.*]] = bitcast i8** [[TMP143]] to i32*
-// CHECK3-NEXT:    store i32 [[TMP80]], i32* [[TMP144]], align 4
-// CHECK3-NEXT:    [[TMP145:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 9
-// CHECK3-NEXT:    store i64 4, i64* [[TMP145]], align 4
-// CHECK3-NEXT:    [[TMP146:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 9
-// CHECK3-NEXT:    store i8* null, i8** [[TMP146]], align 4
-// CHECK3-NEXT:    [[TMP147:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 0
-// CHECK3-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 0
-// CHECK3-NEXT:    [[TMP149:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK3-NEXT:    [[TMP150:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l170.region_id, i32 10, i8** [[TMP147]], i8** [[TMP148]], i64* [[TMP149]], i64* getelementptr inbounds ([10 x i64], [10 x i64]* @.offload_maptypes.8, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
-// CHECK3-NEXT:    [[TMP151:%.*]] = icmp ne i32 [[TMP150]], 0
-// CHECK3-NEXT:    br i1 [[TMP151]], label [[OMP_OFFLOAD_FAILED17:%.*]], label [[OMP_OFFLOAD_CONT18:%.*]]
+// CHECK3-NEXT:    [[TMP135:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 9
+// CHECK3-NEXT:    [[TMP136:%.*]] = bitcast i8** [[TMP135]] to i32*
+// CHECK3-NEXT:    store i32 [[TMP80]], i32* [[TMP136]], align 4
+// CHECK3-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 9
+// CHECK3-NEXT:    [[TMP138:%.*]] = bitcast i8** [[TMP137]] to i32*
+// CHECK3-NEXT:    store i32 [[TMP80]], i32* [[TMP138]], align 4
+// CHECK3-NEXT:    [[TMP139:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 9
+// CHECK3-NEXT:    store i8* null, i8** [[TMP139]], align 4
+// CHECK3-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 0
+// CHECK3-NEXT:    [[TMP141:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 0
+// CHECK3-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK3-NEXT:    [[TMP143:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l170.region_id, i32 10, i8** [[TMP140]], i8** [[TMP141]], i64* [[TMP142]], i64* getelementptr inbounds ([10 x i64], [10 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
+// CHECK3-NEXT:    [[TMP144:%.*]] = icmp ne i32 [[TMP143]], 0
+// CHECK3-NEXT:    br i1 [[TMP144]], label [[OMP_OFFLOAD_FAILED17:%.*]], label [[OMP_OFFLOAD_CONT18:%.*]]
 // CHECK3:       omp_offload.failed17:
 // CHECK3-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l170(i32 [[TMP78]], [10 x float]* [[B]], i32 [[TMP1]], float* [[VLA]], [5 x [10 x double]]* [[C]], i32 5, i32 [[TMP3]], double* [[VLA1]], %struct.TT* [[D]], i32 [[TMP80]]) #[[ATTR4]]
 // CHECK3-NEXT:    br label [[OMP_OFFLOAD_CONT18]]
@@ -3901,10 +3847,10 @@ int bar(int n){
 // CHECK3-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l170(i32 [[TMP78]], [10 x float]* [[B]], i32 [[TMP1]], float* [[VLA]], [5 x [10 x double]]* [[C]], i32 5, i32 [[TMP3]], double* [[VLA1]], %struct.TT* [[D]], i32 [[TMP80]]) #[[ATTR4]]
 // CHECK3-NEXT:    br label [[OMP_IF_END20]]
 // CHECK3:       omp_if.end20:
-// CHECK3-NEXT:    [[TMP152:%.*]] = load i32, i32* [[A]], align 4
-// CHECK3-NEXT:    [[TMP153:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK3-NEXT:    call void @llvm.stackrestore(i8* [[TMP153]])
-// CHECK3-NEXT:    ret i32 [[TMP152]]
+// CHECK3-NEXT:    [[TMP145:%.*]] = load i32, i32* [[A]], align 4
+// CHECK3-NEXT:    [[TMP146:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK3-NEXT:    call void @llvm.stackrestore(i8* [[TMP146]])
+// CHECK3-NEXT:    ret i32 [[TMP145]]
 //
 //
 // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l103
@@ -4679,14 +4625,14 @@ int bar(int n){
 // CHECK3-NEXT:    [[TMP7:%.*]] = mul nuw i32 2, [[TMP1]]
 // CHECK3-NEXT:    [[TMP8:%.*]] = mul nuw i32 [[TMP7]], 2
 // CHECK3-NEXT:    [[TMP9:%.*]] = sext i32 [[TMP8]] to i64
-// CHECK3-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[TMP11:%.*]] = bitcast i8** [[TMP10]] to %struct.S1**
-// CHECK3-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP11]], align 4
-// CHECK3-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[TMP13:%.*]] = bitcast i8** [[TMP12]] to double**
-// CHECK3-NEXT:    store double* [[A]], double** [[TMP13]], align 4
-// CHECK3-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK3-NEXT:    store i64 8, i64* [[TMP14]], align 4
+// CHECK3-NEXT:    [[TMP10:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK3-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP10]], i8* align 4 bitcast ([5 x i64]* @.offload_sizes.11 to i8*), i32 40, i1 false)
+// CHECK3-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK3-NEXT:    [[TMP12:%.*]] = bitcast i8** [[TMP11]] to %struct.S1**
+// CHECK3-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP12]], align 4
+// CHECK3-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK3-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to double**
+// CHECK3-NEXT:    store double* [[A]], double** [[TMP14]], align 4
 // CHECK3-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
 // CHECK3-NEXT:    store i8* null, i8** [[TMP15]], align 4
 // CHECK3-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -4695,46 +4641,40 @@ int bar(int n){
 // CHECK3-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK3-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32*
 // CHECK3-NEXT:    store i32 [[TMP5]], i32* [[TMP19]], align 4
-// CHECK3-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK3-NEXT:    store i64 4, i64* [[TMP20]], align 4
-// CHECK3-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK3-NEXT:    store i8* null, i8** [[TMP21]], align 4
-// CHECK3-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK3-NEXT:    [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i32*
-// CHECK3-NEXT:    store i32 2, i32* [[TMP23]], align 4
-// CHECK3-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK3-NEXT:    [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i32*
-// CHECK3-NEXT:    store i32 2, i32* [[TMP25]], align 4
-// CHECK3-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK3-NEXT:    store i64 4, i64* [[TMP26]], align 4
-// CHECK3-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK3-NEXT:    store i8* null, i8** [[TMP27]], align 4
-// CHECK3-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK3-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK3-NEXT:    store i8* null, i8** [[TMP20]], align 4
+// CHECK3-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK3-NEXT:    [[TMP22:%.*]] = bitcast i8** [[TMP21]] to i32*
+// CHECK3-NEXT:    store i32 2, i32* [[TMP22]], align 4
+// CHECK3-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK3-NEXT:    [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i32*
+// CHECK3-NEXT:    store i32 2, i32* [[TMP24]], align 4
+// CHECK3-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK3-NEXT:    store i8* null, i8** [[TMP25]], align 4
+// CHECK3-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK3-NEXT:    [[TMP27:%.*]] = bitcast i8** [[TMP26]] to i32*
+// CHECK3-NEXT:    store i32 [[TMP1]], i32* [[TMP27]], align 4
+// CHECK3-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
 // CHECK3-NEXT:    [[TMP29:%.*]] = bitcast i8** [[TMP28]] to i32*
 // CHECK3-NEXT:    store i32 [[TMP1]], i32* [[TMP29]], align 4
-// CHECK3-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK3-NEXT:    [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i32*
-// CHECK3-NEXT:    store i32 [[TMP1]], i32* [[TMP31]], align 4
-// CHECK3-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK3-NEXT:    store i64 4, i64* [[TMP32]], align 4
-// CHECK3-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
-// CHECK3-NEXT:    store i8* null, i8** [[TMP33]], align 4
-// CHECK3-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK3-NEXT:    [[TMP35:%.*]] = bitcast i8** [[TMP34]] to i16**
-// CHECK3-NEXT:    store i16* [[VLA]], i16** [[TMP35]], align 4
-// CHECK3-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
-// CHECK3-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i16**
-// CHECK3-NEXT:    store i16* [[VLA]], i16** [[TMP37]], align 4
-// CHECK3-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK3-NEXT:    store i64 [[TMP9]], i64* [[TMP38]], align 4
-// CHECK3-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
-// CHECK3-NEXT:    store i8* null, i8** [[TMP39]], align 4
-// CHECK3-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK3-NEXT:    [[TMP43:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l242.region_id, i32 5, i8** [[TMP40]], i8** [[TMP41]], i64* [[TMP42]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.10, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
-// CHECK3-NEXT:    [[TMP44:%.*]] = icmp ne i32 [[TMP43]], 0
-// CHECK3-NEXT:    br i1 [[TMP44]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK3-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
+// CHECK3-NEXT:    store i8* null, i8** [[TMP30]], align 4
+// CHECK3-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK3-NEXT:    [[TMP32:%.*]] = bitcast i8** [[TMP31]] to i16**
+// CHECK3-NEXT:    store i16* [[VLA]], i16** [[TMP32]], align 4
+// CHECK3-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK3-NEXT:    [[TMP34:%.*]] = bitcast i8** [[TMP33]] to i16**
+// CHECK3-NEXT:    store i16* [[VLA]], i16** [[TMP34]], align 4
+// CHECK3-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
+// CHECK3-NEXT:    store i64 [[TMP9]], i64* [[TMP35]], align 4
+// CHECK3-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
+// CHECK3-NEXT:    store i8* null, i8** [[TMP36]], align 4
+// CHECK3-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK3-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK3-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK3-NEXT:    [[TMP40:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l242.region_id, i32 5, i8** [[TMP37]], i8** [[TMP38]], i64* [[TMP39]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.12, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
+// CHECK3-NEXT:    [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0
+// CHECK3-NEXT:    br i1 [[TMP41]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK3:       omp_offload.failed:
 // CHECK3-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l242(%struct.S1* [[THIS1]], i32 [[TMP5]], i32 2, i32 [[TMP1]], i16* [[VLA]]) #[[ATTR4]]
 // CHECK3-NEXT:    br label [[OMP_OFFLOAD_CONT]]
@@ -4744,15 +4684,15 @@ int bar(int n){
 // CHECK3-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l242(%struct.S1* [[THIS1]], i32 [[TMP5]], i32 2, i32 [[TMP1]], i16* [[VLA]]) #[[ATTR4]]
 // CHECK3-NEXT:    br label [[OMP_IF_END]]
 // CHECK3:       omp_if.end:
-// CHECK3-NEXT:    [[TMP45:%.*]] = mul nsw i32 1, [[TMP1]]
-// CHECK3-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i32 [[TMP45]]
+// CHECK3-NEXT:    [[TMP42:%.*]] = mul nsw i32 1, [[TMP1]]
+// CHECK3-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i32 [[TMP42]]
 // CHECK3-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1
-// CHECK3-NEXT:    [[TMP46:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
-// CHECK3-NEXT:    [[CONV:%.*]] = sext i16 [[TMP46]] to i32
-// CHECK3-NEXT:    [[TMP47:%.*]] = load i32, i32* [[B]], align 4
-// CHECK3-NEXT:    [[ADD3:%.*]] = add nsw i32 [[CONV]], [[TMP47]]
-// CHECK3-NEXT:    [[TMP48:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK3-NEXT:    call void @llvm.stackrestore(i8* [[TMP48]])
+// CHECK3-NEXT:    [[TMP43:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
+// CHECK3-NEXT:    [[CONV:%.*]] = sext i16 [[TMP43]] to i32
+// CHECK3-NEXT:    [[TMP44:%.*]] = load i32, i32* [[B]], align 4
+// CHECK3-NEXT:    [[ADD3:%.*]] = add nsw i32 [[CONV]], [[TMP44]]
+// CHECK3-NEXT:    [[TMP45:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK3-NEXT:    call void @llvm.stackrestore(i8* [[TMP45]])
 // CHECK3-NEXT:    ret i32 [[ADD3]]
 //
 //
@@ -4823,7 +4763,7 @@ int bar(int n){
 // CHECK3-NEXT:    store i8* null, i8** [[TMP26]], align 4
 // CHECK3-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[TMP29:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l224.region_id, i32 4, i8** [[TMP27]], i8** [[TMP28]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.12, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
+// CHECK3-NEXT:    [[TMP29:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l224.region_id, i32 4, i8** [[TMP27]], i8** [[TMP28]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.14, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.15, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
 // CHECK3-NEXT:    [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0
 // CHECK3-NEXT:    br i1 [[TMP30]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK3:       omp_offload.failed:
@@ -4891,7 +4831,7 @@ int bar(int n){
 // CHECK3-NEXT:    store i8* null, i8** [[TMP19]], align 4
 // CHECK3-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l207.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.15, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.16, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
+// CHECK3-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l207.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.17, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.18, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
 // CHECK3-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK3-NEXT:    br i1 [[TMP23]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK3:       omp_offload.failed:
@@ -4928,11 +4868,11 @@ int bar(int n){
 // CHECK3-NEXT:    [[TMP4:%.*]] = load i32, i32* [[B_ADDR]], align 4
 // CHECK3-NEXT:    store i32 [[TMP4]], i32* [[B_CASTED]], align 4
 // CHECK3-NEXT:    [[TMP5:%.*]] = load i32, i32* [[B_CASTED]], align 4
-// CHECK3-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i32, i32, i32, i16*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i32 [[TMP5]], i32 [[TMP1]], i32 [[TMP2]], i16* [[TMP3]])
+// CHECK3-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i32, i32, i32, i16*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i32 [[TMP5]], i32 [[TMP1]], i32 [[TMP2]], i16* [[TMP3]])
 // CHECK3-NEXT:    ret void
 //
 //
-// CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..9
+// CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..10
 // CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR3]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -5048,11 +4988,11 @@ int bar(int n){
 // CHECK3-NEXT:    [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8*
 // CHECK3-NEXT:    store i8 [[TMP5]], i8* [[CONV3]], align 1
 // CHECK3-NEXT:    [[TMP6:%.*]] = load i32, i32* [[AAA_CASTED]], align 4
-// CHECK3-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, [10 x i32]*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], [10 x i32]* [[TMP0]])
+// CHECK3-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, [10 x i32]*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], [10 x i32]* [[TMP0]])
 // CHECK3-NEXT:    ret void
 //
 //
-// CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..11
+// CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..13
 // CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -5095,11 +5035,11 @@ int bar(int n){
 // CHECK3-NEXT:    [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16*
 // CHECK3-NEXT:    store i16 [[TMP3]], i16* [[CONV1]], align 2
 // CHECK3-NEXT:    [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4
-// CHECK3-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], [10 x i32]* [[TMP0]])
+// CHECK3-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], [10 x i32]* [[TMP0]])
 // CHECK3-NEXT:    ret void
 //
 //
-// CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..14
+// CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..16
 // CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -5380,14 +5320,14 @@ int bar(int n){
 // CHECK4-NEXT:    [[TMP84:%.*]] = mul nuw i32 5, [[TMP3]]
 // CHECK4-NEXT:    [[TMP85:%.*]] = mul nuw i32 [[TMP84]], 8
 // CHECK4-NEXT:    [[TMP86:%.*]] = sext i32 [[TMP85]] to i64
-// CHECK4-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 0
-// CHECK4-NEXT:    [[TMP88:%.*]] = bitcast i8** [[TMP87]] to i32*
-// CHECK4-NEXT:    store i32 [[TMP78]], i32* [[TMP88]], align 4
-// CHECK4-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 0
-// CHECK4-NEXT:    [[TMP90:%.*]] = bitcast i8** [[TMP89]] to i32*
-// CHECK4-NEXT:    store i32 [[TMP78]], i32* [[TMP90]], align 4
-// CHECK4-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK4-NEXT:    store i64 4, i64* [[TMP91]], align 4
+// CHECK4-NEXT:    [[TMP87:%.*]] = bitcast [10 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK4-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP87]], i8* align 4 bitcast ([10 x i64]* @.offload_sizes.8 to i8*), i32 80, i1 false)
+// CHECK4-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 0
+// CHECK4-NEXT:    [[TMP89:%.*]] = bitcast i8** [[TMP88]] to i32*
+// CHECK4-NEXT:    store i32 [[TMP78]], i32* [[TMP89]], align 4
+// CHECK4-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 0
+// CHECK4-NEXT:    [[TMP91:%.*]] = bitcast i8** [[TMP90]] to i32*
+// CHECK4-NEXT:    store i32 [[TMP78]], i32* [[TMP91]], align 4
 // CHECK4-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 0
 // CHECK4-NEXT:    store i8* null, i8** [[TMP92]], align 4
 // CHECK4-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 1
@@ -5396,96 +5336,82 @@ int bar(int n){
 // CHECK4-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 1
 // CHECK4-NEXT:    [[TMP96:%.*]] = bitcast i8** [[TMP95]] to [10 x float]**
 // CHECK4-NEXT:    store [10 x float]* [[B]], [10 x float]** [[TMP96]], align 4
-// CHECK4-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK4-NEXT:    store i64 40, i64* [[TMP97]], align 4
-// CHECK4-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 1
-// CHECK4-NEXT:    store i8* null, i8** [[TMP98]], align 4
-// CHECK4-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 2
-// CHECK4-NEXT:    [[TMP100:%.*]] = bitcast i8** [[TMP99]] to i32*
-// CHECK4-NEXT:    store i32 [[TMP1]], i32* [[TMP100]], align 4
-// CHECK4-NEXT:    [[TMP101:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 2
-// CHECK4-NEXT:    [[TMP102:%.*]] = bitcast i8** [[TMP101]] to i32*
-// CHECK4-NEXT:    store i32 [[TMP1]], i32* [[TMP102]], align 4
-// CHECK4-NEXT:    [[TMP103:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK4-NEXT:    store i64 4, i64* [[TMP103]], align 4
-// CHECK4-NEXT:    [[TMP104:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 2
-// CHECK4-NEXT:    store i8* null, i8** [[TMP104]], align 4
-// CHECK4-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 3
+// CHECK4-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 1
+// CHECK4-NEXT:    store i8* null, i8** [[TMP97]], align 4
+// CHECK4-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 2
+// CHECK4-NEXT:    [[TMP99:%.*]] = bitcast i8** [[TMP98]] to i32*
+// CHECK4-NEXT:    store i32 [[TMP1]], i32* [[TMP99]], align 4
+// CHECK4-NEXT:    [[TMP100:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 2
+// CHECK4-NEXT:    [[TMP101:%.*]] = bitcast i8** [[TMP100]] to i32*
+// CHECK4-NEXT:    store i32 [[TMP1]], i32* [[TMP101]], align 4
+// CHECK4-NEXT:    [[TMP102:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 2
+// CHECK4-NEXT:    store i8* null, i8** [[TMP102]], align 4
+// CHECK4-NEXT:    [[TMP103:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 3
+// CHECK4-NEXT:    [[TMP104:%.*]] = bitcast i8** [[TMP103]] to float**
+// CHECK4-NEXT:    store float* [[VLA]], float** [[TMP104]], align 4
+// CHECK4-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 3
 // CHECK4-NEXT:    [[TMP106:%.*]] = bitcast i8** [[TMP105]] to float**
 // CHECK4-NEXT:    store float* [[VLA]], float** [[TMP106]], align 4
-// CHECK4-NEXT:    [[TMP107:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 3
-// CHECK4-NEXT:    [[TMP108:%.*]] = bitcast i8** [[TMP107]] to float**
-// CHECK4-NEXT:    store float* [[VLA]], float** [[TMP108]], align 4
-// CHECK4-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK4-NEXT:    store i64 [[TMP83]], i64* [[TMP109]], align 4
-// CHECK4-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 3
-// CHECK4-NEXT:    store i8* null, i8** [[TMP110]], align 4
-// CHECK4-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 4
+// CHECK4-NEXT:    [[TMP107:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
+// CHECK4-NEXT:    store i64 [[TMP83]], i64* [[TMP107]], align 4
+// CHECK4-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 3
+// CHECK4-NEXT:    store i8* null, i8** [[TMP108]], align 4
+// CHECK4-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 4
+// CHECK4-NEXT:    [[TMP110:%.*]] = bitcast i8** [[TMP109]] to [5 x [10 x double]]**
+// CHECK4-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP110]], align 4
+// CHECK4-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 4
 // CHECK4-NEXT:    [[TMP112:%.*]] = bitcast i8** [[TMP111]] to [5 x [10 x double]]**
 // CHECK4-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP112]], align 4
-// CHECK4-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 4
-// CHECK4-NEXT:    [[TMP114:%.*]] = bitcast i8** [[TMP113]] to [5 x [10 x double]]**
-// CHECK4-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP114]], align 4
-// CHECK4-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK4-NEXT:    store i64 400, i64* [[TMP115]], align 4
-// CHECK4-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 4
-// CHECK4-NEXT:    store i8* null, i8** [[TMP116]], align 4
-// CHECK4-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 5
-// CHECK4-NEXT:    [[TMP118:%.*]] = bitcast i8** [[TMP117]] to i32*
-// CHECK4-NEXT:    store i32 5, i32* [[TMP118]], align 4
-// CHECK4-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 5
+// CHECK4-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 4
+// CHECK4-NEXT:    store i8* null, i8** [[TMP113]], align 4
+// CHECK4-NEXT:    [[TMP114:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 5
+// CHECK4-NEXT:    [[TMP115:%.*]] = bitcast i8** [[TMP114]] to i32*
+// CHECK4-NEXT:    store i32 5, i32* [[TMP115]], align 4
+// CHECK4-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 5
+// CHECK4-NEXT:    [[TMP117:%.*]] = bitcast i8** [[TMP116]] to i32*
+// CHECK4-NEXT:    store i32 5, i32* [[TMP117]], align 4
+// CHECK4-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 5
+// CHECK4-NEXT:    store i8* null, i8** [[TMP118]], align 4
+// CHECK4-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 6
 // CHECK4-NEXT:    [[TMP120:%.*]] = bitcast i8** [[TMP119]] to i32*
-// CHECK4-NEXT:    store i32 5, i32* [[TMP120]], align 4
-// CHECK4-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 5
-// CHECK4-NEXT:    store i64 4, i64* [[TMP121]], align 4
-// CHECK4-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 5
-// CHECK4-NEXT:    store i8* null, i8** [[TMP122]], align 4
-// CHECK4-NEXT:    [[TMP123:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 6
-// CHECK4-NEXT:    [[TMP124:%.*]] = bitcast i8** [[TMP123]] to i32*
-// CHECK4-NEXT:    store i32 [[TMP3]], i32* [[TMP124]], align 4
-// CHECK4-NEXT:    [[TMP125:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 6
-// CHECK4-NEXT:    [[TMP126:%.*]] = bitcast i8** [[TMP125]] to i32*
-// CHECK4-NEXT:    store i32 [[TMP3]], i32* [[TMP126]], align 4
-// CHECK4-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 6
-// CHECK4-NEXT:    store i64 4, i64* [[TMP127]], align 4
-// CHECK4-NEXT:    [[TMP128:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 6
-// CHECK4-NEXT:    store i8* null, i8** [[TMP128]], align 4
-// CHECK4-NEXT:    [[TMP129:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 7
-// CHECK4-NEXT:    [[TMP130:%.*]] = bitcast i8** [[TMP129]] to double**
-// CHECK4-NEXT:    store double* [[VLA1]], double** [[TMP130]], align 4
-// CHECK4-NEXT:    [[TMP131:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 7
-// CHECK4-NEXT:    [[TMP132:%.*]] = bitcast i8** [[TMP131]] to double**
-// CHECK4-NEXT:    store double* [[VLA1]], double** [[TMP132]], align 4
-// CHECK4-NEXT:    [[TMP133:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
-// CHECK4-NEXT:    store i64 [[TMP86]], i64* [[TMP133]], align 4
-// CHECK4-NEXT:    [[TMP134:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 7
+// CHECK4-NEXT:    store i32 [[TMP3]], i32* [[TMP120]], align 4
+// CHECK4-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 6
+// CHECK4-NEXT:    [[TMP122:%.*]] = bitcast i8** [[TMP121]] to i32*
+// CHECK4-NEXT:    store i32 [[TMP3]], i32* [[TMP122]], align 4
+// CHECK4-NEXT:    [[TMP123:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 6
+// CHECK4-NEXT:    store i8* null, i8** [[TMP123]], align 4
+// CHECK4-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 7
+// CHECK4-NEXT:    [[TMP125:%.*]] = bitcast i8** [[TMP124]] to double**
+// CHECK4-NEXT:    store double* [[VLA1]], double** [[TMP125]], align 4
+// CHECK4-NEXT:    [[TMP126:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 7
+// CHECK4-NEXT:    [[TMP127:%.*]] = bitcast i8** [[TMP126]] to double**
+// CHECK4-NEXT:    store double* [[VLA1]], double** [[TMP127]], align 4
+// CHECK4-NEXT:    [[TMP128:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
+// CHECK4-NEXT:    store i64 [[TMP86]], i64* [[TMP128]], align 4
+// CHECK4-NEXT:    [[TMP129:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 7
+// CHECK4-NEXT:    store i8* null, i8** [[TMP129]], align 4
+// CHECK4-NEXT:    [[TMP130:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 8
+// CHECK4-NEXT:    [[TMP131:%.*]] = bitcast i8** [[TMP130]] to %struct.TT**
+// CHECK4-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP131]], align 4
+// CHECK4-NEXT:    [[TMP132:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 8
+// CHECK4-NEXT:    [[TMP133:%.*]] = bitcast i8** [[TMP132]] to %struct.TT**
+// CHECK4-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP133]], align 4
+// CHECK4-NEXT:    [[TMP134:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 8
 // CHECK4-NEXT:    store i8* null, i8** [[TMP134]], align 4
-// CHECK4-NEXT:    [[TMP135:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 8
-// CHECK4-NEXT:    [[TMP136:%.*]] = bitcast i8** [[TMP135]] to %struct.TT**
-// CHECK4-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP136]], align 4
-// CHECK4-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 8
-// CHECK4-NEXT:    [[TMP138:%.*]] = bitcast i8** [[TMP137]] to %struct.TT**
-// CHECK4-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP138]], align 4
-// CHECK4-NEXT:    [[TMP139:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 8
-// CHECK4-NEXT:    store i64 12, i64* [[TMP139]], align 4
-// CHECK4-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 8
-// CHECK4-NEXT:    store i8* null, i8** [[TMP140]], align 4
-// CHECK4-NEXT:    [[TMP141:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 9
-// CHECK4-NEXT:    [[TMP142:%.*]] = bitcast i8** [[TMP141]] to i32*
-// CHECK4-NEXT:    store i32 [[TMP80]], i32* [[TMP142]], align 4
-// CHECK4-NEXT:    [[TMP143:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 9
-// CHECK4-NEXT:    [[TMP144:%.*]] = bitcast i8** [[TMP143]] to i32*
-// CHECK4-NEXT:    store i32 [[TMP80]], i32* [[TMP144]], align 4
-// CHECK4-NEXT:    [[TMP145:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 9
-// CHECK4-NEXT:    store i64 4, i64* [[TMP145]], align 4
-// CHECK4-NEXT:    [[TMP146:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 9
-// CHECK4-NEXT:    store i8* null, i8** [[TMP146]], align 4
-// CHECK4-NEXT:    [[TMP147:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 0
-// CHECK4-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 0
-// CHECK4-NEXT:    [[TMP149:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK4-NEXT:    [[TMP150:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l170.region_id, i32 10, i8** [[TMP147]], i8** [[TMP148]], i64* [[TMP149]], i64* getelementptr inbounds ([10 x i64], [10 x i64]* @.offload_maptypes.8, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
-// CHECK4-NEXT:    [[TMP151:%.*]] = icmp ne i32 [[TMP150]], 0
-// CHECK4-NEXT:    br i1 [[TMP151]], label [[OMP_OFFLOAD_FAILED17:%.*]], label [[OMP_OFFLOAD_CONT18:%.*]]
+// CHECK4-NEXT:    [[TMP135:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 9
+// CHECK4-NEXT:    [[TMP136:%.*]] = bitcast i8** [[TMP135]] to i32*
+// CHECK4-NEXT:    store i32 [[TMP80]], i32* [[TMP136]], align 4
+// CHECK4-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 9
+// CHECK4-NEXT:    [[TMP138:%.*]] = bitcast i8** [[TMP137]] to i32*
+// CHECK4-NEXT:    store i32 [[TMP80]], i32* [[TMP138]], align 4
+// CHECK4-NEXT:    [[TMP139:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 9
+// CHECK4-NEXT:    store i8* null, i8** [[TMP139]], align 4
+// CHECK4-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 0
+// CHECK4-NEXT:    [[TMP141:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 0
+// CHECK4-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK4-NEXT:    [[TMP143:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l170.region_id, i32 10, i8** [[TMP140]], i8** [[TMP141]], i64* [[TMP142]], i64* getelementptr inbounds ([10 x i64], [10 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
+// CHECK4-NEXT:    [[TMP144:%.*]] = icmp ne i32 [[TMP143]], 0
+// CHECK4-NEXT:    br i1 [[TMP144]], label [[OMP_OFFLOAD_FAILED17:%.*]], label [[OMP_OFFLOAD_CONT18:%.*]]
 // CHECK4:       omp_offload.failed17:
 // CHECK4-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l170(i32 [[TMP78]], [10 x float]* [[B]], i32 [[TMP1]], float* [[VLA]], [5 x [10 x double]]* [[C]], i32 5, i32 [[TMP3]], double* [[VLA1]], %struct.TT* [[D]], i32 [[TMP80]]) #[[ATTR4]]
 // CHECK4-NEXT:    br label [[OMP_OFFLOAD_CONT18]]
@@ -5495,10 +5421,10 @@ int bar(int n){
 // CHECK4-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l170(i32 [[TMP78]], [10 x float]* [[B]], i32 [[TMP1]], float* [[VLA]], [5 x [10 x double]]* [[C]], i32 5, i32 [[TMP3]], double* [[VLA1]], %struct.TT* [[D]], i32 [[TMP80]]) #[[ATTR4]]
 // CHECK4-NEXT:    br label [[OMP_IF_END20]]
 // CHECK4:       omp_if.end20:
-// CHECK4-NEXT:    [[TMP152:%.*]] = load i32, i32* [[A]], align 4
-// CHECK4-NEXT:    [[TMP153:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK4-NEXT:    call void @llvm.stackrestore(i8* [[TMP153]])
-// CHECK4-NEXT:    ret i32 [[TMP152]]
+// CHECK4-NEXT:    [[TMP145:%.*]] = load i32, i32* [[A]], align 4
+// CHECK4-NEXT:    [[TMP146:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK4-NEXT:    call void @llvm.stackrestore(i8* [[TMP146]])
+// CHECK4-NEXT:    ret i32 [[TMP145]]
 //
 //
 // CHECK4-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l103
@@ -6273,14 +6199,14 @@ int bar(int n){
 // CHECK4-NEXT:    [[TMP7:%.*]] = mul nuw i32 2, [[TMP1]]
 // CHECK4-NEXT:    [[TMP8:%.*]] = mul nuw i32 [[TMP7]], 2
 // CHECK4-NEXT:    [[TMP9:%.*]] = sext i32 [[TMP8]] to i64
-// CHECK4-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK4-NEXT:    [[TMP11:%.*]] = bitcast i8** [[TMP10]] to %struct.S1**
-// CHECK4-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP11]], align 4
-// CHECK4-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK4-NEXT:    [[TMP13:%.*]] = bitcast i8** [[TMP12]] to double**
-// CHECK4-NEXT:    store double* [[A]], double** [[TMP13]], align 4
-// CHECK4-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK4-NEXT:    store i64 8, i64* [[TMP14]], align 4
+// CHECK4-NEXT:    [[TMP10:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK4-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP10]], i8* align 4 bitcast ([5 x i64]* @.offload_sizes.11 to i8*), i32 40, i1 false)
+// CHECK4-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK4-NEXT:    [[TMP12:%.*]] = bitcast i8** [[TMP11]] to %struct.S1**
+// CHECK4-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP12]], align 4
+// CHECK4-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK4-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to double**
+// CHECK4-NEXT:    store double* [[A]], double** [[TMP14]], align 4
 // CHECK4-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
 // CHECK4-NEXT:    store i8* null, i8** [[TMP15]], align 4
 // CHECK4-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -6289,46 +6215,40 @@ int bar(int n){
 // CHECK4-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK4-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32*
 // CHECK4-NEXT:    store i32 [[TMP5]], i32* [[TMP19]], align 4
-// CHECK4-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK4-NEXT:    store i64 4, i64* [[TMP20]], align 4
-// CHECK4-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK4-NEXT:    store i8* null, i8** [[TMP21]], align 4
-// CHECK4-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK4-NEXT:    [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i32*
-// CHECK4-NEXT:    store i32 2, i32* [[TMP23]], align 4
-// CHECK4-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK4-NEXT:    [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i32*
-// CHECK4-NEXT:    store i32 2, i32* [[TMP25]], align 4
-// CHECK4-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK4-NEXT:    store i64 4, i64* [[TMP26]], align 4
-// CHECK4-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK4-NEXT:    store i8* null, i8** [[TMP27]], align 4
-// CHECK4-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK4-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK4-NEXT:    store i8* null, i8** [[TMP20]], align 4
+// CHECK4-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK4-NEXT:    [[TMP22:%.*]] = bitcast i8** [[TMP21]] to i32*
+// CHECK4-NEXT:    store i32 2, i32* [[TMP22]], align 4
+// CHECK4-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK4-NEXT:    [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i32*
+// CHECK4-NEXT:    store i32 2, i32* [[TMP24]], align 4
+// CHECK4-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK4-NEXT:    store i8* null, i8** [[TMP25]], align 4
+// CHECK4-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK4-NEXT:    [[TMP27:%.*]] = bitcast i8** [[TMP26]] to i32*
+// CHECK4-NEXT:    store i32 [[TMP1]], i32* [[TMP27]], align 4
+// CHECK4-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
 // CHECK4-NEXT:    [[TMP29:%.*]] = bitcast i8** [[TMP28]] to i32*
 // CHECK4-NEXT:    store i32 [[TMP1]], i32* [[TMP29]], align 4
-// CHECK4-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK4-NEXT:    [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i32*
-// CHECK4-NEXT:    store i32 [[TMP1]], i32* [[TMP31]], align 4
-// CHECK4-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK4-NEXT:    store i64 4, i64* [[TMP32]], align 4
-// CHECK4-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
-// CHECK4-NEXT:    store i8* null, i8** [[TMP33]], align 4
-// CHECK4-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK4-NEXT:    [[TMP35:%.*]] = bitcast i8** [[TMP34]] to i16**
-// CHECK4-NEXT:    store i16* [[VLA]], i16** [[TMP35]], align 4
-// CHECK4-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
-// CHECK4-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i16**
-// CHECK4-NEXT:    store i16* [[VLA]], i16** [[TMP37]], align 4
-// CHECK4-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK4-NEXT:    store i64 [[TMP9]], i64* [[TMP38]], align 4
-// CHECK4-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
-// CHECK4-NEXT:    store i8* null, i8** [[TMP39]], align 4
-// CHECK4-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK4-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK4-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK4-NEXT:    [[TMP43:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l242.region_id, i32 5, i8** [[TMP40]], i8** [[TMP41]], i64* [[TMP42]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.10, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
-// CHECK4-NEXT:    [[TMP44:%.*]] = icmp ne i32 [[TMP43]], 0
-// CHECK4-NEXT:    br i1 [[TMP44]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK4-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
+// CHECK4-NEXT:    store i8* null, i8** [[TMP30]], align 4
+// CHECK4-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK4-NEXT:    [[TMP32:%.*]] = bitcast i8** [[TMP31]] to i16**
+// CHECK4-NEXT:    store i16* [[VLA]], i16** [[TMP32]], align 4
+// CHECK4-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK4-NEXT:    [[TMP34:%.*]] = bitcast i8** [[TMP33]] to i16**
+// CHECK4-NEXT:    store i16* [[VLA]], i16** [[TMP34]], align 4
+// CHECK4-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
+// CHECK4-NEXT:    store i64 [[TMP9]], i64* [[TMP35]], align 4
+// CHECK4-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
+// CHECK4-NEXT:    store i8* null, i8** [[TMP36]], align 4
+// CHECK4-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK4-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK4-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK4-NEXT:    [[TMP40:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l242.region_id, i32 5, i8** [[TMP37]], i8** [[TMP38]], i64* [[TMP39]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.12, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
+// CHECK4-NEXT:    [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0
+// CHECK4-NEXT:    br i1 [[TMP41]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK4:       omp_offload.failed:
 // CHECK4-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l242(%struct.S1* [[THIS1]], i32 [[TMP5]], i32 2, i32 [[TMP1]], i16* [[VLA]]) #[[ATTR4]]
 // CHECK4-NEXT:    br label [[OMP_OFFLOAD_CONT]]
@@ -6338,15 +6258,15 @@ int bar(int n){
 // CHECK4-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l242(%struct.S1* [[THIS1]], i32 [[TMP5]], i32 2, i32 [[TMP1]], i16* [[VLA]]) #[[ATTR4]]
 // CHECK4-NEXT:    br label [[OMP_IF_END]]
 // CHECK4:       omp_if.end:
-// CHECK4-NEXT:    [[TMP45:%.*]] = mul nsw i32 1, [[TMP1]]
-// CHECK4-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i32 [[TMP45]]
+// CHECK4-NEXT:    [[TMP42:%.*]] = mul nsw i32 1, [[TMP1]]
+// CHECK4-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i32 [[TMP42]]
 // CHECK4-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1
-// CHECK4-NEXT:    [[TMP46:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
-// CHECK4-NEXT:    [[CONV:%.*]] = sext i16 [[TMP46]] to i32
-// CHECK4-NEXT:    [[TMP47:%.*]] = load i32, i32* [[B]], align 4
-// CHECK4-NEXT:    [[ADD3:%.*]] = add nsw i32 [[CONV]], [[TMP47]]
-// CHECK4-NEXT:    [[TMP48:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK4-NEXT:    call void @llvm.stackrestore(i8* [[TMP48]])
+// CHECK4-NEXT:    [[TMP43:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
+// CHECK4-NEXT:    [[CONV:%.*]] = sext i16 [[TMP43]] to i32
+// CHECK4-NEXT:    [[TMP44:%.*]] = load i32, i32* [[B]], align 4
+// CHECK4-NEXT:    [[ADD3:%.*]] = add nsw i32 [[CONV]], [[TMP44]]
+// CHECK4-NEXT:    [[TMP45:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK4-NEXT:    call void @llvm.stackrestore(i8* [[TMP45]])
 // CHECK4-NEXT:    ret i32 [[ADD3]]
 //
 //
@@ -6417,7 +6337,7 @@ int bar(int n){
 // CHECK4-NEXT:    store i8* null, i8** [[TMP26]], align 4
 // CHECK4-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK4-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK4-NEXT:    [[TMP29:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l224.region_id, i32 4, i8** [[TMP27]], i8** [[TMP28]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.12, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
+// CHECK4-NEXT:    [[TMP29:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l224.region_id, i32 4, i8** [[TMP27]], i8** [[TMP28]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.14, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.15, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
 // CHECK4-NEXT:    [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0
 // CHECK4-NEXT:    br i1 [[TMP30]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK4:       omp_offload.failed:
@@ -6485,7 +6405,7 @@ int bar(int n){
 // CHECK4-NEXT:    store i8* null, i8** [[TMP19]], align 4
 // CHECK4-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK4-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK4-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l207.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.15, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.16, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
+// CHECK4-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l207.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.17, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.18, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
 // CHECK4-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK4-NEXT:    br i1 [[TMP23]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK4:       omp_offload.failed:
@@ -6522,11 +6442,11 @@ int bar(int n){
 // CHECK4-NEXT:    [[TMP4:%.*]] = load i32, i32* [[B_ADDR]], align 4
 // CHECK4-NEXT:    store i32 [[TMP4]], i32* [[B_CASTED]], align 4
 // CHECK4-NEXT:    [[TMP5:%.*]] = load i32, i32* [[B_CASTED]], align 4
-// CHECK4-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i32, i32, i32, i16*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i32 [[TMP5]], i32 [[TMP1]], i32 [[TMP2]], i16* [[TMP3]])
+// CHECK4-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i32, i32, i32, i16*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i32 [[TMP5]], i32 [[TMP1]], i32 [[TMP2]], i16* [[TMP3]])
 // CHECK4-NEXT:    ret void
 //
 //
-// CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..9
+// CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..10
 // CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR3]] {
 // CHECK4-NEXT:  entry:
 // CHECK4-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -6642,11 +6562,11 @@ int bar(int n){
 // CHECK4-NEXT:    [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8*
 // CHECK4-NEXT:    store i8 [[TMP5]], i8* [[CONV3]], align 1
 // CHECK4-NEXT:    [[TMP6:%.*]] = load i32, i32* [[AAA_CASTED]], align 4
-// CHECK4-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, [10 x i32]*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], [10 x i32]* [[TMP0]])
+// CHECK4-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, [10 x i32]*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], [10 x i32]* [[TMP0]])
 // CHECK4-NEXT:    ret void
 //
 //
-// CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..11
+// CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..13
 // CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] {
 // CHECK4-NEXT:  entry:
 // CHECK4-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -6689,11 +6609,11 @@ int bar(int n){
 // CHECK4-NEXT:    [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16*
 // CHECK4-NEXT:    store i16 [[TMP3]], i16* [[CONV1]], align 2
 // CHECK4-NEXT:    [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4
-// CHECK4-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], [10 x i32]* [[TMP0]])
+// CHECK4-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], [10 x i32]* [[TMP0]])
 // CHECK4-NEXT:    ret void
 //
 //
-// CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..14
+// CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..16
 // CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] {
 // CHECK4-NEXT:  entry:
 // CHECK4-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -10152,14 +10072,14 @@ int bar(int n){
 // CHECK17-NEXT:    [[TMP86:%.*]] = mul nuw i64 [[TMP2]], 4
 // CHECK17-NEXT:    [[TMP87:%.*]] = mul nuw i64 5, [[TMP5]]
 // CHECK17-NEXT:    [[TMP88:%.*]] = mul nuw i64 [[TMP87]], 8
-// CHECK17-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP90:%.*]] = bitcast i8** [[TMP89]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP82]], i64* [[TMP90]], align 8
-// CHECK17-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP92:%.*]] = bitcast i8** [[TMP91]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP82]], i64* [[TMP92]], align 8
-// CHECK17-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK17-NEXT:    store i64 4, i64* [[TMP93]], align 8
+// CHECK17-NEXT:    [[TMP89:%.*]] = bitcast [10 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK17-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP89]], i8* align 8 bitcast ([10 x i64]* @.offload_sizes.8 to i8*), i64 80, i1 false)
+// CHECK17-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP91:%.*]] = bitcast i8** [[TMP90]] to i64*
+// CHECK17-NEXT:    store i64 [[TMP82]], i64* [[TMP91]], align 8
+// CHECK17-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP93:%.*]] = bitcast i8** [[TMP92]] to i64*
+// CHECK17-NEXT:    store i64 [[TMP82]], i64* [[TMP93]], align 8
 // CHECK17-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 0
 // CHECK17-NEXT:    store i8* null, i8** [[TMP94]], align 8
 // CHECK17-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 1
@@ -10168,96 +10088,82 @@ int bar(int n){
 // CHECK17-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 1
 // CHECK17-NEXT:    [[TMP98:%.*]] = bitcast i8** [[TMP97]] to [10 x float]**
 // CHECK17-NEXT:    store [10 x float]* [[B]], [10 x float]** [[TMP98]], align 8
-// CHECK17-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK17-NEXT:    store i64 40, i64* [[TMP99]], align 8
-// CHECK17-NEXT:    [[TMP100:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 1
-// CHECK17-NEXT:    store i8* null, i8** [[TMP100]], align 8
-// CHECK17-NEXT:    [[TMP101:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 2
-// CHECK17-NEXT:    [[TMP102:%.*]] = bitcast i8** [[TMP101]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP2]], i64* [[TMP102]], align 8
-// CHECK17-NEXT:    [[TMP103:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 2
-// CHECK17-NEXT:    [[TMP104:%.*]] = bitcast i8** [[TMP103]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP2]], i64* [[TMP104]], align 8
-// CHECK17-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK17-NEXT:    store i64 8, i64* [[TMP105]], align 8
-// CHECK17-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 2
-// CHECK17-NEXT:    store i8* null, i8** [[TMP106]], align 8
-// CHECK17-NEXT:    [[TMP107:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 3
+// CHECK17-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 1
+// CHECK17-NEXT:    store i8* null, i8** [[TMP99]], align 8
+// CHECK17-NEXT:    [[TMP100:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 2
+// CHECK17-NEXT:    [[TMP101:%.*]] = bitcast i8** [[TMP100]] to i64*
+// CHECK17-NEXT:    store i64 [[TMP2]], i64* [[TMP101]], align 8
+// CHECK17-NEXT:    [[TMP102:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 2
+// CHECK17-NEXT:    [[TMP103:%.*]] = bitcast i8** [[TMP102]] to i64*
+// CHECK17-NEXT:    store i64 [[TMP2]], i64* [[TMP103]], align 8
+// CHECK17-NEXT:    [[TMP104:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 2
+// CHECK17-NEXT:    store i8* null, i8** [[TMP104]], align 8
+// CHECK17-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 3
+// CHECK17-NEXT:    [[TMP106:%.*]] = bitcast i8** [[TMP105]] to float**
+// CHECK17-NEXT:    store float* [[VLA]], float** [[TMP106]], align 8
+// CHECK17-NEXT:    [[TMP107:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 3
 // CHECK17-NEXT:    [[TMP108:%.*]] = bitcast i8** [[TMP107]] to float**
 // CHECK17-NEXT:    store float* [[VLA]], float** [[TMP108]], align 8
-// CHECK17-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 3
-// CHECK17-NEXT:    [[TMP110:%.*]] = bitcast i8** [[TMP109]] to float**
-// CHECK17-NEXT:    store float* [[VLA]], float** [[TMP110]], align 8
-// CHECK17-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK17-NEXT:    store i64 [[TMP86]], i64* [[TMP111]], align 8
-// CHECK17-NEXT:    [[TMP112:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 3
-// CHECK17-NEXT:    store i8* null, i8** [[TMP112]], align 8
-// CHECK17-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 4
+// CHECK17-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
+// CHECK17-NEXT:    store i64 [[TMP86]], i64* [[TMP109]], align 8
+// CHECK17-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 3
+// CHECK17-NEXT:    store i8* null, i8** [[TMP110]], align 8
+// CHECK17-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 4
+// CHECK17-NEXT:    [[TMP112:%.*]] = bitcast i8** [[TMP111]] to [5 x [10 x double]]**
+// CHECK17-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP112]], align 8
+// CHECK17-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 4
 // CHECK17-NEXT:    [[TMP114:%.*]] = bitcast i8** [[TMP113]] to [5 x [10 x double]]**
 // CHECK17-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP114]], align 8
-// CHECK17-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 4
-// CHECK17-NEXT:    [[TMP116:%.*]] = bitcast i8** [[TMP115]] to [5 x [10 x double]]**
-// CHECK17-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP116]], align 8
-// CHECK17-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK17-NEXT:    store i64 400, i64* [[TMP117]], align 8
-// CHECK17-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 4
-// CHECK17-NEXT:    store i8* null, i8** [[TMP118]], align 8
-// CHECK17-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 5
-// CHECK17-NEXT:    [[TMP120:%.*]] = bitcast i8** [[TMP119]] to i64*
-// CHECK17-NEXT:    store i64 5, i64* [[TMP120]], align 8
-// CHECK17-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 5
+// CHECK17-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 4
+// CHECK17-NEXT:    store i8* null, i8** [[TMP115]], align 8
+// CHECK17-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 5
+// CHECK17-NEXT:    [[TMP117:%.*]] = bitcast i8** [[TMP116]] to i64*
+// CHECK17-NEXT:    store i64 5, i64* [[TMP117]], align 8
+// CHECK17-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 5
+// CHECK17-NEXT:    [[TMP119:%.*]] = bitcast i8** [[TMP118]] to i64*
+// CHECK17-NEXT:    store i64 5, i64* [[TMP119]], align 8
+// CHECK17-NEXT:    [[TMP120:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 5
+// CHECK17-NEXT:    store i8* null, i8** [[TMP120]], align 8
+// CHECK17-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 6
 // CHECK17-NEXT:    [[TMP122:%.*]] = bitcast i8** [[TMP121]] to i64*
-// CHECK17-NEXT:    store i64 5, i64* [[TMP122]], align 8
-// CHECK17-NEXT:    [[TMP123:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 5
-// CHECK17-NEXT:    store i64 8, i64* [[TMP123]], align 8
-// CHECK17-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 5
-// CHECK17-NEXT:    store i8* null, i8** [[TMP124]], align 8
-// CHECK17-NEXT:    [[TMP125:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 6
-// CHECK17-NEXT:    [[TMP126:%.*]] = bitcast i8** [[TMP125]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP5]], i64* [[TMP126]], align 8
-// CHECK17-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 6
-// CHECK17-NEXT:    [[TMP128:%.*]] = bitcast i8** [[TMP127]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP5]], i64* [[TMP128]], align 8
-// CHECK17-NEXT:    [[TMP129:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 6
-// CHECK17-NEXT:    store i64 8, i64* [[TMP129]], align 8
-// CHECK17-NEXT:    [[TMP130:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 6
-// CHECK17-NEXT:    store i8* null, i8** [[TMP130]], align 8
-// CHECK17-NEXT:    [[TMP131:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 7
-// CHECK17-NEXT:    [[TMP132:%.*]] = bitcast i8** [[TMP131]] to double**
-// CHECK17-NEXT:    store double* [[VLA1]], double** [[TMP132]], align 8
-// CHECK17-NEXT:    [[TMP133:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 7
-// CHECK17-NEXT:    [[TMP134:%.*]] = bitcast i8** [[TMP133]] to double**
-// CHECK17-NEXT:    store double* [[VLA1]], double** [[TMP134]], align 8
-// CHECK17-NEXT:    [[TMP135:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
-// CHECK17-NEXT:    store i64 [[TMP88]], i64* [[TMP135]], align 8
-// CHECK17-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 7
+// CHECK17-NEXT:    store i64 [[TMP5]], i64* [[TMP122]], align 8
+// CHECK17-NEXT:    [[TMP123:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 6
+// CHECK17-NEXT:    [[TMP124:%.*]] = bitcast i8** [[TMP123]] to i64*
+// CHECK17-NEXT:    store i64 [[TMP5]], i64* [[TMP124]], align 8
+// CHECK17-NEXT:    [[TMP125:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 6
+// CHECK17-NEXT:    store i8* null, i8** [[TMP125]], align 8
+// CHECK17-NEXT:    [[TMP126:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 7
+// CHECK17-NEXT:    [[TMP127:%.*]] = bitcast i8** [[TMP126]] to double**
+// CHECK17-NEXT:    store double* [[VLA1]], double** [[TMP127]], align 8
+// CHECK17-NEXT:    [[TMP128:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 7
+// CHECK17-NEXT:    [[TMP129:%.*]] = bitcast i8** [[TMP128]] to double**
+// CHECK17-NEXT:    store double* [[VLA1]], double** [[TMP129]], align 8
+// CHECK17-NEXT:    [[TMP130:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
+// CHECK17-NEXT:    store i64 [[TMP88]], i64* [[TMP130]], align 8
+// CHECK17-NEXT:    [[TMP131:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 7
+// CHECK17-NEXT:    store i8* null, i8** [[TMP131]], align 8
+// CHECK17-NEXT:    [[TMP132:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 8
+// CHECK17-NEXT:    [[TMP133:%.*]] = bitcast i8** [[TMP132]] to %struct.TT**
+// CHECK17-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP133]], align 8
+// CHECK17-NEXT:    [[TMP134:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 8
+// CHECK17-NEXT:    [[TMP135:%.*]] = bitcast i8** [[TMP134]] to %struct.TT**
+// CHECK17-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP135]], align 8
+// CHECK17-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 8
 // CHECK17-NEXT:    store i8* null, i8** [[TMP136]], align 8
-// CHECK17-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 8
-// CHECK17-NEXT:    [[TMP138:%.*]] = bitcast i8** [[TMP137]] to %struct.TT**
-// CHECK17-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP138]], align 8
-// CHECK17-NEXT:    [[TMP139:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 8
-// CHECK17-NEXT:    [[TMP140:%.*]] = bitcast i8** [[TMP139]] to %struct.TT**
-// CHECK17-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP140]], align 8
-// CHECK17-NEXT:    [[TMP141:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 8
-// CHECK17-NEXT:    store i64 16, i64* [[TMP141]], align 8
-// CHECK17-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 8
-// CHECK17-NEXT:    store i8* null, i8** [[TMP142]], align 8
-// CHECK17-NEXT:    [[TMP143:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 9
-// CHECK17-NEXT:    [[TMP144:%.*]] = bitcast i8** [[TMP143]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP84]], i64* [[TMP144]], align 8
-// CHECK17-NEXT:    [[TMP145:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 9
-// CHECK17-NEXT:    [[TMP146:%.*]] = bitcast i8** [[TMP145]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP84]], i64* [[TMP146]], align 8
-// CHECK17-NEXT:    [[TMP147:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 9
-// CHECK17-NEXT:    store i64 4, i64* [[TMP147]], align 8
-// CHECK17-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 9
-// CHECK17-NEXT:    store i8* null, i8** [[TMP148]], align 8
-// CHECK17-NEXT:    [[TMP149:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP151:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP152:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l170.region_id, i32 10, i8** [[TMP149]], i8** [[TMP150]], i64* [[TMP151]], i64* getelementptr inbounds ([10 x i64], [10 x i64]* @.offload_maptypes.8, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
-// CHECK17-NEXT:    [[TMP153:%.*]] = icmp ne i32 [[TMP152]], 0
-// CHECK17-NEXT:    br i1 [[TMP153]], label [[OMP_OFFLOAD_FAILED23:%.*]], label [[OMP_OFFLOAD_CONT24:%.*]]
+// CHECK17-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 9
+// CHECK17-NEXT:    [[TMP138:%.*]] = bitcast i8** [[TMP137]] to i64*
+// CHECK17-NEXT:    store i64 [[TMP84]], i64* [[TMP138]], align 8
+// CHECK17-NEXT:    [[TMP139:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 9
+// CHECK17-NEXT:    [[TMP140:%.*]] = bitcast i8** [[TMP139]] to i64*
+// CHECK17-NEXT:    store i64 [[TMP84]], i64* [[TMP140]], align 8
+// CHECK17-NEXT:    [[TMP141:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 9
+// CHECK17-NEXT:    store i8* null, i8** [[TMP141]], align 8
+// CHECK17-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP143:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP144:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP145:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l170.region_id, i32 10, i8** [[TMP142]], i8** [[TMP143]], i64* [[TMP144]], i64* getelementptr inbounds ([10 x i64], [10 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
+// CHECK17-NEXT:    [[TMP146:%.*]] = icmp ne i32 [[TMP145]], 0
+// CHECK17-NEXT:    br i1 [[TMP146]], label [[OMP_OFFLOAD_FAILED23:%.*]], label [[OMP_OFFLOAD_CONT24:%.*]]
 // CHECK17:       omp_offload.failed23:
 // CHECK17-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l170(i64 [[TMP82]], [10 x float]* [[B]], i64 [[TMP2]], float* [[VLA]], [5 x [10 x double]]* [[C]], i64 5, i64 [[TMP5]], double* [[VLA1]], %struct.TT* [[D]], i64 [[TMP84]]) #[[ATTR4]]
 // CHECK17-NEXT:    br label [[OMP_OFFLOAD_CONT24]]
@@ -10267,10 +10173,10 @@ int bar(int n){
 // CHECK17-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l170(i64 [[TMP82]], [10 x float]* [[B]], i64 [[TMP2]], float* [[VLA]], [5 x [10 x double]]* [[C]], i64 5, i64 [[TMP5]], double* [[VLA1]], %struct.TT* [[D]], i64 [[TMP84]]) #[[ATTR4]]
 // CHECK17-NEXT:    br label [[OMP_IF_END26]]
 // CHECK17:       omp_if.end26:
-// CHECK17-NEXT:    [[TMP154:%.*]] = load i32, i32* [[A]], align 4
-// CHECK17-NEXT:    [[TMP155:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK17-NEXT:    call void @llvm.stackrestore(i8* [[TMP155]])
-// CHECK17-NEXT:    ret i32 [[TMP154]]
+// CHECK17-NEXT:    [[TMP147:%.*]] = load i32, i32* [[A]], align 4
+// CHECK17-NEXT:    [[TMP148:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK17-NEXT:    call void @llvm.stackrestore(i8* [[TMP148]])
+// CHECK17-NEXT:    ret i32 [[TMP147]]
 //
 //
 // CHECK17-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l103
@@ -11068,14 +10974,14 @@ int bar(int n){
 // CHECK17-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0
 // CHECK17-NEXT:    [[TMP8:%.*]] = mul nuw i64 2, [[TMP2]]
 // CHECK17-NEXT:    [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 2
-// CHECK17-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP11:%.*]] = bitcast i8** [[TMP10]] to %struct.S1**
-// CHECK17-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP11]], align 8
-// CHECK17-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP13:%.*]] = bitcast i8** [[TMP12]] to double**
-// CHECK17-NEXT:    store double* [[A]], double** [[TMP13]], align 8
-// CHECK17-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK17-NEXT:    store i64 8, i64* [[TMP14]], align 8
+// CHECK17-NEXT:    [[TMP10:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK17-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP10]], i8* align 8 bitcast ([5 x i64]* @.offload_sizes.11 to i8*), i64 40, i1 false)
+// CHECK17-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP12:%.*]] = bitcast i8** [[TMP11]] to %struct.S1**
+// CHECK17-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP12]], align 8
+// CHECK17-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to double**
+// CHECK17-NEXT:    store double* [[A]], double** [[TMP14]], align 8
 // CHECK17-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
 // CHECK17-NEXT:    store i8* null, i8** [[TMP15]], align 8
 // CHECK17-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -11084,46 +10990,40 @@ int bar(int n){
 // CHECK17-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK17-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i64*
 // CHECK17-NEXT:    store i64 [[TMP6]], i64* [[TMP19]], align 8
-// CHECK17-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK17-NEXT:    store i64 4, i64* [[TMP20]], align 8
-// CHECK17-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK17-NEXT:    store i8* null, i8** [[TMP21]], align 8
-// CHECK17-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK17-NEXT:    [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i64*
-// CHECK17-NEXT:    store i64 2, i64* [[TMP23]], align 8
-// CHECK17-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK17-NEXT:    [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i64*
-// CHECK17-NEXT:    store i64 2, i64* [[TMP25]], align 8
-// CHECK17-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK17-NEXT:    store i64 8, i64* [[TMP26]], align 8
-// CHECK17-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK17-NEXT:    store i8* null, i8** [[TMP27]], align 8
-// CHECK17-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK17-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK17-NEXT:    store i8* null, i8** [[TMP20]], align 8
+// CHECK17-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK17-NEXT:    [[TMP22:%.*]] = bitcast i8** [[TMP21]] to i64*
+// CHECK17-NEXT:    store i64 2, i64* [[TMP22]], align 8
+// CHECK17-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK17-NEXT:    [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i64*
+// CHECK17-NEXT:    store i64 2, i64* [[TMP24]], align 8
+// CHECK17-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK17-NEXT:    store i8* null, i8** [[TMP25]], align 8
+// CHECK17-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK17-NEXT:    [[TMP27:%.*]] = bitcast i8** [[TMP26]] to i64*
+// CHECK17-NEXT:    store i64 [[TMP2]], i64* [[TMP27]], align 8
+// CHECK17-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
 // CHECK17-NEXT:    [[TMP29:%.*]] = bitcast i8** [[TMP28]] to i64*
 // CHECK17-NEXT:    store i64 [[TMP2]], i64* [[TMP29]], align 8
-// CHECK17-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK17-NEXT:    [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP2]], i64* [[TMP31]], align 8
-// CHECK17-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK17-NEXT:    store i64 8, i64* [[TMP32]], align 8
-// CHECK17-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
-// CHECK17-NEXT:    store i8* null, i8** [[TMP33]], align 8
-// CHECK17-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK17-NEXT:    [[TMP35:%.*]] = bitcast i8** [[TMP34]] to i16**
-// CHECK17-NEXT:    store i16* [[VLA]], i16** [[TMP35]], align 8
-// CHECK17-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
-// CHECK17-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i16**
-// CHECK17-NEXT:    store i16* [[VLA]], i16** [[TMP37]], align 8
-// CHECK17-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK17-NEXT:    store i64 [[TMP9]], i64* [[TMP38]], align 8
-// CHECK17-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
-// CHECK17-NEXT:    store i8* null, i8** [[TMP39]], align 8
-// CHECK17-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP43:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l242.region_id, i32 5, i8** [[TMP40]], i8** [[TMP41]], i64* [[TMP42]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.10, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
-// CHECK17-NEXT:    [[TMP44:%.*]] = icmp ne i32 [[TMP43]], 0
-// CHECK17-NEXT:    br i1 [[TMP44]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK17-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
+// CHECK17-NEXT:    store i8* null, i8** [[TMP30]], align 8
+// CHECK17-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK17-NEXT:    [[TMP32:%.*]] = bitcast i8** [[TMP31]] to i16**
+// CHECK17-NEXT:    store i16* [[VLA]], i16** [[TMP32]], align 8
+// CHECK17-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK17-NEXT:    [[TMP34:%.*]] = bitcast i8** [[TMP33]] to i16**
+// CHECK17-NEXT:    store i16* [[VLA]], i16** [[TMP34]], align 8
+// CHECK17-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
+// CHECK17-NEXT:    store i64 [[TMP9]], i64* [[TMP35]], align 8
+// CHECK17-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
+// CHECK17-NEXT:    store i8* null, i8** [[TMP36]], align 8
+// CHECK17-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP40:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l242.region_id, i32 5, i8** [[TMP37]], i8** [[TMP38]], i64* [[TMP39]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.12, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
+// CHECK17-NEXT:    [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0
+// CHECK17-NEXT:    br i1 [[TMP41]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK17:       omp_offload.failed:
 // CHECK17-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l242(%struct.S1* [[THIS1]], i64 [[TMP6]], i64 2, i64 [[TMP2]], i16* [[VLA]]) #[[ATTR4]]
 // CHECK17-NEXT:    br label [[OMP_OFFLOAD_CONT]]
@@ -11133,15 +11033,15 @@ int bar(int n){
 // CHECK17-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l242(%struct.S1* [[THIS1]], i64 [[TMP6]], i64 2, i64 [[TMP2]], i16* [[VLA]]) #[[ATTR4]]
 // CHECK17-NEXT:    br label [[OMP_IF_END]]
 // CHECK17:       omp_if.end:
-// CHECK17-NEXT:    [[TMP45:%.*]] = mul nsw i64 1, [[TMP2]]
-// CHECK17-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i64 [[TMP45]]
+// CHECK17-NEXT:    [[TMP42:%.*]] = mul nsw i64 1, [[TMP2]]
+// CHECK17-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i64 [[TMP42]]
 // CHECK17-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1
-// CHECK17-NEXT:    [[TMP46:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
-// CHECK17-NEXT:    [[CONV3:%.*]] = sext i16 [[TMP46]] to i32
-// CHECK17-NEXT:    [[TMP47:%.*]] = load i32, i32* [[B]], align 4
-// CHECK17-NEXT:    [[ADD4:%.*]] = add nsw i32 [[CONV3]], [[TMP47]]
-// CHECK17-NEXT:    [[TMP48:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK17-NEXT:    call void @llvm.stackrestore(i8* [[TMP48]])
+// CHECK17-NEXT:    [[TMP43:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
+// CHECK17-NEXT:    [[CONV3:%.*]] = sext i16 [[TMP43]] to i32
+// CHECK17-NEXT:    [[TMP44:%.*]] = load i32, i32* [[B]], align 4
+// CHECK17-NEXT:    [[ADD4:%.*]] = add nsw i32 [[CONV3]], [[TMP44]]
+// CHECK17-NEXT:    [[TMP45:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK17-NEXT:    call void @llvm.stackrestore(i8* [[TMP45]])
 // CHECK17-NEXT:    ret i32 [[ADD4]]
 //
 //
@@ -11213,7 +11113,7 @@ int bar(int n){
 // CHECK17-NEXT:    store i8* null, i8** [[TMP26]], align 8
 // CHECK17-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK17-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP29:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l224.region_id, i32 4, i8** [[TMP27]], i8** [[TMP28]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.12, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
+// CHECK17-NEXT:    [[TMP29:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l224.region_id, i32 4, i8** [[TMP27]], i8** [[TMP28]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.14, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.15, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
 // CHECK17-NEXT:    [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0
 // CHECK17-NEXT:    br i1 [[TMP30]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK17:       omp_offload.failed:
@@ -11282,7 +11182,7 @@ int bar(int n){
 // CHECK17-NEXT:    store i8* null, i8** [[TMP19]], align 8
 // CHECK17-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK17-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l207.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.15, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.16, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
+// CHECK17-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l207.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.17, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.18, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
 // CHECK17-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK17-NEXT:    br i1 [[TMP23]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK17:       omp_offload.failed:
@@ -11321,11 +11221,11 @@ int bar(int n){
 // CHECK17-NEXT:    [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32*
 // CHECK17-NEXT:    store i32 [[TMP4]], i32* [[CONV3]], align 4
 // CHECK17-NEXT:    [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8
-// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i64, i64, i64, i16*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i64 [[TMP5]], i64 [[TMP1]], i64 [[TMP2]], i16* [[TMP3]])
+// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i64, i64, i64, i16*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i64 [[TMP5]], i64 [[TMP1]], i64 [[TMP2]], i16* [[TMP3]])
 // CHECK17-NEXT:    ret void
 //
 //
-// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..9
+// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..10
 // CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR3]] {
 // CHECK17-NEXT:  entry:
 // CHECK17-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -11444,11 +11344,11 @@ int bar(int n){
 // CHECK17-NEXT:    [[CONV5:%.*]] = bitcast i64* [[AAA_CASTED]] to i8*
 // CHECK17-NEXT:    store i8 [[TMP5]], i8* [[CONV5]], align 1
 // CHECK17-NEXT:    [[TMP6:%.*]] = load i64, i64* [[AAA_CASTED]], align 8
-// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, [10 x i32]*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], [10 x i32]* [[TMP0]])
+// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, [10 x i32]*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], [10 x i32]* [[TMP0]])
 // CHECK17-NEXT:    ret void
 //
 //
-// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..11
+// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..13
 // CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] {
 // CHECK17-NEXT:  entry:
 // CHECK17-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -11494,11 +11394,11 @@ int bar(int n){
 // CHECK17-NEXT:    [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16*
 // CHECK17-NEXT:    store i16 [[TMP3]], i16* [[CONV3]], align 2
 // CHECK17-NEXT:    [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8
-// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], [10 x i32]* [[TMP0]])
+// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], [10 x i32]* [[TMP0]])
 // CHECK17-NEXT:    ret void
 //
 //
-// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..14
+// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..16
 // CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] {
 // CHECK17-NEXT:  entry:
 // CHECK17-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -11790,14 +11690,14 @@ int bar(int n){
 // CHECK18-NEXT:    [[TMP86:%.*]] = mul nuw i64 [[TMP2]], 4
 // CHECK18-NEXT:    [[TMP87:%.*]] = mul nuw i64 5, [[TMP5]]
 // CHECK18-NEXT:    [[TMP88:%.*]] = mul nuw i64 [[TMP87]], 8
-// CHECK18-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP90:%.*]] = bitcast i8** [[TMP89]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP82]], i64* [[TMP90]], align 8
-// CHECK18-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP92:%.*]] = bitcast i8** [[TMP91]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP82]], i64* [[TMP92]], align 8
-// CHECK18-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK18-NEXT:    store i64 4, i64* [[TMP93]], align 8
+// CHECK18-NEXT:    [[TMP89:%.*]] = bitcast [10 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK18-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP89]], i8* align 8 bitcast ([10 x i64]* @.offload_sizes.8 to i8*), i64 80, i1 false)
+// CHECK18-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP91:%.*]] = bitcast i8** [[TMP90]] to i64*
+// CHECK18-NEXT:    store i64 [[TMP82]], i64* [[TMP91]], align 8
+// CHECK18-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP93:%.*]] = bitcast i8** [[TMP92]] to i64*
+// CHECK18-NEXT:    store i64 [[TMP82]], i64* [[TMP93]], align 8
 // CHECK18-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 0
 // CHECK18-NEXT:    store i8* null, i8** [[TMP94]], align 8
 // CHECK18-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 1
@@ -11806,96 +11706,82 @@ int bar(int n){
 // CHECK18-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 1
 // CHECK18-NEXT:    [[TMP98:%.*]] = bitcast i8** [[TMP97]] to [10 x float]**
 // CHECK18-NEXT:    store [10 x float]* [[B]], [10 x float]** [[TMP98]], align 8
-// CHECK18-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK18-NEXT:    store i64 40, i64* [[TMP99]], align 8
-// CHECK18-NEXT:    [[TMP100:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 1
-// CHECK18-NEXT:    store i8* null, i8** [[TMP100]], align 8
-// CHECK18-NEXT:    [[TMP101:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 2
-// CHECK18-NEXT:    [[TMP102:%.*]] = bitcast i8** [[TMP101]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP2]], i64* [[TMP102]], align 8
-// CHECK18-NEXT:    [[TMP103:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 2
-// CHECK18-NEXT:    [[TMP104:%.*]] = bitcast i8** [[TMP103]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP2]], i64* [[TMP104]], align 8
-// CHECK18-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK18-NEXT:    store i64 8, i64* [[TMP105]], align 8
-// CHECK18-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 2
-// CHECK18-NEXT:    store i8* null, i8** [[TMP106]], align 8
-// CHECK18-NEXT:    [[TMP107:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 3
+// CHECK18-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 1
+// CHECK18-NEXT:    store i8* null, i8** [[TMP99]], align 8
+// CHECK18-NEXT:    [[TMP100:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 2
+// CHECK18-NEXT:    [[TMP101:%.*]] = bitcast i8** [[TMP100]] to i64*
+// CHECK18-NEXT:    store i64 [[TMP2]], i64* [[TMP101]], align 8
+// CHECK18-NEXT:    [[TMP102:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 2
+// CHECK18-NEXT:    [[TMP103:%.*]] = bitcast i8** [[TMP102]] to i64*
+// CHECK18-NEXT:    store i64 [[TMP2]], i64* [[TMP103]], align 8
+// CHECK18-NEXT:    [[TMP104:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 2
+// CHECK18-NEXT:    store i8* null, i8** [[TMP104]], align 8
+// CHECK18-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 3
+// CHECK18-NEXT:    [[TMP106:%.*]] = bitcast i8** [[TMP105]] to float**
+// CHECK18-NEXT:    store float* [[VLA]], float** [[TMP106]], align 8
+// CHECK18-NEXT:    [[TMP107:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 3
 // CHECK18-NEXT:    [[TMP108:%.*]] = bitcast i8** [[TMP107]] to float**
 // CHECK18-NEXT:    store float* [[VLA]], float** [[TMP108]], align 8
-// CHECK18-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 3
-// CHECK18-NEXT:    [[TMP110:%.*]] = bitcast i8** [[TMP109]] to float**
-// CHECK18-NEXT:    store float* [[VLA]], float** [[TMP110]], align 8
-// CHECK18-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK18-NEXT:    store i64 [[TMP86]], i64* [[TMP111]], align 8
-// CHECK18-NEXT:    [[TMP112:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 3
-// CHECK18-NEXT:    store i8* null, i8** [[TMP112]], align 8
-// CHECK18-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 4
+// CHECK18-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
+// CHECK18-NEXT:    store i64 [[TMP86]], i64* [[TMP109]], align 8
+// CHECK18-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 3
+// CHECK18-NEXT:    store i8* null, i8** [[TMP110]], align 8
+// CHECK18-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 4
+// CHECK18-NEXT:    [[TMP112:%.*]] = bitcast i8** [[TMP111]] to [5 x [10 x double]]**
+// CHECK18-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP112]], align 8
+// CHECK18-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 4
 // CHECK18-NEXT:    [[TMP114:%.*]] = bitcast i8** [[TMP113]] to [5 x [10 x double]]**
 // CHECK18-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP114]], align 8
-// CHECK18-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 4
-// CHECK18-NEXT:    [[TMP116:%.*]] = bitcast i8** [[TMP115]] to [5 x [10 x double]]**
-// CHECK18-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP116]], align 8
-// CHECK18-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK18-NEXT:    store i64 400, i64* [[TMP117]], align 8
-// CHECK18-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 4
-// CHECK18-NEXT:    store i8* null, i8** [[TMP118]], align 8
-// CHECK18-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 5
-// CHECK18-NEXT:    [[TMP120:%.*]] = bitcast i8** [[TMP119]] to i64*
-// CHECK18-NEXT:    store i64 5, i64* [[TMP120]], align 8
-// CHECK18-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 5
+// CHECK18-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 4
+// CHECK18-NEXT:    store i8* null, i8** [[TMP115]], align 8
+// CHECK18-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 5
+// CHECK18-NEXT:    [[TMP117:%.*]] = bitcast i8** [[TMP116]] to i64*
+// CHECK18-NEXT:    store i64 5, i64* [[TMP117]], align 8
+// CHECK18-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 5
+// CHECK18-NEXT:    [[TMP119:%.*]] = bitcast i8** [[TMP118]] to i64*
+// CHECK18-NEXT:    store i64 5, i64* [[TMP119]], align 8
+// CHECK18-NEXT:    [[TMP120:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 5
+// CHECK18-NEXT:    store i8* null, i8** [[TMP120]], align 8
+// CHECK18-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 6
 // CHECK18-NEXT:    [[TMP122:%.*]] = bitcast i8** [[TMP121]] to i64*
-// CHECK18-NEXT:    store i64 5, i64* [[TMP122]], align 8
-// CHECK18-NEXT:    [[TMP123:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 5
-// CHECK18-NEXT:    store i64 8, i64* [[TMP123]], align 8
-// CHECK18-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 5
-// CHECK18-NEXT:    store i8* null, i8** [[TMP124]], align 8
-// CHECK18-NEXT:    [[TMP125:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 6
-// CHECK18-NEXT:    [[TMP126:%.*]] = bitcast i8** [[TMP125]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP5]], i64* [[TMP126]], align 8
-// CHECK18-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 6
-// CHECK18-NEXT:    [[TMP128:%.*]] = bitcast i8** [[TMP127]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP5]], i64* [[TMP128]], align 8
-// CHECK18-NEXT:    [[TMP129:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 6
-// CHECK18-NEXT:    store i64 8, i64* [[TMP129]], align 8
-// CHECK18-NEXT:    [[TMP130:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 6
-// CHECK18-NEXT:    store i8* null, i8** [[TMP130]], align 8
-// CHECK18-NEXT:    [[TMP131:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 7
-// CHECK18-NEXT:    [[TMP132:%.*]] = bitcast i8** [[TMP131]] to double**
-// CHECK18-NEXT:    store double* [[VLA1]], double** [[TMP132]], align 8
-// CHECK18-NEXT:    [[TMP133:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 7
-// CHECK18-NEXT:    [[TMP134:%.*]] = bitcast i8** [[TMP133]] to double**
-// CHECK18-NEXT:    store double* [[VLA1]], double** [[TMP134]], align 8
-// CHECK18-NEXT:    [[TMP135:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
-// CHECK18-NEXT:    store i64 [[TMP88]], i64* [[TMP135]], align 8
-// CHECK18-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 7
+// CHECK18-NEXT:    store i64 [[TMP5]], i64* [[TMP122]], align 8
+// CHECK18-NEXT:    [[TMP123:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 6
+// CHECK18-NEXT:    [[TMP124:%.*]] = bitcast i8** [[TMP123]] to i64*
+// CHECK18-NEXT:    store i64 [[TMP5]], i64* [[TMP124]], align 8
+// CHECK18-NEXT:    [[TMP125:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 6
+// CHECK18-NEXT:    store i8* null, i8** [[TMP125]], align 8
+// CHECK18-NEXT:    [[TMP126:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 7
+// CHECK18-NEXT:    [[TMP127:%.*]] = bitcast i8** [[TMP126]] to double**
+// CHECK18-NEXT:    store double* [[VLA1]], double** [[TMP127]], align 8
+// CHECK18-NEXT:    [[TMP128:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 7
+// CHECK18-NEXT:    [[TMP129:%.*]] = bitcast i8** [[TMP128]] to double**
+// CHECK18-NEXT:    store double* [[VLA1]], double** [[TMP129]], align 8
+// CHECK18-NEXT:    [[TMP130:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
+// CHECK18-NEXT:    store i64 [[TMP88]], i64* [[TMP130]], align 8
+// CHECK18-NEXT:    [[TMP131:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 7
+// CHECK18-NEXT:    store i8* null, i8** [[TMP131]], align 8
+// CHECK18-NEXT:    [[TMP132:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 8
+// CHECK18-NEXT:    [[TMP133:%.*]] = bitcast i8** [[TMP132]] to %struct.TT**
+// CHECK18-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP133]], align 8
+// CHECK18-NEXT:    [[TMP134:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 8
+// CHECK18-NEXT:    [[TMP135:%.*]] = bitcast i8** [[TMP134]] to %struct.TT**
+// CHECK18-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP135]], align 8
+// CHECK18-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 8
 // CHECK18-NEXT:    store i8* null, i8** [[TMP136]], align 8
-// CHECK18-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 8
-// CHECK18-NEXT:    [[TMP138:%.*]] = bitcast i8** [[TMP137]] to %struct.TT**
-// CHECK18-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP138]], align 8
-// CHECK18-NEXT:    [[TMP139:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 8
-// CHECK18-NEXT:    [[TMP140:%.*]] = bitcast i8** [[TMP139]] to %struct.TT**
-// CHECK18-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP140]], align 8
-// CHECK18-NEXT:    [[TMP141:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 8
-// CHECK18-NEXT:    store i64 16, i64* [[TMP141]], align 8
-// CHECK18-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 8
-// CHECK18-NEXT:    store i8* null, i8** [[TMP142]], align 8
-// CHECK18-NEXT:    [[TMP143:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 9
-// CHECK18-NEXT:    [[TMP144:%.*]] = bitcast i8** [[TMP143]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP84]], i64* [[TMP144]], align 8
-// CHECK18-NEXT:    [[TMP145:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 9
-// CHECK18-NEXT:    [[TMP146:%.*]] = bitcast i8** [[TMP145]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP84]], i64* [[TMP146]], align 8
-// CHECK18-NEXT:    [[TMP147:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 9
-// CHECK18-NEXT:    store i64 4, i64* [[TMP147]], align 8
-// CHECK18-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 9
-// CHECK18-NEXT:    store i8* null, i8** [[TMP148]], align 8
-// CHECK18-NEXT:    [[TMP149:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP151:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP152:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l170.region_id, i32 10, i8** [[TMP149]], i8** [[TMP150]], i64* [[TMP151]], i64* getelementptr inbounds ([10 x i64], [10 x i64]* @.offload_maptypes.8, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
-// CHECK18-NEXT:    [[TMP153:%.*]] = icmp ne i32 [[TMP152]], 0
-// CHECK18-NEXT:    br i1 [[TMP153]], label [[OMP_OFFLOAD_FAILED23:%.*]], label [[OMP_OFFLOAD_CONT24:%.*]]
+// CHECK18-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 9
+// CHECK18-NEXT:    [[TMP138:%.*]] = bitcast i8** [[TMP137]] to i64*
+// CHECK18-NEXT:    store i64 [[TMP84]], i64* [[TMP138]], align 8
+// CHECK18-NEXT:    [[TMP139:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 9
+// CHECK18-NEXT:    [[TMP140:%.*]] = bitcast i8** [[TMP139]] to i64*
+// CHECK18-NEXT:    store i64 [[TMP84]], i64* [[TMP140]], align 8
+// CHECK18-NEXT:    [[TMP141:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 9
+// CHECK18-NEXT:    store i8* null, i8** [[TMP141]], align 8
+// CHECK18-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP143:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP144:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP145:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l170.region_id, i32 10, i8** [[TMP142]], i8** [[TMP143]], i64* [[TMP144]], i64* getelementptr inbounds ([10 x i64], [10 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
+// CHECK18-NEXT:    [[TMP146:%.*]] = icmp ne i32 [[TMP145]], 0
+// CHECK18-NEXT:    br i1 [[TMP146]], label [[OMP_OFFLOAD_FAILED23:%.*]], label [[OMP_OFFLOAD_CONT24:%.*]]
 // CHECK18:       omp_offload.failed23:
 // CHECK18-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l170(i64 [[TMP82]], [10 x float]* [[B]], i64 [[TMP2]], float* [[VLA]], [5 x [10 x double]]* [[C]], i64 5, i64 [[TMP5]], double* [[VLA1]], %struct.TT* [[D]], i64 [[TMP84]]) #[[ATTR4]]
 // CHECK18-NEXT:    br label [[OMP_OFFLOAD_CONT24]]
@@ -11905,10 +11791,10 @@ int bar(int n){
 // CHECK18-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l170(i64 [[TMP82]], [10 x float]* [[B]], i64 [[TMP2]], float* [[VLA]], [5 x [10 x double]]* [[C]], i64 5, i64 [[TMP5]], double* [[VLA1]], %struct.TT* [[D]], i64 [[TMP84]]) #[[ATTR4]]
 // CHECK18-NEXT:    br label [[OMP_IF_END26]]
 // CHECK18:       omp_if.end26:
-// CHECK18-NEXT:    [[TMP154:%.*]] = load i32, i32* [[A]], align 4
-// CHECK18-NEXT:    [[TMP155:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK18-NEXT:    call void @llvm.stackrestore(i8* [[TMP155]])
-// CHECK18-NEXT:    ret i32 [[TMP154]]
+// CHECK18-NEXT:    [[TMP147:%.*]] = load i32, i32* [[A]], align 4
+// CHECK18-NEXT:    [[TMP148:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK18-NEXT:    call void @llvm.stackrestore(i8* [[TMP148]])
+// CHECK18-NEXT:    ret i32 [[TMP147]]
 //
 //
 // CHECK18-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l103
@@ -12706,14 +12592,14 @@ int bar(int n){
 // CHECK18-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0
 // CHECK18-NEXT:    [[TMP8:%.*]] = mul nuw i64 2, [[TMP2]]
 // CHECK18-NEXT:    [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 2
-// CHECK18-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP11:%.*]] = bitcast i8** [[TMP10]] to %struct.S1**
-// CHECK18-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP11]], align 8
-// CHECK18-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP13:%.*]] = bitcast i8** [[TMP12]] to double**
-// CHECK18-NEXT:    store double* [[A]], double** [[TMP13]], align 8
-// CHECK18-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK18-NEXT:    store i64 8, i64* [[TMP14]], align 8
+// CHECK18-NEXT:    [[TMP10:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK18-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP10]], i8* align 8 bitcast ([5 x i64]* @.offload_sizes.11 to i8*), i64 40, i1 false)
+// CHECK18-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP12:%.*]] = bitcast i8** [[TMP11]] to %struct.S1**
+// CHECK18-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP12]], align 8
+// CHECK18-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to double**
+// CHECK18-NEXT:    store double* [[A]], double** [[TMP14]], align 8
 // CHECK18-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
 // CHECK18-NEXT:    store i8* null, i8** [[TMP15]], align 8
 // CHECK18-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -12722,46 +12608,40 @@ int bar(int n){
 // CHECK18-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK18-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i64*
 // CHECK18-NEXT:    store i64 [[TMP6]], i64* [[TMP19]], align 8
-// CHECK18-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK18-NEXT:    store i64 4, i64* [[TMP20]], align 8
-// CHECK18-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK18-NEXT:    store i8* null, i8** [[TMP21]], align 8
-// CHECK18-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK18-NEXT:    [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i64*
-// CHECK18-NEXT:    store i64 2, i64* [[TMP23]], align 8
-// CHECK18-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK18-NEXT:    [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i64*
-// CHECK18-NEXT:    store i64 2, i64* [[TMP25]], align 8
-// CHECK18-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK18-NEXT:    store i64 8, i64* [[TMP26]], align 8
-// CHECK18-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK18-NEXT:    store i8* null, i8** [[TMP27]], align 8
-// CHECK18-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK18-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK18-NEXT:    store i8* null, i8** [[TMP20]], align 8
+// CHECK18-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK18-NEXT:    [[TMP22:%.*]] = bitcast i8** [[TMP21]] to i64*
+// CHECK18-NEXT:    store i64 2, i64* [[TMP22]], align 8
+// CHECK18-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK18-NEXT:    [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i64*
+// CHECK18-NEXT:    store i64 2, i64* [[TMP24]], align 8
+// CHECK18-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK18-NEXT:    store i8* null, i8** [[TMP25]], align 8
+// CHECK18-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK18-NEXT:    [[TMP27:%.*]] = bitcast i8** [[TMP26]] to i64*
+// CHECK18-NEXT:    store i64 [[TMP2]], i64* [[TMP27]], align 8
+// CHECK18-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
 // CHECK18-NEXT:    [[TMP29:%.*]] = bitcast i8** [[TMP28]] to i64*
 // CHECK18-NEXT:    store i64 [[TMP2]], i64* [[TMP29]], align 8
-// CHECK18-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK18-NEXT:    [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP2]], i64* [[TMP31]], align 8
-// CHECK18-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK18-NEXT:    store i64 8, i64* [[TMP32]], align 8
-// CHECK18-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
-// CHECK18-NEXT:    store i8* null, i8** [[TMP33]], align 8
-// CHECK18-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK18-NEXT:    [[TMP35:%.*]] = bitcast i8** [[TMP34]] to i16**
-// CHECK18-NEXT:    store i16* [[VLA]], i16** [[TMP35]], align 8
-// CHECK18-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
-// CHECK18-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i16**
-// CHECK18-NEXT:    store i16* [[VLA]], i16** [[TMP37]], align 8
-// CHECK18-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK18-NEXT:    store i64 [[TMP9]], i64* [[TMP38]], align 8
-// CHECK18-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
-// CHECK18-NEXT:    store i8* null, i8** [[TMP39]], align 8
-// CHECK18-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP43:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l242.region_id, i32 5, i8** [[TMP40]], i8** [[TMP41]], i64* [[TMP42]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.10, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
-// CHECK18-NEXT:    [[TMP44:%.*]] = icmp ne i32 [[TMP43]], 0
-// CHECK18-NEXT:    br i1 [[TMP44]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK18-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
+// CHECK18-NEXT:    store i8* null, i8** [[TMP30]], align 8
+// CHECK18-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK18-NEXT:    [[TMP32:%.*]] = bitcast i8** [[TMP31]] to i16**
+// CHECK18-NEXT:    store i16* [[VLA]], i16** [[TMP32]], align 8
+// CHECK18-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK18-NEXT:    [[TMP34:%.*]] = bitcast i8** [[TMP33]] to i16**
+// CHECK18-NEXT:    store i16* [[VLA]], i16** [[TMP34]], align 8
+// CHECK18-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
+// CHECK18-NEXT:    store i64 [[TMP9]], i64* [[TMP35]], align 8
+// CHECK18-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
+// CHECK18-NEXT:    store i8* null, i8** [[TMP36]], align 8
+// CHECK18-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP40:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l242.region_id, i32 5, i8** [[TMP37]], i8** [[TMP38]], i64* [[TMP39]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.12, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
+// CHECK18-NEXT:    [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0
+// CHECK18-NEXT:    br i1 [[TMP41]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK18:       omp_offload.failed:
 // CHECK18-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l242(%struct.S1* [[THIS1]], i64 [[TMP6]], i64 2, i64 [[TMP2]], i16* [[VLA]]) #[[ATTR4]]
 // CHECK18-NEXT:    br label [[OMP_OFFLOAD_CONT]]
@@ -12771,15 +12651,15 @@ int bar(int n){
 // CHECK18-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l242(%struct.S1* [[THIS1]], i64 [[TMP6]], i64 2, i64 [[TMP2]], i16* [[VLA]]) #[[ATTR4]]
 // CHECK18-NEXT:    br label [[OMP_IF_END]]
 // CHECK18:       omp_if.end:
-// CHECK18-NEXT:    [[TMP45:%.*]] = mul nsw i64 1, [[TMP2]]
-// CHECK18-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i64 [[TMP45]]
+// CHECK18-NEXT:    [[TMP42:%.*]] = mul nsw i64 1, [[TMP2]]
+// CHECK18-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i64 [[TMP42]]
 // CHECK18-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1
-// CHECK18-NEXT:    [[TMP46:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
-// CHECK18-NEXT:    [[CONV3:%.*]] = sext i16 [[TMP46]] to i32
-// CHECK18-NEXT:    [[TMP47:%.*]] = load i32, i32* [[B]], align 4
-// CHECK18-NEXT:    [[ADD4:%.*]] = add nsw i32 [[CONV3]], [[TMP47]]
-// CHECK18-NEXT:    [[TMP48:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK18-NEXT:    call void @llvm.stackrestore(i8* [[TMP48]])
+// CHECK18-NEXT:    [[TMP43:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
+// CHECK18-NEXT:    [[CONV3:%.*]] = sext i16 [[TMP43]] to i32
+// CHECK18-NEXT:    [[TMP44:%.*]] = load i32, i32* [[B]], align 4
+// CHECK18-NEXT:    [[ADD4:%.*]] = add nsw i32 [[CONV3]], [[TMP44]]
+// CHECK18-NEXT:    [[TMP45:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK18-NEXT:    call void @llvm.stackrestore(i8* [[TMP45]])
 // CHECK18-NEXT:    ret i32 [[ADD4]]
 //
 //
@@ -12851,7 +12731,7 @@ int bar(int n){
 // CHECK18-NEXT:    store i8* null, i8** [[TMP26]], align 8
 // CHECK18-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK18-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP29:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l224.region_id, i32 4, i8** [[TMP27]], i8** [[TMP28]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.12, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
+// CHECK18-NEXT:    [[TMP29:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l224.region_id, i32 4, i8** [[TMP27]], i8** [[TMP28]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.14, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.15, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
 // CHECK18-NEXT:    [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0
 // CHECK18-NEXT:    br i1 [[TMP30]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK18:       omp_offload.failed:
@@ -12920,7 +12800,7 @@ int bar(int n){
 // CHECK18-NEXT:    store i8* null, i8** [[TMP19]], align 8
 // CHECK18-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK18-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l207.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.15, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.16, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
+// CHECK18-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l207.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.17, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.18, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
 // CHECK18-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK18-NEXT:    br i1 [[TMP23]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK18:       omp_offload.failed:
@@ -12959,11 +12839,11 @@ int bar(int n){
 // CHECK18-NEXT:    [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32*
 // CHECK18-NEXT:    store i32 [[TMP4]], i32* [[CONV3]], align 4
 // CHECK18-NEXT:    [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8
-// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i64, i64, i64, i16*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i64 [[TMP5]], i64 [[TMP1]], i64 [[TMP2]], i16* [[TMP3]])
+// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i64, i64, i64, i16*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i64 [[TMP5]], i64 [[TMP1]], i64 [[TMP2]], i16* [[TMP3]])
 // CHECK18-NEXT:    ret void
 //
 //
-// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..9
+// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..10
 // CHECK18-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR3]] {
 // CHECK18-NEXT:  entry:
 // CHECK18-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -13082,11 +12962,11 @@ int bar(int n){
 // CHECK18-NEXT:    [[CONV5:%.*]] = bitcast i64* [[AAA_CASTED]] to i8*
 // CHECK18-NEXT:    store i8 [[TMP5]], i8* [[CONV5]], align 1
 // CHECK18-NEXT:    [[TMP6:%.*]] = load i64, i64* [[AAA_CASTED]], align 8
-// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, [10 x i32]*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], [10 x i32]* [[TMP0]])
+// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, [10 x i32]*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], [10 x i32]* [[TMP0]])
 // CHECK18-NEXT:    ret void
 //
 //
-// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..11
+// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..13
 // CHECK18-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] {
 // CHECK18-NEXT:  entry:
 // CHECK18-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -13132,11 +13012,11 @@ int bar(int n){
 // CHECK18-NEXT:    [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16*
 // CHECK18-NEXT:    store i16 [[TMP3]], i16* [[CONV3]], align 2
 // CHECK18-NEXT:    [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8
-// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], [10 x i32]* [[TMP0]])
+// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], [10 x i32]* [[TMP0]])
 // CHECK18-NEXT:    ret void
 //
 //
-// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..14
+// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..16
 // CHECK18-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] {
 // CHECK18-NEXT:  entry:
 // CHECK18-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -13418,14 +13298,14 @@ int bar(int n){
 // CHECK19-NEXT:    [[TMP84:%.*]] = mul nuw i32 5, [[TMP3]]
 // CHECK19-NEXT:    [[TMP85:%.*]] = mul nuw i32 [[TMP84]], 8
 // CHECK19-NEXT:    [[TMP86:%.*]] = sext i32 [[TMP85]] to i64
-// CHECK19-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP88:%.*]] = bitcast i8** [[TMP87]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP78]], i32* [[TMP88]], align 4
-// CHECK19-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP90:%.*]] = bitcast i8** [[TMP89]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP78]], i32* [[TMP90]], align 4
-// CHECK19-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK19-NEXT:    store i64 4, i64* [[TMP91]], align 4
+// CHECK19-NEXT:    [[TMP87:%.*]] = bitcast [10 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK19-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP87]], i8* align 4 bitcast ([10 x i64]* @.offload_sizes.8 to i8*), i32 80, i1 false)
+// CHECK19-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP89:%.*]] = bitcast i8** [[TMP88]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP78]], i32* [[TMP89]], align 4
+// CHECK19-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP91:%.*]] = bitcast i8** [[TMP90]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP78]], i32* [[TMP91]], align 4
 // CHECK19-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 0
 // CHECK19-NEXT:    store i8* null, i8** [[TMP92]], align 4
 // CHECK19-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 1
@@ -13434,96 +13314,82 @@ int bar(int n){
 // CHECK19-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 1
 // CHECK19-NEXT:    [[TMP96:%.*]] = bitcast i8** [[TMP95]] to [10 x float]**
 // CHECK19-NEXT:    store [10 x float]* [[B]], [10 x float]** [[TMP96]], align 4
-// CHECK19-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK19-NEXT:    store i64 40, i64* [[TMP97]], align 4
-// CHECK19-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 1
-// CHECK19-NEXT:    store i8* null, i8** [[TMP98]], align 4
-// CHECK19-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 2
-// CHECK19-NEXT:    [[TMP100:%.*]] = bitcast i8** [[TMP99]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP1]], i32* [[TMP100]], align 4
-// CHECK19-NEXT:    [[TMP101:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 2
-// CHECK19-NEXT:    [[TMP102:%.*]] = bitcast i8** [[TMP101]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP1]], i32* [[TMP102]], align 4
-// CHECK19-NEXT:    [[TMP103:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK19-NEXT:    store i64 4, i64* [[TMP103]], align 4
-// CHECK19-NEXT:    [[TMP104:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 2
-// CHECK19-NEXT:    store i8* null, i8** [[TMP104]], align 4
-// CHECK19-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 3
+// CHECK19-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 1
+// CHECK19-NEXT:    store i8* null, i8** [[TMP97]], align 4
+// CHECK19-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 2
+// CHECK19-NEXT:    [[TMP99:%.*]] = bitcast i8** [[TMP98]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP1]], i32* [[TMP99]], align 4
+// CHECK19-NEXT:    [[TMP100:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 2
+// CHECK19-NEXT:    [[TMP101:%.*]] = bitcast i8** [[TMP100]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP1]], i32* [[TMP101]], align 4
+// CHECK19-NEXT:    [[TMP102:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 2
+// CHECK19-NEXT:    store i8* null, i8** [[TMP102]], align 4
+// CHECK19-NEXT:    [[TMP103:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 3
+// CHECK19-NEXT:    [[TMP104:%.*]] = bitcast i8** [[TMP103]] to float**
+// CHECK19-NEXT:    store float* [[VLA]], float** [[TMP104]], align 4
+// CHECK19-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 3
 // CHECK19-NEXT:    [[TMP106:%.*]] = bitcast i8** [[TMP105]] to float**
 // CHECK19-NEXT:    store float* [[VLA]], float** [[TMP106]], align 4
-// CHECK19-NEXT:    [[TMP107:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 3
-// CHECK19-NEXT:    [[TMP108:%.*]] = bitcast i8** [[TMP107]] to float**
-// CHECK19-NEXT:    store float* [[VLA]], float** [[TMP108]], align 4
-// CHECK19-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK19-NEXT:    store i64 [[TMP83]], i64* [[TMP109]], align 4
-// CHECK19-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 3
-// CHECK19-NEXT:    store i8* null, i8** [[TMP110]], align 4
-// CHECK19-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 4
+// CHECK19-NEXT:    [[TMP107:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
+// CHECK19-NEXT:    store i64 [[TMP83]], i64* [[TMP107]], align 4
+// CHECK19-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 3
+// CHECK19-NEXT:    store i8* null, i8** [[TMP108]], align 4
+// CHECK19-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 4
+// CHECK19-NEXT:    [[TMP110:%.*]] = bitcast i8** [[TMP109]] to [5 x [10 x double]]**
+// CHECK19-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP110]], align 4
+// CHECK19-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 4
 // CHECK19-NEXT:    [[TMP112:%.*]] = bitcast i8** [[TMP111]] to [5 x [10 x double]]**
 // CHECK19-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP112]], align 4
-// CHECK19-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 4
-// CHECK19-NEXT:    [[TMP114:%.*]] = bitcast i8** [[TMP113]] to [5 x [10 x double]]**
-// CHECK19-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP114]], align 4
-// CHECK19-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK19-NEXT:    store i64 400, i64* [[TMP115]], align 4
-// CHECK19-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 4
-// CHECK19-NEXT:    store i8* null, i8** [[TMP116]], align 4
-// CHECK19-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 5
-// CHECK19-NEXT:    [[TMP118:%.*]] = bitcast i8** [[TMP117]] to i32*
-// CHECK19-NEXT:    store i32 5, i32* [[TMP118]], align 4
-// CHECK19-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 5
+// CHECK19-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 4
+// CHECK19-NEXT:    store i8* null, i8** [[TMP113]], align 4
+// CHECK19-NEXT:    [[TMP114:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 5
+// CHECK19-NEXT:    [[TMP115:%.*]] = bitcast i8** [[TMP114]] to i32*
+// CHECK19-NEXT:    store i32 5, i32* [[TMP115]], align 4
+// CHECK19-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 5
+// CHECK19-NEXT:    [[TMP117:%.*]] = bitcast i8** [[TMP116]] to i32*
+// CHECK19-NEXT:    store i32 5, i32* [[TMP117]], align 4
+// CHECK19-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 5
+// CHECK19-NEXT:    store i8* null, i8** [[TMP118]], align 4
+// CHECK19-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 6
 // CHECK19-NEXT:    [[TMP120:%.*]] = bitcast i8** [[TMP119]] to i32*
-// CHECK19-NEXT:    store i32 5, i32* [[TMP120]], align 4
-// CHECK19-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 5
-// CHECK19-NEXT:    store i64 4, i64* [[TMP121]], align 4
-// CHECK19-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 5
-// CHECK19-NEXT:    store i8* null, i8** [[TMP122]], align 4
-// CHECK19-NEXT:    [[TMP123:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 6
-// CHECK19-NEXT:    [[TMP124:%.*]] = bitcast i8** [[TMP123]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP3]], i32* [[TMP124]], align 4
-// CHECK19-NEXT:    [[TMP125:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 6
-// CHECK19-NEXT:    [[TMP126:%.*]] = bitcast i8** [[TMP125]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP3]], i32* [[TMP126]], align 4
-// CHECK19-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 6
-// CHECK19-NEXT:    store i64 4, i64* [[TMP127]], align 4
-// CHECK19-NEXT:    [[TMP128:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 6
-// CHECK19-NEXT:    store i8* null, i8** [[TMP128]], align 4
-// CHECK19-NEXT:    [[TMP129:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 7
-// CHECK19-NEXT:    [[TMP130:%.*]] = bitcast i8** [[TMP129]] to double**
-// CHECK19-NEXT:    store double* [[VLA1]], double** [[TMP130]], align 4
-// CHECK19-NEXT:    [[TMP131:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 7
-// CHECK19-NEXT:    [[TMP132:%.*]] = bitcast i8** [[TMP131]] to double**
-// CHECK19-NEXT:    store double* [[VLA1]], double** [[TMP132]], align 4
-// CHECK19-NEXT:    [[TMP133:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
-// CHECK19-NEXT:    store i64 [[TMP86]], i64* [[TMP133]], align 4
-// CHECK19-NEXT:    [[TMP134:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 7
+// CHECK19-NEXT:    store i32 [[TMP3]], i32* [[TMP120]], align 4
+// CHECK19-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 6
+// CHECK19-NEXT:    [[TMP122:%.*]] = bitcast i8** [[TMP121]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP3]], i32* [[TMP122]], align 4
+// CHECK19-NEXT:    [[TMP123:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 6
+// CHECK19-NEXT:    store i8* null, i8** [[TMP123]], align 4
+// CHECK19-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 7
+// CHECK19-NEXT:    [[TMP125:%.*]] = bitcast i8** [[TMP124]] to double**
+// CHECK19-NEXT:    store double* [[VLA1]], double** [[TMP125]], align 4
+// CHECK19-NEXT:    [[TMP126:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 7
+// CHECK19-NEXT:    [[TMP127:%.*]] = bitcast i8** [[TMP126]] to double**
+// CHECK19-NEXT:    store double* [[VLA1]], double** [[TMP127]], align 4
+// CHECK19-NEXT:    [[TMP128:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
+// CHECK19-NEXT:    store i64 [[TMP86]], i64* [[TMP128]], align 4
+// CHECK19-NEXT:    [[TMP129:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 7
+// CHECK19-NEXT:    store i8* null, i8** [[TMP129]], align 4
+// CHECK19-NEXT:    [[TMP130:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 8
+// CHECK19-NEXT:    [[TMP131:%.*]] = bitcast i8** [[TMP130]] to %struct.TT**
+// CHECK19-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP131]], align 4
+// CHECK19-NEXT:    [[TMP132:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 8
+// CHECK19-NEXT:    [[TMP133:%.*]] = bitcast i8** [[TMP132]] to %struct.TT**
+// CHECK19-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP133]], align 4
+// CHECK19-NEXT:    [[TMP134:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 8
 // CHECK19-NEXT:    store i8* null, i8** [[TMP134]], align 4
-// CHECK19-NEXT:    [[TMP135:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 8
-// CHECK19-NEXT:    [[TMP136:%.*]] = bitcast i8** [[TMP135]] to %struct.TT**
-// CHECK19-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP136]], align 4
-// CHECK19-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 8
-// CHECK19-NEXT:    [[TMP138:%.*]] = bitcast i8** [[TMP137]] to %struct.TT**
-// CHECK19-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP138]], align 4
-// CHECK19-NEXT:    [[TMP139:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 8
-// CHECK19-NEXT:    store i64 12, i64* [[TMP139]], align 4
-// CHECK19-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 8
-// CHECK19-NEXT:    store i8* null, i8** [[TMP140]], align 4
-// CHECK19-NEXT:    [[TMP141:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 9
-// CHECK19-NEXT:    [[TMP142:%.*]] = bitcast i8** [[TMP141]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP80]], i32* [[TMP142]], align 4
-// CHECK19-NEXT:    [[TMP143:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 9
-// CHECK19-NEXT:    [[TMP144:%.*]] = bitcast i8** [[TMP143]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP80]], i32* [[TMP144]], align 4
-// CHECK19-NEXT:    [[TMP145:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 9
-// CHECK19-NEXT:    store i64 4, i64* [[TMP145]], align 4
-// CHECK19-NEXT:    [[TMP146:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 9
-// CHECK19-NEXT:    store i8* null, i8** [[TMP146]], align 4
-// CHECK19-NEXT:    [[TMP147:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP149:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP150:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l170.region_id, i32 10, i8** [[TMP147]], i8** [[TMP148]], i64* [[TMP149]], i64* getelementptr inbounds ([10 x i64], [10 x i64]* @.offload_maptypes.8, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
-// CHECK19-NEXT:    [[TMP151:%.*]] = icmp ne i32 [[TMP150]], 0
-// CHECK19-NEXT:    br i1 [[TMP151]], label [[OMP_OFFLOAD_FAILED17:%.*]], label [[OMP_OFFLOAD_CONT18:%.*]]
+// CHECK19-NEXT:    [[TMP135:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 9
+// CHECK19-NEXT:    [[TMP136:%.*]] = bitcast i8** [[TMP135]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP80]], i32* [[TMP136]], align 4
+// CHECK19-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 9
+// CHECK19-NEXT:    [[TMP138:%.*]] = bitcast i8** [[TMP137]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP80]], i32* [[TMP138]], align 4
+// CHECK19-NEXT:    [[TMP139:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 9
+// CHECK19-NEXT:    store i8* null, i8** [[TMP139]], align 4
+// CHECK19-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP141:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP143:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l170.region_id, i32 10, i8** [[TMP140]], i8** [[TMP141]], i64* [[TMP142]], i64* getelementptr inbounds ([10 x i64], [10 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
+// CHECK19-NEXT:    [[TMP144:%.*]] = icmp ne i32 [[TMP143]], 0
+// CHECK19-NEXT:    br i1 [[TMP144]], label [[OMP_OFFLOAD_FAILED17:%.*]], label [[OMP_OFFLOAD_CONT18:%.*]]
 // CHECK19:       omp_offload.failed17:
 // CHECK19-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l170(i32 [[TMP78]], [10 x float]* [[B]], i32 [[TMP1]], float* [[VLA]], [5 x [10 x double]]* [[C]], i32 5, i32 [[TMP3]], double* [[VLA1]], %struct.TT* [[D]], i32 [[TMP80]]) #[[ATTR4]]
 // CHECK19-NEXT:    br label [[OMP_OFFLOAD_CONT18]]
@@ -13533,10 +13399,10 @@ int bar(int n){
 // CHECK19-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l170(i32 [[TMP78]], [10 x float]* [[B]], i32 [[TMP1]], float* [[VLA]], [5 x [10 x double]]* [[C]], i32 5, i32 [[TMP3]], double* [[VLA1]], %struct.TT* [[D]], i32 [[TMP80]]) #[[ATTR4]]
 // CHECK19-NEXT:    br label [[OMP_IF_END20]]
 // CHECK19:       omp_if.end20:
-// CHECK19-NEXT:    [[TMP152:%.*]] = load i32, i32* [[A]], align 4
-// CHECK19-NEXT:    [[TMP153:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK19-NEXT:    call void @llvm.stackrestore(i8* [[TMP153]])
-// CHECK19-NEXT:    ret i32 [[TMP152]]
+// CHECK19-NEXT:    [[TMP145:%.*]] = load i32, i32* [[A]], align 4
+// CHECK19-NEXT:    [[TMP146:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK19-NEXT:    call void @llvm.stackrestore(i8* [[TMP146]])
+// CHECK19-NEXT:    ret i32 [[TMP145]]
 //
 //
 // CHECK19-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l103
@@ -14311,14 +14177,14 @@ int bar(int n){
 // CHECK19-NEXT:    [[TMP7:%.*]] = mul nuw i32 2, [[TMP1]]
 // CHECK19-NEXT:    [[TMP8:%.*]] = mul nuw i32 [[TMP7]], 2
 // CHECK19-NEXT:    [[TMP9:%.*]] = sext i32 [[TMP8]] to i64
-// CHECK19-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP11:%.*]] = bitcast i8** [[TMP10]] to %struct.S1**
-// CHECK19-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP11]], align 4
-// CHECK19-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP13:%.*]] = bitcast i8** [[TMP12]] to double**
-// CHECK19-NEXT:    store double* [[A]], double** [[TMP13]], align 4
-// CHECK19-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK19-NEXT:    store i64 8, i64* [[TMP14]], align 4
+// CHECK19-NEXT:    [[TMP10:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK19-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP10]], i8* align 4 bitcast ([5 x i64]* @.offload_sizes.11 to i8*), i32 40, i1 false)
+// CHECK19-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP12:%.*]] = bitcast i8** [[TMP11]] to %struct.S1**
+// CHECK19-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP12]], align 4
+// CHECK19-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to double**
+// CHECK19-NEXT:    store double* [[A]], double** [[TMP14]], align 4
 // CHECK19-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
 // CHECK19-NEXT:    store i8* null, i8** [[TMP15]], align 4
 // CHECK19-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -14327,46 +14193,40 @@ int bar(int n){
 // CHECK19-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK19-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32*
 // CHECK19-NEXT:    store i32 [[TMP5]], i32* [[TMP19]], align 4
-// CHECK19-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK19-NEXT:    store i64 4, i64* [[TMP20]], align 4
-// CHECK19-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK19-NEXT:    store i8* null, i8** [[TMP21]], align 4
-// CHECK19-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK19-NEXT:    [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i32*
-// CHECK19-NEXT:    store i32 2, i32* [[TMP23]], align 4
-// CHECK19-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK19-NEXT:    [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i32*
-// CHECK19-NEXT:    store i32 2, i32* [[TMP25]], align 4
-// CHECK19-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK19-NEXT:    store i64 4, i64* [[TMP26]], align 4
-// CHECK19-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK19-NEXT:    store i8* null, i8** [[TMP27]], align 4
-// CHECK19-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK19-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK19-NEXT:    store i8* null, i8** [[TMP20]], align 4
+// CHECK19-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK19-NEXT:    [[TMP22:%.*]] = bitcast i8** [[TMP21]] to i32*
+// CHECK19-NEXT:    store i32 2, i32* [[TMP22]], align 4
+// CHECK19-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK19-NEXT:    [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i32*
+// CHECK19-NEXT:    store i32 2, i32* [[TMP24]], align 4
+// CHECK19-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK19-NEXT:    store i8* null, i8** [[TMP25]], align 4
+// CHECK19-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK19-NEXT:    [[TMP27:%.*]] = bitcast i8** [[TMP26]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP1]], i32* [[TMP27]], align 4
+// CHECK19-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
 // CHECK19-NEXT:    [[TMP29:%.*]] = bitcast i8** [[TMP28]] to i32*
 // CHECK19-NEXT:    store i32 [[TMP1]], i32* [[TMP29]], align 4
-// CHECK19-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK19-NEXT:    [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP1]], i32* [[TMP31]], align 4
-// CHECK19-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK19-NEXT:    store i64 4, i64* [[TMP32]], align 4
-// CHECK19-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
-// CHECK19-NEXT:    store i8* null, i8** [[TMP33]], align 4
-// CHECK19-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK19-NEXT:    [[TMP35:%.*]] = bitcast i8** [[TMP34]] to i16**
-// CHECK19-NEXT:    store i16* [[VLA]], i16** [[TMP35]], align 4
-// CHECK19-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
-// CHECK19-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i16**
-// CHECK19-NEXT:    store i16* [[VLA]], i16** [[TMP37]], align 4
-// CHECK19-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK19-NEXT:    store i64 [[TMP9]], i64* [[TMP38]], align 4
-// CHECK19-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
-// CHECK19-NEXT:    store i8* null, i8** [[TMP39]], align 4
-// CHECK19-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP43:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l242.region_id, i32 5, i8** [[TMP40]], i8** [[TMP41]], i64* [[TMP42]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.10, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
-// CHECK19-NEXT:    [[TMP44:%.*]] = icmp ne i32 [[TMP43]], 0
-// CHECK19-NEXT:    br i1 [[TMP44]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK19-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
+// CHECK19-NEXT:    store i8* null, i8** [[TMP30]], align 4
+// CHECK19-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK19-NEXT:    [[TMP32:%.*]] = bitcast i8** [[TMP31]] to i16**
+// CHECK19-NEXT:    store i16* [[VLA]], i16** [[TMP32]], align 4
+// CHECK19-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK19-NEXT:    [[TMP34:%.*]] = bitcast i8** [[TMP33]] to i16**
+// CHECK19-NEXT:    store i16* [[VLA]], i16** [[TMP34]], align 4
+// CHECK19-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
+// CHECK19-NEXT:    store i64 [[TMP9]], i64* [[TMP35]], align 4
+// CHECK19-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
+// CHECK19-NEXT:    store i8* null, i8** [[TMP36]], align 4
+// CHECK19-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP40:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l242.region_id, i32 5, i8** [[TMP37]], i8** [[TMP38]], i64* [[TMP39]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.12, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
+// CHECK19-NEXT:    [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0
+// CHECK19-NEXT:    br i1 [[TMP41]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK19:       omp_offload.failed:
 // CHECK19-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l242(%struct.S1* [[THIS1]], i32 [[TMP5]], i32 2, i32 [[TMP1]], i16* [[VLA]]) #[[ATTR4]]
 // CHECK19-NEXT:    br label [[OMP_OFFLOAD_CONT]]
@@ -14376,15 +14236,15 @@ int bar(int n){
 // CHECK19-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l242(%struct.S1* [[THIS1]], i32 [[TMP5]], i32 2, i32 [[TMP1]], i16* [[VLA]]) #[[ATTR4]]
 // CHECK19-NEXT:    br label [[OMP_IF_END]]
 // CHECK19:       omp_if.end:
-// CHECK19-NEXT:    [[TMP45:%.*]] = mul nsw i32 1, [[TMP1]]
-// CHECK19-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i32 [[TMP45]]
+// CHECK19-NEXT:    [[TMP42:%.*]] = mul nsw i32 1, [[TMP1]]
+// CHECK19-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i32 [[TMP42]]
 // CHECK19-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1
-// CHECK19-NEXT:    [[TMP46:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
-// CHECK19-NEXT:    [[CONV:%.*]] = sext i16 [[TMP46]] to i32
-// CHECK19-NEXT:    [[TMP47:%.*]] = load i32, i32* [[B]], align 4
-// CHECK19-NEXT:    [[ADD3:%.*]] = add nsw i32 [[CONV]], [[TMP47]]
-// CHECK19-NEXT:    [[TMP48:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK19-NEXT:    call void @llvm.stackrestore(i8* [[TMP48]])
+// CHECK19-NEXT:    [[TMP43:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
+// CHECK19-NEXT:    [[CONV:%.*]] = sext i16 [[TMP43]] to i32
+// CHECK19-NEXT:    [[TMP44:%.*]] = load i32, i32* [[B]], align 4
+// CHECK19-NEXT:    [[ADD3:%.*]] = add nsw i32 [[CONV]], [[TMP44]]
+// CHECK19-NEXT:    [[TMP45:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK19-NEXT:    call void @llvm.stackrestore(i8* [[TMP45]])
 // CHECK19-NEXT:    ret i32 [[ADD3]]
 //
 //
@@ -14455,7 +14315,7 @@ int bar(int n){
 // CHECK19-NEXT:    store i8* null, i8** [[TMP26]], align 4
 // CHECK19-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK19-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP29:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l224.region_id, i32 4, i8** [[TMP27]], i8** [[TMP28]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.12, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
+// CHECK19-NEXT:    [[TMP29:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l224.region_id, i32 4, i8** [[TMP27]], i8** [[TMP28]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.14, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.15, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
 // CHECK19-NEXT:    [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0
 // CHECK19-NEXT:    br i1 [[TMP30]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK19:       omp_offload.failed:
@@ -14523,7 +14383,7 @@ int bar(int n){
 // CHECK19-NEXT:    store i8* null, i8** [[TMP19]], align 4
 // CHECK19-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK19-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l207.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.15, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.16, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
+// CHECK19-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l207.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.17, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.18, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
 // CHECK19-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK19-NEXT:    br i1 [[TMP23]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK19:       omp_offload.failed:
@@ -14560,11 +14420,11 @@ int bar(int n){
 // CHECK19-NEXT:    [[TMP4:%.*]] = load i32, i32* [[B_ADDR]], align 4
 // CHECK19-NEXT:    store i32 [[TMP4]], i32* [[B_CASTED]], align 4
 // CHECK19-NEXT:    [[TMP5:%.*]] = load i32, i32* [[B_CASTED]], align 4
-// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i32, i32, i32, i16*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i32 [[TMP5]], i32 [[TMP1]], i32 [[TMP2]], i16* [[TMP3]])
+// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i32, i32, i32, i16*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i32 [[TMP5]], i32 [[TMP1]], i32 [[TMP2]], i16* [[TMP3]])
 // CHECK19-NEXT:    ret void
 //
 //
-// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..9
+// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..10
 // CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR3]] {
 // CHECK19-NEXT:  entry:
 // CHECK19-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -14680,11 +14540,11 @@ int bar(int n){
 // CHECK19-NEXT:    [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8*
 // CHECK19-NEXT:    store i8 [[TMP5]], i8* [[CONV3]], align 1
 // CHECK19-NEXT:    [[TMP6:%.*]] = load i32, i32* [[AAA_CASTED]], align 4
-// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, [10 x i32]*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], [10 x i32]* [[TMP0]])
+// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, [10 x i32]*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], [10 x i32]* [[TMP0]])
 // CHECK19-NEXT:    ret void
 //
 //
-// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..11
+// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..13
 // CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] {
 // CHECK19-NEXT:  entry:
 // CHECK19-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -14727,11 +14587,11 @@ int bar(int n){
 // CHECK19-NEXT:    [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16*
 // CHECK19-NEXT:    store i16 [[TMP3]], i16* [[CONV1]], align 2
 // CHECK19-NEXT:    [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4
-// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], [10 x i32]* [[TMP0]])
+// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], [10 x i32]* [[TMP0]])
 // CHECK19-NEXT:    ret void
 //
 //
-// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..14
+// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..16
 // CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] {
 // CHECK19-NEXT:  entry:
 // CHECK19-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -15012,14 +14872,14 @@ int bar(int n){
 // CHECK20-NEXT:    [[TMP84:%.*]] = mul nuw i32 5, [[TMP3]]
 // CHECK20-NEXT:    [[TMP85:%.*]] = mul nuw i32 [[TMP84]], 8
 // CHECK20-NEXT:    [[TMP86:%.*]] = sext i32 [[TMP85]] to i64
-// CHECK20-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP88:%.*]] = bitcast i8** [[TMP87]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP78]], i32* [[TMP88]], align 4
-// CHECK20-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP90:%.*]] = bitcast i8** [[TMP89]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP78]], i32* [[TMP90]], align 4
-// CHECK20-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK20-NEXT:    store i64 4, i64* [[TMP91]], align 4
+// CHECK20-NEXT:    [[TMP87:%.*]] = bitcast [10 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK20-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP87]], i8* align 4 bitcast ([10 x i64]* @.offload_sizes.8 to i8*), i32 80, i1 false)
+// CHECK20-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP89:%.*]] = bitcast i8** [[TMP88]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP78]], i32* [[TMP89]], align 4
+// CHECK20-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP91:%.*]] = bitcast i8** [[TMP90]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP78]], i32* [[TMP91]], align 4
 // CHECK20-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 0
 // CHECK20-NEXT:    store i8* null, i8** [[TMP92]], align 4
 // CHECK20-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 1
@@ -15028,96 +14888,82 @@ int bar(int n){
 // CHECK20-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 1
 // CHECK20-NEXT:    [[TMP96:%.*]] = bitcast i8** [[TMP95]] to [10 x float]**
 // CHECK20-NEXT:    store [10 x float]* [[B]], [10 x float]** [[TMP96]], align 4
-// CHECK20-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK20-NEXT:    store i64 40, i64* [[TMP97]], align 4
-// CHECK20-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 1
-// CHECK20-NEXT:    store i8* null, i8** [[TMP98]], align 4
-// CHECK20-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 2
-// CHECK20-NEXT:    [[TMP100:%.*]] = bitcast i8** [[TMP99]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP1]], i32* [[TMP100]], align 4
-// CHECK20-NEXT:    [[TMP101:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 2
-// CHECK20-NEXT:    [[TMP102:%.*]] = bitcast i8** [[TMP101]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP1]], i32* [[TMP102]], align 4
-// CHECK20-NEXT:    [[TMP103:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK20-NEXT:    store i64 4, i64* [[TMP103]], align 4
-// CHECK20-NEXT:    [[TMP104:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 2
-// CHECK20-NEXT:    store i8* null, i8** [[TMP104]], align 4
-// CHECK20-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 3
+// CHECK20-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 1
+// CHECK20-NEXT:    store i8* null, i8** [[TMP97]], align 4
+// CHECK20-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 2
+// CHECK20-NEXT:    [[TMP99:%.*]] = bitcast i8** [[TMP98]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP1]], i32* [[TMP99]], align 4
+// CHECK20-NEXT:    [[TMP100:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 2
+// CHECK20-NEXT:    [[TMP101:%.*]] = bitcast i8** [[TMP100]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP1]], i32* [[TMP101]], align 4
+// CHECK20-NEXT:    [[TMP102:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 2
+// CHECK20-NEXT:    store i8* null, i8** [[TMP102]], align 4
+// CHECK20-NEXT:    [[TMP103:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 3
+// CHECK20-NEXT:    [[TMP104:%.*]] = bitcast i8** [[TMP103]] to float**
+// CHECK20-NEXT:    store float* [[VLA]], float** [[TMP104]], align 4
+// CHECK20-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 3
 // CHECK20-NEXT:    [[TMP106:%.*]] = bitcast i8** [[TMP105]] to float**
 // CHECK20-NEXT:    store float* [[VLA]], float** [[TMP106]], align 4
-// CHECK20-NEXT:    [[TMP107:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 3
-// CHECK20-NEXT:    [[TMP108:%.*]] = bitcast i8** [[TMP107]] to float**
-// CHECK20-NEXT:    store float* [[VLA]], float** [[TMP108]], align 4
-// CHECK20-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK20-NEXT:    store i64 [[TMP83]], i64* [[TMP109]], align 4
-// CHECK20-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 3
-// CHECK20-NEXT:    store i8* null, i8** [[TMP110]], align 4
-// CHECK20-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 4
+// CHECK20-NEXT:    [[TMP107:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
+// CHECK20-NEXT:    store i64 [[TMP83]], i64* [[TMP107]], align 4
+// CHECK20-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 3
+// CHECK20-NEXT:    store i8* null, i8** [[TMP108]], align 4
+// CHECK20-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 4
+// CHECK20-NEXT:    [[TMP110:%.*]] = bitcast i8** [[TMP109]] to [5 x [10 x double]]**
+// CHECK20-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP110]], align 4
+// CHECK20-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 4
 // CHECK20-NEXT:    [[TMP112:%.*]] = bitcast i8** [[TMP111]] to [5 x [10 x double]]**
 // CHECK20-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP112]], align 4
-// CHECK20-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 4
-// CHECK20-NEXT:    [[TMP114:%.*]] = bitcast i8** [[TMP113]] to [5 x [10 x double]]**
-// CHECK20-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP114]], align 4
-// CHECK20-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK20-NEXT:    store i64 400, i64* [[TMP115]], align 4
-// CHECK20-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 4
-// CHECK20-NEXT:    store i8* null, i8** [[TMP116]], align 4
-// CHECK20-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 5
-// CHECK20-NEXT:    [[TMP118:%.*]] = bitcast i8** [[TMP117]] to i32*
-// CHECK20-NEXT:    store i32 5, i32* [[TMP118]], align 4
-// CHECK20-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 5
+// CHECK20-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 4
+// CHECK20-NEXT:    store i8* null, i8** [[TMP113]], align 4
+// CHECK20-NEXT:    [[TMP114:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 5
+// CHECK20-NEXT:    [[TMP115:%.*]] = bitcast i8** [[TMP114]] to i32*
+// CHECK20-NEXT:    store i32 5, i32* [[TMP115]], align 4
+// CHECK20-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 5
+// CHECK20-NEXT:    [[TMP117:%.*]] = bitcast i8** [[TMP116]] to i32*
+// CHECK20-NEXT:    store i32 5, i32* [[TMP117]], align 4
+// CHECK20-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 5
+// CHECK20-NEXT:    store i8* null, i8** [[TMP118]], align 4
+// CHECK20-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 6
 // CHECK20-NEXT:    [[TMP120:%.*]] = bitcast i8** [[TMP119]] to i32*
-// CHECK20-NEXT:    store i32 5, i32* [[TMP120]], align 4
-// CHECK20-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 5
-// CHECK20-NEXT:    store i64 4, i64* [[TMP121]], align 4
-// CHECK20-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 5
-// CHECK20-NEXT:    store i8* null, i8** [[TMP122]], align 4
-// CHECK20-NEXT:    [[TMP123:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 6
-// CHECK20-NEXT:    [[TMP124:%.*]] = bitcast i8** [[TMP123]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP3]], i32* [[TMP124]], align 4
-// CHECK20-NEXT:    [[TMP125:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 6
-// CHECK20-NEXT:    [[TMP126:%.*]] = bitcast i8** [[TMP125]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP3]], i32* [[TMP126]], align 4
-// CHECK20-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 6
-// CHECK20-NEXT:    store i64 4, i64* [[TMP127]], align 4
-// CHECK20-NEXT:    [[TMP128:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 6
-// CHECK20-NEXT:    store i8* null, i8** [[TMP128]], align 4
-// CHECK20-NEXT:    [[TMP129:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 7
-// CHECK20-NEXT:    [[TMP130:%.*]] = bitcast i8** [[TMP129]] to double**
-// CHECK20-NEXT:    store double* [[VLA1]], double** [[TMP130]], align 4
-// CHECK20-NEXT:    [[TMP131:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 7
-// CHECK20-NEXT:    [[TMP132:%.*]] = bitcast i8** [[TMP131]] to double**
-// CHECK20-NEXT:    store double* [[VLA1]], double** [[TMP132]], align 4
-// CHECK20-NEXT:    [[TMP133:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
-// CHECK20-NEXT:    store i64 [[TMP86]], i64* [[TMP133]], align 4
-// CHECK20-NEXT:    [[TMP134:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 7
+// CHECK20-NEXT:    store i32 [[TMP3]], i32* [[TMP120]], align 4
+// CHECK20-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 6
+// CHECK20-NEXT:    [[TMP122:%.*]] = bitcast i8** [[TMP121]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP3]], i32* [[TMP122]], align 4
+// CHECK20-NEXT:    [[TMP123:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 6
+// CHECK20-NEXT:    store i8* null, i8** [[TMP123]], align 4
+// CHECK20-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 7
+// CHECK20-NEXT:    [[TMP125:%.*]] = bitcast i8** [[TMP124]] to double**
+// CHECK20-NEXT:    store double* [[VLA1]], double** [[TMP125]], align 4
+// CHECK20-NEXT:    [[TMP126:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 7
+// CHECK20-NEXT:    [[TMP127:%.*]] = bitcast i8** [[TMP126]] to double**
+// CHECK20-NEXT:    store double* [[VLA1]], double** [[TMP127]], align 4
+// CHECK20-NEXT:    [[TMP128:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
+// CHECK20-NEXT:    store i64 [[TMP86]], i64* [[TMP128]], align 4
+// CHECK20-NEXT:    [[TMP129:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 7
+// CHECK20-NEXT:    store i8* null, i8** [[TMP129]], align 4
+// CHECK20-NEXT:    [[TMP130:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 8
+// CHECK20-NEXT:    [[TMP131:%.*]] = bitcast i8** [[TMP130]] to %struct.TT**
+// CHECK20-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP131]], align 4
+// CHECK20-NEXT:    [[TMP132:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 8
+// CHECK20-NEXT:    [[TMP133:%.*]] = bitcast i8** [[TMP132]] to %struct.TT**
+// CHECK20-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP133]], align 4
+// CHECK20-NEXT:    [[TMP134:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 8
 // CHECK20-NEXT:    store i8* null, i8** [[TMP134]], align 4
-// CHECK20-NEXT:    [[TMP135:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 8
-// CHECK20-NEXT:    [[TMP136:%.*]] = bitcast i8** [[TMP135]] to %struct.TT**
-// CHECK20-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP136]], align 4
-// CHECK20-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 8
-// CHECK20-NEXT:    [[TMP138:%.*]] = bitcast i8** [[TMP137]] to %struct.TT**
-// CHECK20-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP138]], align 4
-// CHECK20-NEXT:    [[TMP139:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 8
-// CHECK20-NEXT:    store i64 12, i64* [[TMP139]], align 4
-// CHECK20-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 8
-// CHECK20-NEXT:    store i8* null, i8** [[TMP140]], align 4
-// CHECK20-NEXT:    [[TMP141:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 9
-// CHECK20-NEXT:    [[TMP142:%.*]] = bitcast i8** [[TMP141]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP80]], i32* [[TMP142]], align 4
-// CHECK20-NEXT:    [[TMP143:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 9
-// CHECK20-NEXT:    [[TMP144:%.*]] = bitcast i8** [[TMP143]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP80]], i32* [[TMP144]], align 4
-// CHECK20-NEXT:    [[TMP145:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 9
-// CHECK20-NEXT:    store i64 4, i64* [[TMP145]], align 4
-// CHECK20-NEXT:    [[TMP146:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 9
-// CHECK20-NEXT:    store i8* null, i8** [[TMP146]], align 4
-// CHECK20-NEXT:    [[TMP147:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP149:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP150:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l170.region_id, i32 10, i8** [[TMP147]], i8** [[TMP148]], i64* [[TMP149]], i64* getelementptr inbounds ([10 x i64], [10 x i64]* @.offload_maptypes.8, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
-// CHECK20-NEXT:    [[TMP151:%.*]] = icmp ne i32 [[TMP150]], 0
-// CHECK20-NEXT:    br i1 [[TMP151]], label [[OMP_OFFLOAD_FAILED17:%.*]], label [[OMP_OFFLOAD_CONT18:%.*]]
+// CHECK20-NEXT:    [[TMP135:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 9
+// CHECK20-NEXT:    [[TMP136:%.*]] = bitcast i8** [[TMP135]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP80]], i32* [[TMP136]], align 4
+// CHECK20-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 9
+// CHECK20-NEXT:    [[TMP138:%.*]] = bitcast i8** [[TMP137]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP80]], i32* [[TMP138]], align 4
+// CHECK20-NEXT:    [[TMP139:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 9
+// CHECK20-NEXT:    store i8* null, i8** [[TMP139]], align 4
+// CHECK20-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP141:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP143:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l170.region_id, i32 10, i8** [[TMP140]], i8** [[TMP141]], i64* [[TMP142]], i64* getelementptr inbounds ([10 x i64], [10 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
+// CHECK20-NEXT:    [[TMP144:%.*]] = icmp ne i32 [[TMP143]], 0
+// CHECK20-NEXT:    br i1 [[TMP144]], label [[OMP_OFFLOAD_FAILED17:%.*]], label [[OMP_OFFLOAD_CONT18:%.*]]
 // CHECK20:       omp_offload.failed17:
 // CHECK20-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l170(i32 [[TMP78]], [10 x float]* [[B]], i32 [[TMP1]], float* [[VLA]], [5 x [10 x double]]* [[C]], i32 5, i32 [[TMP3]], double* [[VLA1]], %struct.TT* [[D]], i32 [[TMP80]]) #[[ATTR4]]
 // CHECK20-NEXT:    br label [[OMP_OFFLOAD_CONT18]]
@@ -15127,10 +14973,10 @@ int bar(int n){
 // CHECK20-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l170(i32 [[TMP78]], [10 x float]* [[B]], i32 [[TMP1]], float* [[VLA]], [5 x [10 x double]]* [[C]], i32 5, i32 [[TMP3]], double* [[VLA1]], %struct.TT* [[D]], i32 [[TMP80]]) #[[ATTR4]]
 // CHECK20-NEXT:    br label [[OMP_IF_END20]]
 // CHECK20:       omp_if.end20:
-// CHECK20-NEXT:    [[TMP152:%.*]] = load i32, i32* [[A]], align 4
-// CHECK20-NEXT:    [[TMP153:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK20-NEXT:    call void @llvm.stackrestore(i8* [[TMP153]])
-// CHECK20-NEXT:    ret i32 [[TMP152]]
+// CHECK20-NEXT:    [[TMP145:%.*]] = load i32, i32* [[A]], align 4
+// CHECK20-NEXT:    [[TMP146:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK20-NEXT:    call void @llvm.stackrestore(i8* [[TMP146]])
+// CHECK20-NEXT:    ret i32 [[TMP145]]
 //
 //
 // CHECK20-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l103
@@ -15905,14 +15751,14 @@ int bar(int n){
 // CHECK20-NEXT:    [[TMP7:%.*]] = mul nuw i32 2, [[TMP1]]
 // CHECK20-NEXT:    [[TMP8:%.*]] = mul nuw i32 [[TMP7]], 2
 // CHECK20-NEXT:    [[TMP9:%.*]] = sext i32 [[TMP8]] to i64
-// CHECK20-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP11:%.*]] = bitcast i8** [[TMP10]] to %struct.S1**
-// CHECK20-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP11]], align 4
-// CHECK20-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP13:%.*]] = bitcast i8** [[TMP12]] to double**
-// CHECK20-NEXT:    store double* [[A]], double** [[TMP13]], align 4
-// CHECK20-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK20-NEXT:    store i64 8, i64* [[TMP14]], align 4
+// CHECK20-NEXT:    [[TMP10:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK20-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP10]], i8* align 4 bitcast ([5 x i64]* @.offload_sizes.11 to i8*), i32 40, i1 false)
+// CHECK20-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP12:%.*]] = bitcast i8** [[TMP11]] to %struct.S1**
+// CHECK20-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP12]], align 4
+// CHECK20-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to double**
+// CHECK20-NEXT:    store double* [[A]], double** [[TMP14]], align 4
 // CHECK20-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
 // CHECK20-NEXT:    store i8* null, i8** [[TMP15]], align 4
 // CHECK20-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -15921,46 +15767,40 @@ int bar(int n){
 // CHECK20-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK20-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32*
 // CHECK20-NEXT:    store i32 [[TMP5]], i32* [[TMP19]], align 4
-// CHECK20-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK20-NEXT:    store i64 4, i64* [[TMP20]], align 4
-// CHECK20-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK20-NEXT:    store i8* null, i8** [[TMP21]], align 4
-// CHECK20-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK20-NEXT:    [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i32*
-// CHECK20-NEXT:    store i32 2, i32* [[TMP23]], align 4
-// CHECK20-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK20-NEXT:    [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i32*
-// CHECK20-NEXT:    store i32 2, i32* [[TMP25]], align 4
-// CHECK20-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK20-NEXT:    store i64 4, i64* [[TMP26]], align 4
-// CHECK20-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK20-NEXT:    store i8* null, i8** [[TMP27]], align 4
-// CHECK20-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK20-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK20-NEXT:    store i8* null, i8** [[TMP20]], align 4
+// CHECK20-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK20-NEXT:    [[TMP22:%.*]] = bitcast i8** [[TMP21]] to i32*
+// CHECK20-NEXT:    store i32 2, i32* [[TMP22]], align 4
+// CHECK20-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK20-NEXT:    [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i32*
+// CHECK20-NEXT:    store i32 2, i32* [[TMP24]], align 4
+// CHECK20-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK20-NEXT:    store i8* null, i8** [[TMP25]], align 4
+// CHECK20-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK20-NEXT:    [[TMP27:%.*]] = bitcast i8** [[TMP26]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP1]], i32* [[TMP27]], align 4
+// CHECK20-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
 // CHECK20-NEXT:    [[TMP29:%.*]] = bitcast i8** [[TMP28]] to i32*
 // CHECK20-NEXT:    store i32 [[TMP1]], i32* [[TMP29]], align 4
-// CHECK20-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK20-NEXT:    [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP1]], i32* [[TMP31]], align 4
-// CHECK20-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK20-NEXT:    store i64 4, i64* [[TMP32]], align 4
-// CHECK20-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
-// CHECK20-NEXT:    store i8* null, i8** [[TMP33]], align 4
-// CHECK20-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK20-NEXT:    [[TMP35:%.*]] = bitcast i8** [[TMP34]] to i16**
-// CHECK20-NEXT:    store i16* [[VLA]], i16** [[TMP35]], align 4
-// CHECK20-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
-// CHECK20-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i16**
-// CHECK20-NEXT:    store i16* [[VLA]], i16** [[TMP37]], align 4
-// CHECK20-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK20-NEXT:    store i64 [[TMP9]], i64* [[TMP38]], align 4
-// CHECK20-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
-// CHECK20-NEXT:    store i8* null, i8** [[TMP39]], align 4
-// CHECK20-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP43:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l242.region_id, i32 5, i8** [[TMP40]], i8** [[TMP41]], i64* [[TMP42]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.10, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
-// CHECK20-NEXT:    [[TMP44:%.*]] = icmp ne i32 [[TMP43]], 0
-// CHECK20-NEXT:    br i1 [[TMP44]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK20-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
+// CHECK20-NEXT:    store i8* null, i8** [[TMP30]], align 4
+// CHECK20-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK20-NEXT:    [[TMP32:%.*]] = bitcast i8** [[TMP31]] to i16**
+// CHECK20-NEXT:    store i16* [[VLA]], i16** [[TMP32]], align 4
+// CHECK20-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK20-NEXT:    [[TMP34:%.*]] = bitcast i8** [[TMP33]] to i16**
+// CHECK20-NEXT:    store i16* [[VLA]], i16** [[TMP34]], align 4
+// CHECK20-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
+// CHECK20-NEXT:    store i64 [[TMP9]], i64* [[TMP35]], align 4
+// CHECK20-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
+// CHECK20-NEXT:    store i8* null, i8** [[TMP36]], align 4
+// CHECK20-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP40:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l242.region_id, i32 5, i8** [[TMP37]], i8** [[TMP38]], i64* [[TMP39]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.12, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
+// CHECK20-NEXT:    [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0
+// CHECK20-NEXT:    br i1 [[TMP41]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK20:       omp_offload.failed:
 // CHECK20-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l242(%struct.S1* [[THIS1]], i32 [[TMP5]], i32 2, i32 [[TMP1]], i16* [[VLA]]) #[[ATTR4]]
 // CHECK20-NEXT:    br label [[OMP_OFFLOAD_CONT]]
@@ -15970,15 +15810,15 @@ int bar(int n){
 // CHECK20-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l242(%struct.S1* [[THIS1]], i32 [[TMP5]], i32 2, i32 [[TMP1]], i16* [[VLA]]) #[[ATTR4]]
 // CHECK20-NEXT:    br label [[OMP_IF_END]]
 // CHECK20:       omp_if.end:
-// CHECK20-NEXT:    [[TMP45:%.*]] = mul nsw i32 1, [[TMP1]]
-// CHECK20-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i32 [[TMP45]]
+// CHECK20-NEXT:    [[TMP42:%.*]] = mul nsw i32 1, [[TMP1]]
+// CHECK20-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i32 [[TMP42]]
 // CHECK20-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1
-// CHECK20-NEXT:    [[TMP46:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
-// CHECK20-NEXT:    [[CONV:%.*]] = sext i16 [[TMP46]] to i32
-// CHECK20-NEXT:    [[TMP47:%.*]] = load i32, i32* [[B]], align 4
-// CHECK20-NEXT:    [[ADD3:%.*]] = add nsw i32 [[CONV]], [[TMP47]]
-// CHECK20-NEXT:    [[TMP48:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK20-NEXT:    call void @llvm.stackrestore(i8* [[TMP48]])
+// CHECK20-NEXT:    [[TMP43:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
+// CHECK20-NEXT:    [[CONV:%.*]] = sext i16 [[TMP43]] to i32
+// CHECK20-NEXT:    [[TMP44:%.*]] = load i32, i32* [[B]], align 4
+// CHECK20-NEXT:    [[ADD3:%.*]] = add nsw i32 [[CONV]], [[TMP44]]
+// CHECK20-NEXT:    [[TMP45:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK20-NEXT:    call void @llvm.stackrestore(i8* [[TMP45]])
 // CHECK20-NEXT:    ret i32 [[ADD3]]
 //
 //
@@ -16049,7 +15889,7 @@ int bar(int n){
 // CHECK20-NEXT:    store i8* null, i8** [[TMP26]], align 4
 // CHECK20-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK20-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP29:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l224.region_id, i32 4, i8** [[TMP27]], i8** [[TMP28]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.12, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
+// CHECK20-NEXT:    [[TMP29:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l224.region_id, i32 4, i8** [[TMP27]], i8** [[TMP28]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.14, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.15, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
 // CHECK20-NEXT:    [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0
 // CHECK20-NEXT:    br i1 [[TMP30]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK20:       omp_offload.failed:
@@ -16117,7 +15957,7 @@ int bar(int n){
 // CHECK20-NEXT:    store i8* null, i8** [[TMP19]], align 4
 // CHECK20-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK20-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l207.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.15, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.16, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
+// CHECK20-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l207.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.17, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.18, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
 // CHECK20-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK20-NEXT:    br i1 [[TMP23]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK20:       omp_offload.failed:
@@ -16154,11 +15994,11 @@ int bar(int n){
 // CHECK20-NEXT:    [[TMP4:%.*]] = load i32, i32* [[B_ADDR]], align 4
 // CHECK20-NEXT:    store i32 [[TMP4]], i32* [[B_CASTED]], align 4
 // CHECK20-NEXT:    [[TMP5:%.*]] = load i32, i32* [[B_CASTED]], align 4
-// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i32, i32, i32, i16*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i32 [[TMP5]], i32 [[TMP1]], i32 [[TMP2]], i16* [[TMP3]])
+// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i32, i32, i32, i16*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i32 [[TMP5]], i32 [[TMP1]], i32 [[TMP2]], i16* [[TMP3]])
 // CHECK20-NEXT:    ret void
 //
 //
-// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..9
+// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..10
 // CHECK20-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR3]] {
 // CHECK20-NEXT:  entry:
 // CHECK20-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -16274,11 +16114,11 @@ int bar(int n){
 // CHECK20-NEXT:    [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8*
 // CHECK20-NEXT:    store i8 [[TMP5]], i8* [[CONV3]], align 1
 // CHECK20-NEXT:    [[TMP6:%.*]] = load i32, i32* [[AAA_CASTED]], align 4
-// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, [10 x i32]*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], [10 x i32]* [[TMP0]])
+// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, [10 x i32]*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], [10 x i32]* [[TMP0]])
 // CHECK20-NEXT:    ret void
 //
 //
-// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..11
+// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..13
 // CHECK20-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] {
 // CHECK20-NEXT:  entry:
 // CHECK20-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -16321,11 +16161,11 @@ int bar(int n){
 // CHECK20-NEXT:    [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16*
 // CHECK20-NEXT:    store i16 [[TMP3]], i16* [[CONV1]], align 2
 // CHECK20-NEXT:    [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4
-// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], [10 x i32]* [[TMP0]])
+// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], [10 x i32]* [[TMP0]])
 // CHECK20-NEXT:    ret void
 //
 //
-// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..14
+// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..16
 // CHECK20-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] {
 // CHECK20-NEXT:  entry:
 // CHECK20-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4

diff  --git a/clang/test/OpenMP/target_parallel_for_simd_codegen.cpp b/clang/test/OpenMP/target_parallel_for_simd_codegen.cpp
index cf47e9d0a2504..63ec2e990c2b6 100644
--- a/clang/test/OpenMP/target_parallel_for_simd_codegen.cpp
+++ b/clang/test/OpenMP/target_parallel_for_simd_codegen.cpp
@@ -468,14 +468,14 @@ int bar(int n){
 // CHECK1-NEXT:    [[TMP65:%.*]] = mul nuw i64 [[TMP2]], 4
 // CHECK1-NEXT:    [[TMP66:%.*]] = mul nuw i64 5, [[TMP5]]
 // CHECK1-NEXT:    [[TMP67:%.*]] = mul nuw i64 [[TMP66]], 8
-// CHECK1-NEXT:    [[TMP68:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0
-// CHECK1-NEXT:    [[TMP69:%.*]] = bitcast i8** [[TMP68]] to i64*
-// CHECK1-NEXT:    store i64 [[TMP61]], i64* [[TMP69]], align 8
-// CHECK1-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 0
-// CHECK1-NEXT:    [[TMP71:%.*]] = bitcast i8** [[TMP70]] to i64*
-// CHECK1-NEXT:    store i64 [[TMP61]], i64* [[TMP71]], align 8
-// CHECK1-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK1-NEXT:    store i64 4, i64* [[TMP72]], align 8
+// CHECK1-NEXT:    [[TMP68:%.*]] = bitcast [10 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK1-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP68]], i8* align 8 bitcast ([10 x i64]* @.offload_sizes.8 to i8*), i64 80, i1 false)
+// CHECK1-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP70:%.*]] = bitcast i8** [[TMP69]] to i64*
+// CHECK1-NEXT:    store i64 [[TMP61]], i64* [[TMP70]], align 8
+// CHECK1-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP72:%.*]] = bitcast i8** [[TMP71]] to i64*
+// CHECK1-NEXT:    store i64 [[TMP61]], i64* [[TMP72]], align 8
 // CHECK1-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 0
 // CHECK1-NEXT:    store i8* null, i8** [[TMP73]], align 8
 // CHECK1-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 1
@@ -484,96 +484,82 @@ int bar(int n){
 // CHECK1-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 1
 // CHECK1-NEXT:    [[TMP77:%.*]] = bitcast i8** [[TMP76]] to [10 x float]**
 // CHECK1-NEXT:    store [10 x float]* [[B]], [10 x float]** [[TMP77]], align 8
-// CHECK1-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK1-NEXT:    store i64 40, i64* [[TMP78]], align 8
-// CHECK1-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 1
-// CHECK1-NEXT:    store i8* null, i8** [[TMP79]], align 8
-// CHECK1-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 2
-// CHECK1-NEXT:    [[TMP81:%.*]] = bitcast i8** [[TMP80]] to i64*
-// CHECK1-NEXT:    store i64 [[TMP2]], i64* [[TMP81]], align 8
-// CHECK1-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 2
-// CHECK1-NEXT:    [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i64*
-// CHECK1-NEXT:    store i64 [[TMP2]], i64* [[TMP83]], align 8
-// CHECK1-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK1-NEXT:    store i64 8, i64* [[TMP84]], align 8
-// CHECK1-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 2
-// CHECK1-NEXT:    store i8* null, i8** [[TMP85]], align 8
-// CHECK1-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 3
+// CHECK1-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 1
+// CHECK1-NEXT:    store i8* null, i8** [[TMP78]], align 8
+// CHECK1-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 2
+// CHECK1-NEXT:    [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i64*
+// CHECK1-NEXT:    store i64 [[TMP2]], i64* [[TMP80]], align 8
+// CHECK1-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 2
+// CHECK1-NEXT:    [[TMP82:%.*]] = bitcast i8** [[TMP81]] to i64*
+// CHECK1-NEXT:    store i64 [[TMP2]], i64* [[TMP82]], align 8
+// CHECK1-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 2
+// CHECK1-NEXT:    store i8* null, i8** [[TMP83]], align 8
+// CHECK1-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 3
+// CHECK1-NEXT:    [[TMP85:%.*]] = bitcast i8** [[TMP84]] to float**
+// CHECK1-NEXT:    store float* [[VLA]], float** [[TMP85]], align 8
+// CHECK1-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 3
 // CHECK1-NEXT:    [[TMP87:%.*]] = bitcast i8** [[TMP86]] to float**
 // CHECK1-NEXT:    store float* [[VLA]], float** [[TMP87]], align 8
-// CHECK1-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 3
-// CHECK1-NEXT:    [[TMP89:%.*]] = bitcast i8** [[TMP88]] to float**
-// CHECK1-NEXT:    store float* [[VLA]], float** [[TMP89]], align 8
-// CHECK1-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK1-NEXT:    store i64 [[TMP65]], i64* [[TMP90]], align 8
-// CHECK1-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 3
-// CHECK1-NEXT:    store i8* null, i8** [[TMP91]], align 8
-// CHECK1-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 4
+// CHECK1-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
+// CHECK1-NEXT:    store i64 [[TMP65]], i64* [[TMP88]], align 8
+// CHECK1-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 3
+// CHECK1-NEXT:    store i8* null, i8** [[TMP89]], align 8
+// CHECK1-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 4
+// CHECK1-NEXT:    [[TMP91:%.*]] = bitcast i8** [[TMP90]] to [5 x [10 x double]]**
+// CHECK1-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP91]], align 8
+// CHECK1-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 4
 // CHECK1-NEXT:    [[TMP93:%.*]] = bitcast i8** [[TMP92]] to [5 x [10 x double]]**
 // CHECK1-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP93]], align 8
-// CHECK1-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 4
-// CHECK1-NEXT:    [[TMP95:%.*]] = bitcast i8** [[TMP94]] to [5 x [10 x double]]**
-// CHECK1-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP95]], align 8
-// CHECK1-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK1-NEXT:    store i64 400, i64* [[TMP96]], align 8
-// CHECK1-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 4
-// CHECK1-NEXT:    store i8* null, i8** [[TMP97]], align 8
-// CHECK1-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 5
-// CHECK1-NEXT:    [[TMP99:%.*]] = bitcast i8** [[TMP98]] to i64*
-// CHECK1-NEXT:    store i64 5, i64* [[TMP99]], align 8
-// CHECK1-NEXT:    [[TMP100:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 5
+// CHECK1-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 4
+// CHECK1-NEXT:    store i8* null, i8** [[TMP94]], align 8
+// CHECK1-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 5
+// CHECK1-NEXT:    [[TMP96:%.*]] = bitcast i8** [[TMP95]] to i64*
+// CHECK1-NEXT:    store i64 5, i64* [[TMP96]], align 8
+// CHECK1-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 5
+// CHECK1-NEXT:    [[TMP98:%.*]] = bitcast i8** [[TMP97]] to i64*
+// CHECK1-NEXT:    store i64 5, i64* [[TMP98]], align 8
+// CHECK1-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 5
+// CHECK1-NEXT:    store i8* null, i8** [[TMP99]], align 8
+// CHECK1-NEXT:    [[TMP100:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 6
 // CHECK1-NEXT:    [[TMP101:%.*]] = bitcast i8** [[TMP100]] to i64*
-// CHECK1-NEXT:    store i64 5, i64* [[TMP101]], align 8
-// CHECK1-NEXT:    [[TMP102:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 5
-// CHECK1-NEXT:    store i64 8, i64* [[TMP102]], align 8
-// CHECK1-NEXT:    [[TMP103:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 5
-// CHECK1-NEXT:    store i8* null, i8** [[TMP103]], align 8
-// CHECK1-NEXT:    [[TMP104:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 6
-// CHECK1-NEXT:    [[TMP105:%.*]] = bitcast i8** [[TMP104]] to i64*
-// CHECK1-NEXT:    store i64 [[TMP5]], i64* [[TMP105]], align 8
-// CHECK1-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 6
-// CHECK1-NEXT:    [[TMP107:%.*]] = bitcast i8** [[TMP106]] to i64*
-// CHECK1-NEXT:    store i64 [[TMP5]], i64* [[TMP107]], align 8
-// CHECK1-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 6
-// CHECK1-NEXT:    store i64 8, i64* [[TMP108]], align 8
-// CHECK1-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 6
-// CHECK1-NEXT:    store i8* null, i8** [[TMP109]], align 8
-// CHECK1-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 7
-// CHECK1-NEXT:    [[TMP111:%.*]] = bitcast i8** [[TMP110]] to double**
-// CHECK1-NEXT:    store double* [[VLA1]], double** [[TMP111]], align 8
-// CHECK1-NEXT:    [[TMP112:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 7
-// CHECK1-NEXT:    [[TMP113:%.*]] = bitcast i8** [[TMP112]] to double**
-// CHECK1-NEXT:    store double* [[VLA1]], double** [[TMP113]], align 8
-// CHECK1-NEXT:    [[TMP114:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
-// CHECK1-NEXT:    store i64 [[TMP67]], i64* [[TMP114]], align 8
-// CHECK1-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 7
+// CHECK1-NEXT:    store i64 [[TMP5]], i64* [[TMP101]], align 8
+// CHECK1-NEXT:    [[TMP102:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 6
+// CHECK1-NEXT:    [[TMP103:%.*]] = bitcast i8** [[TMP102]] to i64*
+// CHECK1-NEXT:    store i64 [[TMP5]], i64* [[TMP103]], align 8
+// CHECK1-NEXT:    [[TMP104:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 6
+// CHECK1-NEXT:    store i8* null, i8** [[TMP104]], align 8
+// CHECK1-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 7
+// CHECK1-NEXT:    [[TMP106:%.*]] = bitcast i8** [[TMP105]] to double**
+// CHECK1-NEXT:    store double* [[VLA1]], double** [[TMP106]], align 8
+// CHECK1-NEXT:    [[TMP107:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 7
+// CHECK1-NEXT:    [[TMP108:%.*]] = bitcast i8** [[TMP107]] to double**
+// CHECK1-NEXT:    store double* [[VLA1]], double** [[TMP108]], align 8
+// CHECK1-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
+// CHECK1-NEXT:    store i64 [[TMP67]], i64* [[TMP109]], align 8
+// CHECK1-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 7
+// CHECK1-NEXT:    store i8* null, i8** [[TMP110]], align 8
+// CHECK1-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 8
+// CHECK1-NEXT:    [[TMP112:%.*]] = bitcast i8** [[TMP111]] to %struct.TT**
+// CHECK1-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP112]], align 8
+// CHECK1-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 8
+// CHECK1-NEXT:    [[TMP114:%.*]] = bitcast i8** [[TMP113]] to %struct.TT**
+// CHECK1-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP114]], align 8
+// CHECK1-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 8
 // CHECK1-NEXT:    store i8* null, i8** [[TMP115]], align 8
-// CHECK1-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 8
-// CHECK1-NEXT:    [[TMP117:%.*]] = bitcast i8** [[TMP116]] to %struct.TT**
-// CHECK1-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP117]], align 8
-// CHECK1-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 8
-// CHECK1-NEXT:    [[TMP119:%.*]] = bitcast i8** [[TMP118]] to %struct.TT**
-// CHECK1-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP119]], align 8
-// CHECK1-NEXT:    [[TMP120:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 8
-// CHECK1-NEXT:    store i64 16, i64* [[TMP120]], align 8
-// CHECK1-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 8
-// CHECK1-NEXT:    store i8* null, i8** [[TMP121]], align 8
-// CHECK1-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 9
-// CHECK1-NEXT:    [[TMP123:%.*]] = bitcast i8** [[TMP122]] to i64*
-// CHECK1-NEXT:    store i64 [[TMP63]], i64* [[TMP123]], align 8
-// CHECK1-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 9
-// CHECK1-NEXT:    [[TMP125:%.*]] = bitcast i8** [[TMP124]] to i64*
-// CHECK1-NEXT:    store i64 [[TMP63]], i64* [[TMP125]], align 8
-// CHECK1-NEXT:    [[TMP126:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 9
-// CHECK1-NEXT:    store i64 4, i64* [[TMP126]], align 8
-// CHECK1-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 9
-// CHECK1-NEXT:    store i8* null, i8** [[TMP127]], align 8
-// CHECK1-NEXT:    [[TMP128:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0
-// CHECK1-NEXT:    [[TMP129:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 0
-// CHECK1-NEXT:    [[TMP130:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK1-NEXT:    [[TMP131:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l140.region_id, i32 10, i8** [[TMP128]], i8** [[TMP129]], i64* [[TMP130]], i64* getelementptr inbounds ([10 x i64], [10 x i64]* @.offload_maptypes.8, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
-// CHECK1-NEXT:    [[TMP132:%.*]] = icmp ne i32 [[TMP131]], 0
-// CHECK1-NEXT:    br i1 [[TMP132]], label [[OMP_OFFLOAD_FAILED23:%.*]], label [[OMP_OFFLOAD_CONT24:%.*]]
+// CHECK1-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 9
+// CHECK1-NEXT:    [[TMP117:%.*]] = bitcast i8** [[TMP116]] to i64*
+// CHECK1-NEXT:    store i64 [[TMP63]], i64* [[TMP117]], align 8
+// CHECK1-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 9
+// CHECK1-NEXT:    [[TMP119:%.*]] = bitcast i8** [[TMP118]] to i64*
+// CHECK1-NEXT:    store i64 [[TMP63]], i64* [[TMP119]], align 8
+// CHECK1-NEXT:    [[TMP120:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 9
+// CHECK1-NEXT:    store i8* null, i8** [[TMP120]], align 8
+// CHECK1-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP123:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP124:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l140.region_id, i32 10, i8** [[TMP121]], i8** [[TMP122]], i64* [[TMP123]], i64* getelementptr inbounds ([10 x i64], [10 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
+// CHECK1-NEXT:    [[TMP125:%.*]] = icmp ne i32 [[TMP124]], 0
+// CHECK1-NEXT:    br i1 [[TMP125]], label [[OMP_OFFLOAD_FAILED23:%.*]], label [[OMP_OFFLOAD_CONT24:%.*]]
 // CHECK1:       omp_offload.failed23:
 // CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l140(i64 [[TMP61]], [10 x float]* [[B]], i64 [[TMP2]], float* [[VLA]], [5 x [10 x double]]* [[C]], i64 5, i64 [[TMP5]], double* [[VLA1]], %struct.TT* [[D]], i64 [[TMP63]]) #[[ATTR4]]
 // CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT24]]
@@ -583,10 +569,10 @@ int bar(int n){
 // CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l140(i64 [[TMP61]], [10 x float]* [[B]], i64 [[TMP2]], float* [[VLA]], [5 x [10 x double]]* [[C]], i64 5, i64 [[TMP5]], double* [[VLA1]], %struct.TT* [[D]], i64 [[TMP63]]) #[[ATTR4]]
 // CHECK1-NEXT:    br label [[OMP_IF_END26]]
 // CHECK1:       omp_if.end26:
-// CHECK1-NEXT:    [[TMP133:%.*]] = load i32, i32* [[A]], align 4
-// CHECK1-NEXT:    [[TMP134:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK1-NEXT:    call void @llvm.stackrestore(i8* [[TMP134]])
-// CHECK1-NEXT:    ret i32 [[TMP133]]
+// CHECK1-NEXT:    [[TMP126:%.*]] = load i32, i32* [[A]], align 4
+// CHECK1-NEXT:    [[TMP127:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK1-NEXT:    call void @llvm.stackrestore(i8* [[TMP127]])
+// CHECK1-NEXT:    ret i32 [[TMP126]]
 //
 //
 // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l96
@@ -1339,14 +1325,14 @@ int bar(int n){
 // CHECK1-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP8:%.*]] = mul nuw i64 2, [[TMP2]]
 // CHECK1-NEXT:    [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 2
-// CHECK1-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[TMP11:%.*]] = bitcast i8** [[TMP10]] to %struct.S1**
-// CHECK1-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP11]], align 8
-// CHECK1-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[TMP13:%.*]] = bitcast i8** [[TMP12]] to double**
-// CHECK1-NEXT:    store double* [[A]], double** [[TMP13]], align 8
-// CHECK1-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK1-NEXT:    store i64 8, i64* [[TMP14]], align 8
+// CHECK1-NEXT:    [[TMP10:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK1-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP10]], i8* align 8 bitcast ([5 x i64]* @.offload_sizes.11 to i8*), i64 40, i1 false)
+// CHECK1-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP12:%.*]] = bitcast i8** [[TMP11]] to %struct.S1**
+// CHECK1-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP12]], align 8
+// CHECK1-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to double**
+// CHECK1-NEXT:    store double* [[A]], double** [[TMP14]], align 8
 // CHECK1-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
 // CHECK1-NEXT:    store i8* null, i8** [[TMP15]], align 8
 // CHECK1-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -1355,46 +1341,40 @@ int bar(int n){
 // CHECK1-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK1-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i64*
 // CHECK1-NEXT:    store i64 [[TMP6]], i64* [[TMP19]], align 8
-// CHECK1-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK1-NEXT:    store i64 4, i64* [[TMP20]], align 8
-// CHECK1-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK1-NEXT:    store i8* null, i8** [[TMP21]], align 8
-// CHECK1-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK1-NEXT:    [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i64*
-// CHECK1-NEXT:    store i64 2, i64* [[TMP23]], align 8
-// CHECK1-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK1-NEXT:    [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i64*
-// CHECK1-NEXT:    store i64 2, i64* [[TMP25]], align 8
-// CHECK1-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK1-NEXT:    store i64 8, i64* [[TMP26]], align 8
-// CHECK1-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK1-NEXT:    store i8* null, i8** [[TMP27]], align 8
-// CHECK1-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK1-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK1-NEXT:    store i8* null, i8** [[TMP20]], align 8
+// CHECK1-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK1-NEXT:    [[TMP22:%.*]] = bitcast i8** [[TMP21]] to i64*
+// CHECK1-NEXT:    store i64 2, i64* [[TMP22]], align 8
+// CHECK1-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK1-NEXT:    [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i64*
+// CHECK1-NEXT:    store i64 2, i64* [[TMP24]], align 8
+// CHECK1-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK1-NEXT:    store i8* null, i8** [[TMP25]], align 8
+// CHECK1-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK1-NEXT:    [[TMP27:%.*]] = bitcast i8** [[TMP26]] to i64*
+// CHECK1-NEXT:    store i64 [[TMP2]], i64* [[TMP27]], align 8
+// CHECK1-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
 // CHECK1-NEXT:    [[TMP29:%.*]] = bitcast i8** [[TMP28]] to i64*
 // CHECK1-NEXT:    store i64 [[TMP2]], i64* [[TMP29]], align 8
-// CHECK1-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK1-NEXT:    [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i64*
-// CHECK1-NEXT:    store i64 [[TMP2]], i64* [[TMP31]], align 8
-// CHECK1-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK1-NEXT:    store i64 8, i64* [[TMP32]], align 8
-// CHECK1-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
-// CHECK1-NEXT:    store i8* null, i8** [[TMP33]], align 8
-// CHECK1-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK1-NEXT:    [[TMP35:%.*]] = bitcast i8** [[TMP34]] to i16**
-// CHECK1-NEXT:    store i16* [[VLA]], i16** [[TMP35]], align 8
-// CHECK1-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
-// CHECK1-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i16**
-// CHECK1-NEXT:    store i16* [[VLA]], i16** [[TMP37]], align 8
-// CHECK1-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK1-NEXT:    store i64 [[TMP9]], i64* [[TMP38]], align 8
-// CHECK1-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
-// CHECK1-NEXT:    store i8* null, i8** [[TMP39]], align 8
-// CHECK1-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK1-NEXT:    [[TMP43:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l216.region_id, i32 5, i8** [[TMP40]], i8** [[TMP41]], i64* [[TMP42]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.10, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
-// CHECK1-NEXT:    [[TMP44:%.*]] = icmp ne i32 [[TMP43]], 0
-// CHECK1-NEXT:    br i1 [[TMP44]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK1-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
+// CHECK1-NEXT:    store i8* null, i8** [[TMP30]], align 8
+// CHECK1-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK1-NEXT:    [[TMP32:%.*]] = bitcast i8** [[TMP31]] to i16**
+// CHECK1-NEXT:    store i16* [[VLA]], i16** [[TMP32]], align 8
+// CHECK1-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK1-NEXT:    [[TMP34:%.*]] = bitcast i8** [[TMP33]] to i16**
+// CHECK1-NEXT:    store i16* [[VLA]], i16** [[TMP34]], align 8
+// CHECK1-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
+// CHECK1-NEXT:    store i64 [[TMP9]], i64* [[TMP35]], align 8
+// CHECK1-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
+// CHECK1-NEXT:    store i8* null, i8** [[TMP36]], align 8
+// CHECK1-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP40:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l216.region_id, i32 5, i8** [[TMP37]], i8** [[TMP38]], i64* [[TMP39]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.12, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
+// CHECK1-NEXT:    [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0
+// CHECK1-NEXT:    br i1 [[TMP41]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK1:       omp_offload.failed:
 // CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l216(%struct.S1* [[THIS1]], i64 [[TMP6]], i64 2, i64 [[TMP2]], i16* [[VLA]]) #[[ATTR4]]
 // CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT]]
@@ -1404,15 +1384,15 @@ int bar(int n){
 // CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l216(%struct.S1* [[THIS1]], i64 [[TMP6]], i64 2, i64 [[TMP2]], i16* [[VLA]]) #[[ATTR4]]
 // CHECK1-NEXT:    br label [[OMP_IF_END]]
 // CHECK1:       omp_if.end:
-// CHECK1-NEXT:    [[TMP45:%.*]] = mul nsw i64 1, [[TMP2]]
-// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i64 [[TMP45]]
+// CHECK1-NEXT:    [[TMP42:%.*]] = mul nsw i64 1, [[TMP2]]
+// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i64 [[TMP42]]
 // CHECK1-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1
-// CHECK1-NEXT:    [[TMP46:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
-// CHECK1-NEXT:    [[CONV3:%.*]] = sext i16 [[TMP46]] to i32
-// CHECK1-NEXT:    [[TMP47:%.*]] = load i32, i32* [[B]], align 4
-// CHECK1-NEXT:    [[ADD4:%.*]] = add nsw i32 [[CONV3]], [[TMP47]]
-// CHECK1-NEXT:    [[TMP48:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK1-NEXT:    call void @llvm.stackrestore(i8* [[TMP48]])
+// CHECK1-NEXT:    [[TMP43:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
+// CHECK1-NEXT:    [[CONV3:%.*]] = sext i16 [[TMP43]] to i32
+// CHECK1-NEXT:    [[TMP44:%.*]] = load i32, i32* [[B]], align 4
+// CHECK1-NEXT:    [[ADD4:%.*]] = add nsw i32 [[CONV3]], [[TMP44]]
+// CHECK1-NEXT:    [[TMP45:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK1-NEXT:    call void @llvm.stackrestore(i8* [[TMP45]])
 // CHECK1-NEXT:    ret i32 [[ADD4]]
 //
 //
@@ -1484,7 +1464,7 @@ int bar(int n){
 // CHECK1-NEXT:    store i8* null, i8** [[TMP26]], align 8
 // CHECK1-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[TMP29:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l195.region_id, i32 4, i8** [[TMP27]], i8** [[TMP28]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.12, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
+// CHECK1-NEXT:    [[TMP29:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l195.region_id, i32 4, i8** [[TMP27]], i8** [[TMP28]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.14, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.15, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
 // CHECK1-NEXT:    [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0
 // CHECK1-NEXT:    br i1 [[TMP30]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK1:       omp_offload.failed:
@@ -1553,7 +1533,7 @@ int bar(int n){
 // CHECK1-NEXT:    store i8* null, i8** [[TMP19]], align 8
 // CHECK1-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l178.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.15, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.16, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
+// CHECK1-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l178.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.17, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.18, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
 // CHECK1-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK1-NEXT:    br i1 [[TMP23]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK1:       omp_offload.failed:
@@ -1592,11 +1572,11 @@ int bar(int n){
 // CHECK1-NEXT:    [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32*
 // CHECK1-NEXT:    store i32 [[TMP4]], i32* [[CONV3]], align 4
 // CHECK1-NEXT:    [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8
-// CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i64, i64, i64, i16*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i64 [[TMP5]], i64 [[TMP1]], i64 [[TMP2]], i16* [[TMP3]])
+// CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i64, i64, i64, i16*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i64 [[TMP5]], i64 [[TMP1]], i64 [[TMP2]], i16* [[TMP3]])
 // CHECK1-NEXT:    ret void
 //
 //
-// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..9
+// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..10
 // CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR3]] {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -1722,11 +1702,11 @@ int bar(int n){
 // CHECK1-NEXT:    [[CONV5:%.*]] = bitcast i64* [[AAA_CASTED]] to i8*
 // CHECK1-NEXT:    store i8 [[TMP5]], i8* [[CONV5]], align 1
 // CHECK1-NEXT:    [[TMP6:%.*]] = load i64, i64* [[AAA_CASTED]], align 8
-// CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, [10 x i32]*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], [10 x i32]* [[TMP0]])
+// CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, [10 x i32]*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], [10 x i32]* [[TMP0]])
 // CHECK1-NEXT:    ret void
 //
 //
-// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..11
+// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..13
 // CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -1772,11 +1752,11 @@ int bar(int n){
 // CHECK1-NEXT:    [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16*
 // CHECK1-NEXT:    store i16 [[TMP3]], i16* [[CONV3]], align 2
 // CHECK1-NEXT:    [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8
-// CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], [10 x i32]* [[TMP0]])
+// CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], [10 x i32]* [[TMP0]])
 // CHECK1-NEXT:    ret void
 //
 //
-// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..14
+// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..16
 // CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -1865,7 +1845,7 @@ int bar(int n){
 //
 //
 // CHECK1-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK1-SAME: () #[[ATTR7:[0-9]+]] {
+// CHECK1-SAME: () #[[ATTR8:[0-9]+]] {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK1-NEXT:    ret void
@@ -2046,14 +2026,14 @@ int bar(int n){
 // CHECK2-NEXT:    [[TMP65:%.*]] = mul nuw i64 [[TMP2]], 4
 // CHECK2-NEXT:    [[TMP66:%.*]] = mul nuw i64 5, [[TMP5]]
 // CHECK2-NEXT:    [[TMP67:%.*]] = mul nuw i64 [[TMP66]], 8
-// CHECK2-NEXT:    [[TMP68:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0
-// CHECK2-NEXT:    [[TMP69:%.*]] = bitcast i8** [[TMP68]] to i64*
-// CHECK2-NEXT:    store i64 [[TMP61]], i64* [[TMP69]], align 8
-// CHECK2-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 0
-// CHECK2-NEXT:    [[TMP71:%.*]] = bitcast i8** [[TMP70]] to i64*
-// CHECK2-NEXT:    store i64 [[TMP61]], i64* [[TMP71]], align 8
-// CHECK2-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK2-NEXT:    store i64 4, i64* [[TMP72]], align 8
+// CHECK2-NEXT:    [[TMP68:%.*]] = bitcast [10 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK2-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP68]], i8* align 8 bitcast ([10 x i64]* @.offload_sizes.8 to i8*), i64 80, i1 false)
+// CHECK2-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP70:%.*]] = bitcast i8** [[TMP69]] to i64*
+// CHECK2-NEXT:    store i64 [[TMP61]], i64* [[TMP70]], align 8
+// CHECK2-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP72:%.*]] = bitcast i8** [[TMP71]] to i64*
+// CHECK2-NEXT:    store i64 [[TMP61]], i64* [[TMP72]], align 8
 // CHECK2-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 0
 // CHECK2-NEXT:    store i8* null, i8** [[TMP73]], align 8
 // CHECK2-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 1
@@ -2062,96 +2042,82 @@ int bar(int n){
 // CHECK2-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 1
 // CHECK2-NEXT:    [[TMP77:%.*]] = bitcast i8** [[TMP76]] to [10 x float]**
 // CHECK2-NEXT:    store [10 x float]* [[B]], [10 x float]** [[TMP77]], align 8
-// CHECK2-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK2-NEXT:    store i64 40, i64* [[TMP78]], align 8
-// CHECK2-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 1
-// CHECK2-NEXT:    store i8* null, i8** [[TMP79]], align 8
-// CHECK2-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 2
-// CHECK2-NEXT:    [[TMP81:%.*]] = bitcast i8** [[TMP80]] to i64*
-// CHECK2-NEXT:    store i64 [[TMP2]], i64* [[TMP81]], align 8
-// CHECK2-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 2
-// CHECK2-NEXT:    [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i64*
-// CHECK2-NEXT:    store i64 [[TMP2]], i64* [[TMP83]], align 8
-// CHECK2-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK2-NEXT:    store i64 8, i64* [[TMP84]], align 8
-// CHECK2-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 2
-// CHECK2-NEXT:    store i8* null, i8** [[TMP85]], align 8
-// CHECK2-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 3
+// CHECK2-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 1
+// CHECK2-NEXT:    store i8* null, i8** [[TMP78]], align 8
+// CHECK2-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 2
+// CHECK2-NEXT:    [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i64*
+// CHECK2-NEXT:    store i64 [[TMP2]], i64* [[TMP80]], align 8
+// CHECK2-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 2
+// CHECK2-NEXT:    [[TMP82:%.*]] = bitcast i8** [[TMP81]] to i64*
+// CHECK2-NEXT:    store i64 [[TMP2]], i64* [[TMP82]], align 8
+// CHECK2-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 2
+// CHECK2-NEXT:    store i8* null, i8** [[TMP83]], align 8
+// CHECK2-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 3
+// CHECK2-NEXT:    [[TMP85:%.*]] = bitcast i8** [[TMP84]] to float**
+// CHECK2-NEXT:    store float* [[VLA]], float** [[TMP85]], align 8
+// CHECK2-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 3
 // CHECK2-NEXT:    [[TMP87:%.*]] = bitcast i8** [[TMP86]] to float**
 // CHECK2-NEXT:    store float* [[VLA]], float** [[TMP87]], align 8
-// CHECK2-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 3
-// CHECK2-NEXT:    [[TMP89:%.*]] = bitcast i8** [[TMP88]] to float**
-// CHECK2-NEXT:    store float* [[VLA]], float** [[TMP89]], align 8
-// CHECK2-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK2-NEXT:    store i64 [[TMP65]], i64* [[TMP90]], align 8
-// CHECK2-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 3
-// CHECK2-NEXT:    store i8* null, i8** [[TMP91]], align 8
-// CHECK2-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 4
+// CHECK2-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
+// CHECK2-NEXT:    store i64 [[TMP65]], i64* [[TMP88]], align 8
+// CHECK2-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 3
+// CHECK2-NEXT:    store i8* null, i8** [[TMP89]], align 8
+// CHECK2-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 4
+// CHECK2-NEXT:    [[TMP91:%.*]] = bitcast i8** [[TMP90]] to [5 x [10 x double]]**
+// CHECK2-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP91]], align 8
+// CHECK2-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 4
 // CHECK2-NEXT:    [[TMP93:%.*]] = bitcast i8** [[TMP92]] to [5 x [10 x double]]**
 // CHECK2-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP93]], align 8
-// CHECK2-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 4
-// CHECK2-NEXT:    [[TMP95:%.*]] = bitcast i8** [[TMP94]] to [5 x [10 x double]]**
-// CHECK2-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP95]], align 8
-// CHECK2-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK2-NEXT:    store i64 400, i64* [[TMP96]], align 8
-// CHECK2-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 4
-// CHECK2-NEXT:    store i8* null, i8** [[TMP97]], align 8
-// CHECK2-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 5
-// CHECK2-NEXT:    [[TMP99:%.*]] = bitcast i8** [[TMP98]] to i64*
-// CHECK2-NEXT:    store i64 5, i64* [[TMP99]], align 8
-// CHECK2-NEXT:    [[TMP100:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 5
+// CHECK2-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 4
+// CHECK2-NEXT:    store i8* null, i8** [[TMP94]], align 8
+// CHECK2-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 5
+// CHECK2-NEXT:    [[TMP96:%.*]] = bitcast i8** [[TMP95]] to i64*
+// CHECK2-NEXT:    store i64 5, i64* [[TMP96]], align 8
+// CHECK2-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 5
+// CHECK2-NEXT:    [[TMP98:%.*]] = bitcast i8** [[TMP97]] to i64*
+// CHECK2-NEXT:    store i64 5, i64* [[TMP98]], align 8
+// CHECK2-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 5
+// CHECK2-NEXT:    store i8* null, i8** [[TMP99]], align 8
+// CHECK2-NEXT:    [[TMP100:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 6
 // CHECK2-NEXT:    [[TMP101:%.*]] = bitcast i8** [[TMP100]] to i64*
-// CHECK2-NEXT:    store i64 5, i64* [[TMP101]], align 8
-// CHECK2-NEXT:    [[TMP102:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 5
-// CHECK2-NEXT:    store i64 8, i64* [[TMP102]], align 8
-// CHECK2-NEXT:    [[TMP103:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 5
-// CHECK2-NEXT:    store i8* null, i8** [[TMP103]], align 8
-// CHECK2-NEXT:    [[TMP104:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 6
-// CHECK2-NEXT:    [[TMP105:%.*]] = bitcast i8** [[TMP104]] to i64*
-// CHECK2-NEXT:    store i64 [[TMP5]], i64* [[TMP105]], align 8
-// CHECK2-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 6
-// CHECK2-NEXT:    [[TMP107:%.*]] = bitcast i8** [[TMP106]] to i64*
-// CHECK2-NEXT:    store i64 [[TMP5]], i64* [[TMP107]], align 8
-// CHECK2-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 6
-// CHECK2-NEXT:    store i64 8, i64* [[TMP108]], align 8
-// CHECK2-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 6
-// CHECK2-NEXT:    store i8* null, i8** [[TMP109]], align 8
-// CHECK2-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 7
-// CHECK2-NEXT:    [[TMP111:%.*]] = bitcast i8** [[TMP110]] to double**
-// CHECK2-NEXT:    store double* [[VLA1]], double** [[TMP111]], align 8
-// CHECK2-NEXT:    [[TMP112:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 7
-// CHECK2-NEXT:    [[TMP113:%.*]] = bitcast i8** [[TMP112]] to double**
-// CHECK2-NEXT:    store double* [[VLA1]], double** [[TMP113]], align 8
-// CHECK2-NEXT:    [[TMP114:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
-// CHECK2-NEXT:    store i64 [[TMP67]], i64* [[TMP114]], align 8
-// CHECK2-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 7
+// CHECK2-NEXT:    store i64 [[TMP5]], i64* [[TMP101]], align 8
+// CHECK2-NEXT:    [[TMP102:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 6
+// CHECK2-NEXT:    [[TMP103:%.*]] = bitcast i8** [[TMP102]] to i64*
+// CHECK2-NEXT:    store i64 [[TMP5]], i64* [[TMP103]], align 8
+// CHECK2-NEXT:    [[TMP104:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 6
+// CHECK2-NEXT:    store i8* null, i8** [[TMP104]], align 8
+// CHECK2-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 7
+// CHECK2-NEXT:    [[TMP106:%.*]] = bitcast i8** [[TMP105]] to double**
+// CHECK2-NEXT:    store double* [[VLA1]], double** [[TMP106]], align 8
+// CHECK2-NEXT:    [[TMP107:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 7
+// CHECK2-NEXT:    [[TMP108:%.*]] = bitcast i8** [[TMP107]] to double**
+// CHECK2-NEXT:    store double* [[VLA1]], double** [[TMP108]], align 8
+// CHECK2-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
+// CHECK2-NEXT:    store i64 [[TMP67]], i64* [[TMP109]], align 8
+// CHECK2-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 7
+// CHECK2-NEXT:    store i8* null, i8** [[TMP110]], align 8
+// CHECK2-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 8
+// CHECK2-NEXT:    [[TMP112:%.*]] = bitcast i8** [[TMP111]] to %struct.TT**
+// CHECK2-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP112]], align 8
+// CHECK2-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 8
+// CHECK2-NEXT:    [[TMP114:%.*]] = bitcast i8** [[TMP113]] to %struct.TT**
+// CHECK2-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP114]], align 8
+// CHECK2-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 8
 // CHECK2-NEXT:    store i8* null, i8** [[TMP115]], align 8
-// CHECK2-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 8
-// CHECK2-NEXT:    [[TMP117:%.*]] = bitcast i8** [[TMP116]] to %struct.TT**
-// CHECK2-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP117]], align 8
-// CHECK2-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 8
-// CHECK2-NEXT:    [[TMP119:%.*]] = bitcast i8** [[TMP118]] to %struct.TT**
-// CHECK2-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP119]], align 8
-// CHECK2-NEXT:    [[TMP120:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 8
-// CHECK2-NEXT:    store i64 16, i64* [[TMP120]], align 8
-// CHECK2-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 8
-// CHECK2-NEXT:    store i8* null, i8** [[TMP121]], align 8
-// CHECK2-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 9
-// CHECK2-NEXT:    [[TMP123:%.*]] = bitcast i8** [[TMP122]] to i64*
-// CHECK2-NEXT:    store i64 [[TMP63]], i64* [[TMP123]], align 8
-// CHECK2-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 9
-// CHECK2-NEXT:    [[TMP125:%.*]] = bitcast i8** [[TMP124]] to i64*
-// CHECK2-NEXT:    store i64 [[TMP63]], i64* [[TMP125]], align 8
-// CHECK2-NEXT:    [[TMP126:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 9
-// CHECK2-NEXT:    store i64 4, i64* [[TMP126]], align 8
-// CHECK2-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 9
-// CHECK2-NEXT:    store i8* null, i8** [[TMP127]], align 8
-// CHECK2-NEXT:    [[TMP128:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0
-// CHECK2-NEXT:    [[TMP129:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 0
-// CHECK2-NEXT:    [[TMP130:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK2-NEXT:    [[TMP131:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l140.region_id, i32 10, i8** [[TMP128]], i8** [[TMP129]], i64* [[TMP130]], i64* getelementptr inbounds ([10 x i64], [10 x i64]* @.offload_maptypes.8, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
-// CHECK2-NEXT:    [[TMP132:%.*]] = icmp ne i32 [[TMP131]], 0
-// CHECK2-NEXT:    br i1 [[TMP132]], label [[OMP_OFFLOAD_FAILED23:%.*]], label [[OMP_OFFLOAD_CONT24:%.*]]
+// CHECK2-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 9
+// CHECK2-NEXT:    [[TMP117:%.*]] = bitcast i8** [[TMP116]] to i64*
+// CHECK2-NEXT:    store i64 [[TMP63]], i64* [[TMP117]], align 8
+// CHECK2-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 9
+// CHECK2-NEXT:    [[TMP119:%.*]] = bitcast i8** [[TMP118]] to i64*
+// CHECK2-NEXT:    store i64 [[TMP63]], i64* [[TMP119]], align 8
+// CHECK2-NEXT:    [[TMP120:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 9
+// CHECK2-NEXT:    store i8* null, i8** [[TMP120]], align 8
+// CHECK2-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP123:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP124:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l140.region_id, i32 10, i8** [[TMP121]], i8** [[TMP122]], i64* [[TMP123]], i64* getelementptr inbounds ([10 x i64], [10 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
+// CHECK2-NEXT:    [[TMP125:%.*]] = icmp ne i32 [[TMP124]], 0
+// CHECK2-NEXT:    br i1 [[TMP125]], label [[OMP_OFFLOAD_FAILED23:%.*]], label [[OMP_OFFLOAD_CONT24:%.*]]
 // CHECK2:       omp_offload.failed23:
 // CHECK2-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l140(i64 [[TMP61]], [10 x float]* [[B]], i64 [[TMP2]], float* [[VLA]], [5 x [10 x double]]* [[C]], i64 5, i64 [[TMP5]], double* [[VLA1]], %struct.TT* [[D]], i64 [[TMP63]]) #[[ATTR4]]
 // CHECK2-NEXT:    br label [[OMP_OFFLOAD_CONT24]]
@@ -2161,10 +2127,10 @@ int bar(int n){
 // CHECK2-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l140(i64 [[TMP61]], [10 x float]* [[B]], i64 [[TMP2]], float* [[VLA]], [5 x [10 x double]]* [[C]], i64 5, i64 [[TMP5]], double* [[VLA1]], %struct.TT* [[D]], i64 [[TMP63]]) #[[ATTR4]]
 // CHECK2-NEXT:    br label [[OMP_IF_END26]]
 // CHECK2:       omp_if.end26:
-// CHECK2-NEXT:    [[TMP133:%.*]] = load i32, i32* [[A]], align 4
-// CHECK2-NEXT:    [[TMP134:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK2-NEXT:    call void @llvm.stackrestore(i8* [[TMP134]])
-// CHECK2-NEXT:    ret i32 [[TMP133]]
+// CHECK2-NEXT:    [[TMP126:%.*]] = load i32, i32* [[A]], align 4
+// CHECK2-NEXT:    [[TMP127:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK2-NEXT:    call void @llvm.stackrestore(i8* [[TMP127]])
+// CHECK2-NEXT:    ret i32 [[TMP126]]
 //
 //
 // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l96
@@ -2917,14 +2883,14 @@ int bar(int n){
 // CHECK2-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0
 // CHECK2-NEXT:    [[TMP8:%.*]] = mul nuw i64 2, [[TMP2]]
 // CHECK2-NEXT:    [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 2
-// CHECK2-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK2-NEXT:    [[TMP11:%.*]] = bitcast i8** [[TMP10]] to %struct.S1**
-// CHECK2-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP11]], align 8
-// CHECK2-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK2-NEXT:    [[TMP13:%.*]] = bitcast i8** [[TMP12]] to double**
-// CHECK2-NEXT:    store double* [[A]], double** [[TMP13]], align 8
-// CHECK2-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK2-NEXT:    store i64 8, i64* [[TMP14]], align 8
+// CHECK2-NEXT:    [[TMP10:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK2-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP10]], i8* align 8 bitcast ([5 x i64]* @.offload_sizes.11 to i8*), i64 40, i1 false)
+// CHECK2-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP12:%.*]] = bitcast i8** [[TMP11]] to %struct.S1**
+// CHECK2-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP12]], align 8
+// CHECK2-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to double**
+// CHECK2-NEXT:    store double* [[A]], double** [[TMP14]], align 8
 // CHECK2-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
 // CHECK2-NEXT:    store i8* null, i8** [[TMP15]], align 8
 // CHECK2-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -2933,46 +2899,40 @@ int bar(int n){
 // CHECK2-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK2-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i64*
 // CHECK2-NEXT:    store i64 [[TMP6]], i64* [[TMP19]], align 8
-// CHECK2-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK2-NEXT:    store i64 4, i64* [[TMP20]], align 8
-// CHECK2-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK2-NEXT:    store i8* null, i8** [[TMP21]], align 8
-// CHECK2-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK2-NEXT:    [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i64*
-// CHECK2-NEXT:    store i64 2, i64* [[TMP23]], align 8
-// CHECK2-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK2-NEXT:    [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i64*
-// CHECK2-NEXT:    store i64 2, i64* [[TMP25]], align 8
-// CHECK2-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK2-NEXT:    store i64 8, i64* [[TMP26]], align 8
-// CHECK2-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK2-NEXT:    store i8* null, i8** [[TMP27]], align 8
-// CHECK2-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK2-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK2-NEXT:    store i8* null, i8** [[TMP20]], align 8
+// CHECK2-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK2-NEXT:    [[TMP22:%.*]] = bitcast i8** [[TMP21]] to i64*
+// CHECK2-NEXT:    store i64 2, i64* [[TMP22]], align 8
+// CHECK2-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK2-NEXT:    [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i64*
+// CHECK2-NEXT:    store i64 2, i64* [[TMP24]], align 8
+// CHECK2-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK2-NEXT:    store i8* null, i8** [[TMP25]], align 8
+// CHECK2-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK2-NEXT:    [[TMP27:%.*]] = bitcast i8** [[TMP26]] to i64*
+// CHECK2-NEXT:    store i64 [[TMP2]], i64* [[TMP27]], align 8
+// CHECK2-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
 // CHECK2-NEXT:    [[TMP29:%.*]] = bitcast i8** [[TMP28]] to i64*
 // CHECK2-NEXT:    store i64 [[TMP2]], i64* [[TMP29]], align 8
-// CHECK2-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK2-NEXT:    [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i64*
-// CHECK2-NEXT:    store i64 [[TMP2]], i64* [[TMP31]], align 8
-// CHECK2-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK2-NEXT:    store i64 8, i64* [[TMP32]], align 8
-// CHECK2-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
-// CHECK2-NEXT:    store i8* null, i8** [[TMP33]], align 8
-// CHECK2-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK2-NEXT:    [[TMP35:%.*]] = bitcast i8** [[TMP34]] to i16**
-// CHECK2-NEXT:    store i16* [[VLA]], i16** [[TMP35]], align 8
-// CHECK2-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
-// CHECK2-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i16**
-// CHECK2-NEXT:    store i16* [[VLA]], i16** [[TMP37]], align 8
-// CHECK2-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK2-NEXT:    store i64 [[TMP9]], i64* [[TMP38]], align 8
-// CHECK2-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
-// CHECK2-NEXT:    store i8* null, i8** [[TMP39]], align 8
-// CHECK2-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK2-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK2-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK2-NEXT:    [[TMP43:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l216.region_id, i32 5, i8** [[TMP40]], i8** [[TMP41]], i64* [[TMP42]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.10, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
-// CHECK2-NEXT:    [[TMP44:%.*]] = icmp ne i32 [[TMP43]], 0
-// CHECK2-NEXT:    br i1 [[TMP44]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK2-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
+// CHECK2-NEXT:    store i8* null, i8** [[TMP30]], align 8
+// CHECK2-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK2-NEXT:    [[TMP32:%.*]] = bitcast i8** [[TMP31]] to i16**
+// CHECK2-NEXT:    store i16* [[VLA]], i16** [[TMP32]], align 8
+// CHECK2-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK2-NEXT:    [[TMP34:%.*]] = bitcast i8** [[TMP33]] to i16**
+// CHECK2-NEXT:    store i16* [[VLA]], i16** [[TMP34]], align 8
+// CHECK2-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
+// CHECK2-NEXT:    store i64 [[TMP9]], i64* [[TMP35]], align 8
+// CHECK2-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
+// CHECK2-NEXT:    store i8* null, i8** [[TMP36]], align 8
+// CHECK2-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP40:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l216.region_id, i32 5, i8** [[TMP37]], i8** [[TMP38]], i64* [[TMP39]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.12, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
+// CHECK2-NEXT:    [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0
+// CHECK2-NEXT:    br i1 [[TMP41]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK2:       omp_offload.failed:
 // CHECK2-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l216(%struct.S1* [[THIS1]], i64 [[TMP6]], i64 2, i64 [[TMP2]], i16* [[VLA]]) #[[ATTR4]]
 // CHECK2-NEXT:    br label [[OMP_OFFLOAD_CONT]]
@@ -2982,15 +2942,15 @@ int bar(int n){
 // CHECK2-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l216(%struct.S1* [[THIS1]], i64 [[TMP6]], i64 2, i64 [[TMP2]], i16* [[VLA]]) #[[ATTR4]]
 // CHECK2-NEXT:    br label [[OMP_IF_END]]
 // CHECK2:       omp_if.end:
-// CHECK2-NEXT:    [[TMP45:%.*]] = mul nsw i64 1, [[TMP2]]
-// CHECK2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i64 [[TMP45]]
+// CHECK2-NEXT:    [[TMP42:%.*]] = mul nsw i64 1, [[TMP2]]
+// CHECK2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i64 [[TMP42]]
 // CHECK2-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1
-// CHECK2-NEXT:    [[TMP46:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
-// CHECK2-NEXT:    [[CONV3:%.*]] = sext i16 [[TMP46]] to i32
-// CHECK2-NEXT:    [[TMP47:%.*]] = load i32, i32* [[B]], align 4
-// CHECK2-NEXT:    [[ADD4:%.*]] = add nsw i32 [[CONV3]], [[TMP47]]
-// CHECK2-NEXT:    [[TMP48:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK2-NEXT:    call void @llvm.stackrestore(i8* [[TMP48]])
+// CHECK2-NEXT:    [[TMP43:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
+// CHECK2-NEXT:    [[CONV3:%.*]] = sext i16 [[TMP43]] to i32
+// CHECK2-NEXT:    [[TMP44:%.*]] = load i32, i32* [[B]], align 4
+// CHECK2-NEXT:    [[ADD4:%.*]] = add nsw i32 [[CONV3]], [[TMP44]]
+// CHECK2-NEXT:    [[TMP45:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK2-NEXT:    call void @llvm.stackrestore(i8* [[TMP45]])
 // CHECK2-NEXT:    ret i32 [[ADD4]]
 //
 //
@@ -3062,7 +3022,7 @@ int bar(int n){
 // CHECK2-NEXT:    store i8* null, i8** [[TMP26]], align 8
 // CHECK2-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK2-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK2-NEXT:    [[TMP29:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l195.region_id, i32 4, i8** [[TMP27]], i8** [[TMP28]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.12, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
+// CHECK2-NEXT:    [[TMP29:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l195.region_id, i32 4, i8** [[TMP27]], i8** [[TMP28]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.14, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.15, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
 // CHECK2-NEXT:    [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0
 // CHECK2-NEXT:    br i1 [[TMP30]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK2:       omp_offload.failed:
@@ -3131,7 +3091,7 @@ int bar(int n){
 // CHECK2-NEXT:    store i8* null, i8** [[TMP19]], align 8
 // CHECK2-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK2-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK2-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l178.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.15, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.16, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
+// CHECK2-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l178.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.17, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.18, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
 // CHECK2-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK2-NEXT:    br i1 [[TMP23]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK2:       omp_offload.failed:
@@ -3170,11 +3130,11 @@ int bar(int n){
 // CHECK2-NEXT:    [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32*
 // CHECK2-NEXT:    store i32 [[TMP4]], i32* [[CONV3]], align 4
 // CHECK2-NEXT:    [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8
-// CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i64, i64, i64, i16*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i64 [[TMP5]], i64 [[TMP1]], i64 [[TMP2]], i16* [[TMP3]])
+// CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i64, i64, i64, i16*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i64 [[TMP5]], i64 [[TMP1]], i64 [[TMP2]], i16* [[TMP3]])
 // CHECK2-NEXT:    ret void
 //
 //
-// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..9
+// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..10
 // CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR3]] {
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -3300,11 +3260,11 @@ int bar(int n){
 // CHECK2-NEXT:    [[CONV5:%.*]] = bitcast i64* [[AAA_CASTED]] to i8*
 // CHECK2-NEXT:    store i8 [[TMP5]], i8* [[CONV5]], align 1
 // CHECK2-NEXT:    [[TMP6:%.*]] = load i64, i64* [[AAA_CASTED]], align 8
-// CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, [10 x i32]*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], [10 x i32]* [[TMP0]])
+// CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, [10 x i32]*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], [10 x i32]* [[TMP0]])
 // CHECK2-NEXT:    ret void
 //
 //
-// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..11
+// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..13
 // CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] {
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -3350,11 +3310,11 @@ int bar(int n){
 // CHECK2-NEXT:    [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16*
 // CHECK2-NEXT:    store i16 [[TMP3]], i16* [[CONV3]], align 2
 // CHECK2-NEXT:    [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8
-// CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], [10 x i32]* [[TMP0]])
+// CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], [10 x i32]* [[TMP0]])
 // CHECK2-NEXT:    ret void
 //
 //
-// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..14
+// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..16
 // CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] {
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -3443,7 +3403,7 @@ int bar(int n){
 //
 //
 // CHECK2-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK2-SAME: () #[[ATTR7:[0-9]+]] {
+// CHECK2-SAME: () #[[ATTR8:[0-9]+]] {
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK2-NEXT:    ret void
@@ -3614,14 +3574,14 @@ int bar(int n){
 // CHECK3-NEXT:    [[TMP63:%.*]] = mul nuw i32 5, [[TMP3]]
 // CHECK3-NEXT:    [[TMP64:%.*]] = mul nuw i32 [[TMP63]], 8
 // CHECK3-NEXT:    [[TMP65:%.*]] = sext i32 [[TMP64]] to i64
-// CHECK3-NEXT:    [[TMP66:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 0
-// CHECK3-NEXT:    [[TMP67:%.*]] = bitcast i8** [[TMP66]] to i32*
-// CHECK3-NEXT:    store i32 [[TMP57]], i32* [[TMP67]], align 4
-// CHECK3-NEXT:    [[TMP68:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 0
-// CHECK3-NEXT:    [[TMP69:%.*]] = bitcast i8** [[TMP68]] to i32*
-// CHECK3-NEXT:    store i32 [[TMP57]], i32* [[TMP69]], align 4
-// CHECK3-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK3-NEXT:    store i64 4, i64* [[TMP70]], align 4
+// CHECK3-NEXT:    [[TMP66:%.*]] = bitcast [10 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK3-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP66]], i8* align 4 bitcast ([10 x i64]* @.offload_sizes.8 to i8*), i32 80, i1 false)
+// CHECK3-NEXT:    [[TMP67:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 0
+// CHECK3-NEXT:    [[TMP68:%.*]] = bitcast i8** [[TMP67]] to i32*
+// CHECK3-NEXT:    store i32 [[TMP57]], i32* [[TMP68]], align 4
+// CHECK3-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 0
+// CHECK3-NEXT:    [[TMP70:%.*]] = bitcast i8** [[TMP69]] to i32*
+// CHECK3-NEXT:    store i32 [[TMP57]], i32* [[TMP70]], align 4
 // CHECK3-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 0
 // CHECK3-NEXT:    store i8* null, i8** [[TMP71]], align 4
 // CHECK3-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 1
@@ -3630,96 +3590,82 @@ int bar(int n){
 // CHECK3-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 1
 // CHECK3-NEXT:    [[TMP75:%.*]] = bitcast i8** [[TMP74]] to [10 x float]**
 // CHECK3-NEXT:    store [10 x float]* [[B]], [10 x float]** [[TMP75]], align 4
-// CHECK3-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK3-NEXT:    store i64 40, i64* [[TMP76]], align 4
-// CHECK3-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 1
-// CHECK3-NEXT:    store i8* null, i8** [[TMP77]], align 4
-// CHECK3-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 2
-// CHECK3-NEXT:    [[TMP79:%.*]] = bitcast i8** [[TMP78]] to i32*
-// CHECK3-NEXT:    store i32 [[TMP1]], i32* [[TMP79]], align 4
-// CHECK3-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 2
-// CHECK3-NEXT:    [[TMP81:%.*]] = bitcast i8** [[TMP80]] to i32*
-// CHECK3-NEXT:    store i32 [[TMP1]], i32* [[TMP81]], align 4
-// CHECK3-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK3-NEXT:    store i64 4, i64* [[TMP82]], align 4
-// CHECK3-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 2
-// CHECK3-NEXT:    store i8* null, i8** [[TMP83]], align 4
-// CHECK3-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 3
+// CHECK3-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 1
+// CHECK3-NEXT:    store i8* null, i8** [[TMP76]], align 4
+// CHECK3-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 2
+// CHECK3-NEXT:    [[TMP78:%.*]] = bitcast i8** [[TMP77]] to i32*
+// CHECK3-NEXT:    store i32 [[TMP1]], i32* [[TMP78]], align 4
+// CHECK3-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 2
+// CHECK3-NEXT:    [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i32*
+// CHECK3-NEXT:    store i32 [[TMP1]], i32* [[TMP80]], align 4
+// CHECK3-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 2
+// CHECK3-NEXT:    store i8* null, i8** [[TMP81]], align 4
+// CHECK3-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 3
+// CHECK3-NEXT:    [[TMP83:%.*]] = bitcast i8** [[TMP82]] to float**
+// CHECK3-NEXT:    store float* [[VLA]], float** [[TMP83]], align 4
+// CHECK3-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 3
 // CHECK3-NEXT:    [[TMP85:%.*]] = bitcast i8** [[TMP84]] to float**
 // CHECK3-NEXT:    store float* [[VLA]], float** [[TMP85]], align 4
-// CHECK3-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 3
-// CHECK3-NEXT:    [[TMP87:%.*]] = bitcast i8** [[TMP86]] to float**
-// CHECK3-NEXT:    store float* [[VLA]], float** [[TMP87]], align 4
-// CHECK3-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK3-NEXT:    store i64 [[TMP62]], i64* [[TMP88]], align 4
-// CHECK3-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 3
-// CHECK3-NEXT:    store i8* null, i8** [[TMP89]], align 4
-// CHECK3-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 4
+// CHECK3-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
+// CHECK3-NEXT:    store i64 [[TMP62]], i64* [[TMP86]], align 4
+// CHECK3-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 3
+// CHECK3-NEXT:    store i8* null, i8** [[TMP87]], align 4
+// CHECK3-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 4
+// CHECK3-NEXT:    [[TMP89:%.*]] = bitcast i8** [[TMP88]] to [5 x [10 x double]]**
+// CHECK3-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP89]], align 4
+// CHECK3-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 4
 // CHECK3-NEXT:    [[TMP91:%.*]] = bitcast i8** [[TMP90]] to [5 x [10 x double]]**
 // CHECK3-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP91]], align 4
-// CHECK3-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 4
-// CHECK3-NEXT:    [[TMP93:%.*]] = bitcast i8** [[TMP92]] to [5 x [10 x double]]**
-// CHECK3-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP93]], align 4
-// CHECK3-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK3-NEXT:    store i64 400, i64* [[TMP94]], align 4
-// CHECK3-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 4
-// CHECK3-NEXT:    store i8* null, i8** [[TMP95]], align 4
-// CHECK3-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 5
-// CHECK3-NEXT:    [[TMP97:%.*]] = bitcast i8** [[TMP96]] to i32*
-// CHECK3-NEXT:    store i32 5, i32* [[TMP97]], align 4
-// CHECK3-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 5
+// CHECK3-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 4
+// CHECK3-NEXT:    store i8* null, i8** [[TMP92]], align 4
+// CHECK3-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 5
+// CHECK3-NEXT:    [[TMP94:%.*]] = bitcast i8** [[TMP93]] to i32*
+// CHECK3-NEXT:    store i32 5, i32* [[TMP94]], align 4
+// CHECK3-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 5
+// CHECK3-NEXT:    [[TMP96:%.*]] = bitcast i8** [[TMP95]] to i32*
+// CHECK3-NEXT:    store i32 5, i32* [[TMP96]], align 4
+// CHECK3-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 5
+// CHECK3-NEXT:    store i8* null, i8** [[TMP97]], align 4
+// CHECK3-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 6
 // CHECK3-NEXT:    [[TMP99:%.*]] = bitcast i8** [[TMP98]] to i32*
-// CHECK3-NEXT:    store i32 5, i32* [[TMP99]], align 4
-// CHECK3-NEXT:    [[TMP100:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 5
-// CHECK3-NEXT:    store i64 4, i64* [[TMP100]], align 4
-// CHECK3-NEXT:    [[TMP101:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 5
-// CHECK3-NEXT:    store i8* null, i8** [[TMP101]], align 4
-// CHECK3-NEXT:    [[TMP102:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 6
-// CHECK3-NEXT:    [[TMP103:%.*]] = bitcast i8** [[TMP102]] to i32*
-// CHECK3-NEXT:    store i32 [[TMP3]], i32* [[TMP103]], align 4
-// CHECK3-NEXT:    [[TMP104:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 6
-// CHECK3-NEXT:    [[TMP105:%.*]] = bitcast i8** [[TMP104]] to i32*
-// CHECK3-NEXT:    store i32 [[TMP3]], i32* [[TMP105]], align 4
-// CHECK3-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 6
-// CHECK3-NEXT:    store i64 4, i64* [[TMP106]], align 4
-// CHECK3-NEXT:    [[TMP107:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 6
-// CHECK3-NEXT:    store i8* null, i8** [[TMP107]], align 4
-// CHECK3-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 7
-// CHECK3-NEXT:    [[TMP109:%.*]] = bitcast i8** [[TMP108]] to double**
-// CHECK3-NEXT:    store double* [[VLA1]], double** [[TMP109]], align 4
-// CHECK3-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 7
-// CHECK3-NEXT:    [[TMP111:%.*]] = bitcast i8** [[TMP110]] to double**
-// CHECK3-NEXT:    store double* [[VLA1]], double** [[TMP111]], align 4
-// CHECK3-NEXT:    [[TMP112:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
-// CHECK3-NEXT:    store i64 [[TMP65]], i64* [[TMP112]], align 4
-// CHECK3-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 7
+// CHECK3-NEXT:    store i32 [[TMP3]], i32* [[TMP99]], align 4
+// CHECK3-NEXT:    [[TMP100:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 6
+// CHECK3-NEXT:    [[TMP101:%.*]] = bitcast i8** [[TMP100]] to i32*
+// CHECK3-NEXT:    store i32 [[TMP3]], i32* [[TMP101]], align 4
+// CHECK3-NEXT:    [[TMP102:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 6
+// CHECK3-NEXT:    store i8* null, i8** [[TMP102]], align 4
+// CHECK3-NEXT:    [[TMP103:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 7
+// CHECK3-NEXT:    [[TMP104:%.*]] = bitcast i8** [[TMP103]] to double**
+// CHECK3-NEXT:    store double* [[VLA1]], double** [[TMP104]], align 4
+// CHECK3-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 7
+// CHECK3-NEXT:    [[TMP106:%.*]] = bitcast i8** [[TMP105]] to double**
+// CHECK3-NEXT:    store double* [[VLA1]], double** [[TMP106]], align 4
+// CHECK3-NEXT:    [[TMP107:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
+// CHECK3-NEXT:    store i64 [[TMP65]], i64* [[TMP107]], align 4
+// CHECK3-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 7
+// CHECK3-NEXT:    store i8* null, i8** [[TMP108]], align 4
+// CHECK3-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 8
+// CHECK3-NEXT:    [[TMP110:%.*]] = bitcast i8** [[TMP109]] to %struct.TT**
+// CHECK3-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP110]], align 4
+// CHECK3-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 8
+// CHECK3-NEXT:    [[TMP112:%.*]] = bitcast i8** [[TMP111]] to %struct.TT**
+// CHECK3-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP112]], align 4
+// CHECK3-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 8
 // CHECK3-NEXT:    store i8* null, i8** [[TMP113]], align 4
-// CHECK3-NEXT:    [[TMP114:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 8
-// CHECK3-NEXT:    [[TMP115:%.*]] = bitcast i8** [[TMP114]] to %struct.TT**
-// CHECK3-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP115]], align 4
-// CHECK3-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 8
-// CHECK3-NEXT:    [[TMP117:%.*]] = bitcast i8** [[TMP116]] to %struct.TT**
-// CHECK3-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP117]], align 4
-// CHECK3-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 8
-// CHECK3-NEXT:    store i64 12, i64* [[TMP118]], align 4
-// CHECK3-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 8
-// CHECK3-NEXT:    store i8* null, i8** [[TMP119]], align 4
-// CHECK3-NEXT:    [[TMP120:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 9
-// CHECK3-NEXT:    [[TMP121:%.*]] = bitcast i8** [[TMP120]] to i32*
-// CHECK3-NEXT:    store i32 [[TMP59]], i32* [[TMP121]], align 4
-// CHECK3-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 9
-// CHECK3-NEXT:    [[TMP123:%.*]] = bitcast i8** [[TMP122]] to i32*
-// CHECK3-NEXT:    store i32 [[TMP59]], i32* [[TMP123]], align 4
-// CHECK3-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 9
-// CHECK3-NEXT:    store i64 4, i64* [[TMP124]], align 4
-// CHECK3-NEXT:    [[TMP125:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 9
-// CHECK3-NEXT:    store i8* null, i8** [[TMP125]], align 4
-// CHECK3-NEXT:    [[TMP126:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 0
-// CHECK3-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 0
-// CHECK3-NEXT:    [[TMP128:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK3-NEXT:    [[TMP129:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l140.region_id, i32 10, i8** [[TMP126]], i8** [[TMP127]], i64* [[TMP128]], i64* getelementptr inbounds ([10 x i64], [10 x i64]* @.offload_maptypes.8, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
-// CHECK3-NEXT:    [[TMP130:%.*]] = icmp ne i32 [[TMP129]], 0
-// CHECK3-NEXT:    br i1 [[TMP130]], label [[OMP_OFFLOAD_FAILED17:%.*]], label [[OMP_OFFLOAD_CONT18:%.*]]
+// CHECK3-NEXT:    [[TMP114:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 9
+// CHECK3-NEXT:    [[TMP115:%.*]] = bitcast i8** [[TMP114]] to i32*
+// CHECK3-NEXT:    store i32 [[TMP59]], i32* [[TMP115]], align 4
+// CHECK3-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 9
+// CHECK3-NEXT:    [[TMP117:%.*]] = bitcast i8** [[TMP116]] to i32*
+// CHECK3-NEXT:    store i32 [[TMP59]], i32* [[TMP117]], align 4
+// CHECK3-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 9
+// CHECK3-NEXT:    store i8* null, i8** [[TMP118]], align 4
+// CHECK3-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 0
+// CHECK3-NEXT:    [[TMP120:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 0
+// CHECK3-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK3-NEXT:    [[TMP122:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l140.region_id, i32 10, i8** [[TMP119]], i8** [[TMP120]], i64* [[TMP121]], i64* getelementptr inbounds ([10 x i64], [10 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
+// CHECK3-NEXT:    [[TMP123:%.*]] = icmp ne i32 [[TMP122]], 0
+// CHECK3-NEXT:    br i1 [[TMP123]], label [[OMP_OFFLOAD_FAILED17:%.*]], label [[OMP_OFFLOAD_CONT18:%.*]]
 // CHECK3:       omp_offload.failed17:
 // CHECK3-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l140(i32 [[TMP57]], [10 x float]* [[B]], i32 [[TMP1]], float* [[VLA]], [5 x [10 x double]]* [[C]], i32 5, i32 [[TMP3]], double* [[VLA1]], %struct.TT* [[D]], i32 [[TMP59]]) #[[ATTR4]]
 // CHECK3-NEXT:    br label [[OMP_OFFLOAD_CONT18]]
@@ -3729,10 +3675,10 @@ int bar(int n){
 // CHECK3-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l140(i32 [[TMP57]], [10 x float]* [[B]], i32 [[TMP1]], float* [[VLA]], [5 x [10 x double]]* [[C]], i32 5, i32 [[TMP3]], double* [[VLA1]], %struct.TT* [[D]], i32 [[TMP59]]) #[[ATTR4]]
 // CHECK3-NEXT:    br label [[OMP_IF_END20]]
 // CHECK3:       omp_if.end20:
-// CHECK3-NEXT:    [[TMP131:%.*]] = load i32, i32* [[A]], align 4
-// CHECK3-NEXT:    [[TMP132:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK3-NEXT:    call void @llvm.stackrestore(i8* [[TMP132]])
-// CHECK3-NEXT:    ret i32 [[TMP131]]
+// CHECK3-NEXT:    [[TMP124:%.*]] = load i32, i32* [[A]], align 4
+// CHECK3-NEXT:    [[TMP125:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK3-NEXT:    call void @llvm.stackrestore(i8* [[TMP125]])
+// CHECK3-NEXT:    ret i32 [[TMP124]]
 //
 //
 // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l96
@@ -4464,14 +4410,14 @@ int bar(int n){
 // CHECK3-NEXT:    [[TMP7:%.*]] = mul nuw i32 2, [[TMP1]]
 // CHECK3-NEXT:    [[TMP8:%.*]] = mul nuw i32 [[TMP7]], 2
 // CHECK3-NEXT:    [[TMP9:%.*]] = sext i32 [[TMP8]] to i64
-// CHECK3-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[TMP11:%.*]] = bitcast i8** [[TMP10]] to %struct.S1**
-// CHECK3-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP11]], align 4
-// CHECK3-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[TMP13:%.*]] = bitcast i8** [[TMP12]] to double**
-// CHECK3-NEXT:    store double* [[A]], double** [[TMP13]], align 4
-// CHECK3-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK3-NEXT:    store i64 8, i64* [[TMP14]], align 4
+// CHECK3-NEXT:    [[TMP10:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK3-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP10]], i8* align 4 bitcast ([5 x i64]* @.offload_sizes.11 to i8*), i32 40, i1 false)
+// CHECK3-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK3-NEXT:    [[TMP12:%.*]] = bitcast i8** [[TMP11]] to %struct.S1**
+// CHECK3-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP12]], align 4
+// CHECK3-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK3-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to double**
+// CHECK3-NEXT:    store double* [[A]], double** [[TMP14]], align 4
 // CHECK3-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
 // CHECK3-NEXT:    store i8* null, i8** [[TMP15]], align 4
 // CHECK3-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -4480,46 +4426,40 @@ int bar(int n){
 // CHECK3-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK3-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32*
 // CHECK3-NEXT:    store i32 [[TMP5]], i32* [[TMP19]], align 4
-// CHECK3-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK3-NEXT:    store i64 4, i64* [[TMP20]], align 4
-// CHECK3-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK3-NEXT:    store i8* null, i8** [[TMP21]], align 4
-// CHECK3-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK3-NEXT:    [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i32*
-// CHECK3-NEXT:    store i32 2, i32* [[TMP23]], align 4
-// CHECK3-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK3-NEXT:    [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i32*
-// CHECK3-NEXT:    store i32 2, i32* [[TMP25]], align 4
-// CHECK3-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK3-NEXT:    store i64 4, i64* [[TMP26]], align 4
-// CHECK3-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK3-NEXT:    store i8* null, i8** [[TMP27]], align 4
-// CHECK3-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK3-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK3-NEXT:    store i8* null, i8** [[TMP20]], align 4
+// CHECK3-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK3-NEXT:    [[TMP22:%.*]] = bitcast i8** [[TMP21]] to i32*
+// CHECK3-NEXT:    store i32 2, i32* [[TMP22]], align 4
+// CHECK3-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK3-NEXT:    [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i32*
+// CHECK3-NEXT:    store i32 2, i32* [[TMP24]], align 4
+// CHECK3-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK3-NEXT:    store i8* null, i8** [[TMP25]], align 4
+// CHECK3-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK3-NEXT:    [[TMP27:%.*]] = bitcast i8** [[TMP26]] to i32*
+// CHECK3-NEXT:    store i32 [[TMP1]], i32* [[TMP27]], align 4
+// CHECK3-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
 // CHECK3-NEXT:    [[TMP29:%.*]] = bitcast i8** [[TMP28]] to i32*
 // CHECK3-NEXT:    store i32 [[TMP1]], i32* [[TMP29]], align 4
-// CHECK3-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK3-NEXT:    [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i32*
-// CHECK3-NEXT:    store i32 [[TMP1]], i32* [[TMP31]], align 4
-// CHECK3-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK3-NEXT:    store i64 4, i64* [[TMP32]], align 4
-// CHECK3-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
-// CHECK3-NEXT:    store i8* null, i8** [[TMP33]], align 4
-// CHECK3-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK3-NEXT:    [[TMP35:%.*]] = bitcast i8** [[TMP34]] to i16**
-// CHECK3-NEXT:    store i16* [[VLA]], i16** [[TMP35]], align 4
-// CHECK3-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
-// CHECK3-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i16**
-// CHECK3-NEXT:    store i16* [[VLA]], i16** [[TMP37]], align 4
-// CHECK3-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK3-NEXT:    store i64 [[TMP9]], i64* [[TMP38]], align 4
-// CHECK3-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
-// CHECK3-NEXT:    store i8* null, i8** [[TMP39]], align 4
-// CHECK3-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK3-NEXT:    [[TMP43:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l216.region_id, i32 5, i8** [[TMP40]], i8** [[TMP41]], i64* [[TMP42]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.10, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
-// CHECK3-NEXT:    [[TMP44:%.*]] = icmp ne i32 [[TMP43]], 0
-// CHECK3-NEXT:    br i1 [[TMP44]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK3-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
+// CHECK3-NEXT:    store i8* null, i8** [[TMP30]], align 4
+// CHECK3-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK3-NEXT:    [[TMP32:%.*]] = bitcast i8** [[TMP31]] to i16**
+// CHECK3-NEXT:    store i16* [[VLA]], i16** [[TMP32]], align 4
+// CHECK3-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK3-NEXT:    [[TMP34:%.*]] = bitcast i8** [[TMP33]] to i16**
+// CHECK3-NEXT:    store i16* [[VLA]], i16** [[TMP34]], align 4
+// CHECK3-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
+// CHECK3-NEXT:    store i64 [[TMP9]], i64* [[TMP35]], align 4
+// CHECK3-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
+// CHECK3-NEXT:    store i8* null, i8** [[TMP36]], align 4
+// CHECK3-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK3-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK3-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK3-NEXT:    [[TMP40:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l216.region_id, i32 5, i8** [[TMP37]], i8** [[TMP38]], i64* [[TMP39]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.12, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
+// CHECK3-NEXT:    [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0
+// CHECK3-NEXT:    br i1 [[TMP41]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK3:       omp_offload.failed:
 // CHECK3-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l216(%struct.S1* [[THIS1]], i32 [[TMP5]], i32 2, i32 [[TMP1]], i16* [[VLA]]) #[[ATTR4]]
 // CHECK3-NEXT:    br label [[OMP_OFFLOAD_CONT]]
@@ -4529,15 +4469,15 @@ int bar(int n){
 // CHECK3-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l216(%struct.S1* [[THIS1]], i32 [[TMP5]], i32 2, i32 [[TMP1]], i16* [[VLA]]) #[[ATTR4]]
 // CHECK3-NEXT:    br label [[OMP_IF_END]]
 // CHECK3:       omp_if.end:
-// CHECK3-NEXT:    [[TMP45:%.*]] = mul nsw i32 1, [[TMP1]]
-// CHECK3-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i32 [[TMP45]]
+// CHECK3-NEXT:    [[TMP42:%.*]] = mul nsw i32 1, [[TMP1]]
+// CHECK3-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i32 [[TMP42]]
 // CHECK3-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1
-// CHECK3-NEXT:    [[TMP46:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
-// CHECK3-NEXT:    [[CONV:%.*]] = sext i16 [[TMP46]] to i32
-// CHECK3-NEXT:    [[TMP47:%.*]] = load i32, i32* [[B]], align 4
-// CHECK3-NEXT:    [[ADD3:%.*]] = add nsw i32 [[CONV]], [[TMP47]]
-// CHECK3-NEXT:    [[TMP48:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK3-NEXT:    call void @llvm.stackrestore(i8* [[TMP48]])
+// CHECK3-NEXT:    [[TMP43:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
+// CHECK3-NEXT:    [[CONV:%.*]] = sext i16 [[TMP43]] to i32
+// CHECK3-NEXT:    [[TMP44:%.*]] = load i32, i32* [[B]], align 4
+// CHECK3-NEXT:    [[ADD3:%.*]] = add nsw i32 [[CONV]], [[TMP44]]
+// CHECK3-NEXT:    [[TMP45:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK3-NEXT:    call void @llvm.stackrestore(i8* [[TMP45]])
 // CHECK3-NEXT:    ret i32 [[ADD3]]
 //
 //
@@ -4608,7 +4548,7 @@ int bar(int n){
 // CHECK3-NEXT:    store i8* null, i8** [[TMP26]], align 4
 // CHECK3-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[TMP29:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l195.region_id, i32 4, i8** [[TMP27]], i8** [[TMP28]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.12, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
+// CHECK3-NEXT:    [[TMP29:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l195.region_id, i32 4, i8** [[TMP27]], i8** [[TMP28]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.14, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.15, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
 // CHECK3-NEXT:    [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0
 // CHECK3-NEXT:    br i1 [[TMP30]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK3:       omp_offload.failed:
@@ -4676,7 +4616,7 @@ int bar(int n){
 // CHECK3-NEXT:    store i8* null, i8** [[TMP19]], align 4
 // CHECK3-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l178.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.15, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.16, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
+// CHECK3-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l178.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.17, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.18, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
 // CHECK3-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK3-NEXT:    br i1 [[TMP23]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK3:       omp_offload.failed:
@@ -4713,11 +4653,11 @@ int bar(int n){
 // CHECK3-NEXT:    [[TMP4:%.*]] = load i32, i32* [[B_ADDR]], align 4
 // CHECK3-NEXT:    store i32 [[TMP4]], i32* [[B_CASTED]], align 4
 // CHECK3-NEXT:    [[TMP5:%.*]] = load i32, i32* [[B_CASTED]], align 4
-// CHECK3-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i32, i32, i32, i16*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i32 [[TMP5]], i32 [[TMP1]], i32 [[TMP2]], i16* [[TMP3]])
+// CHECK3-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i32, i32, i32, i16*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i32 [[TMP5]], i32 [[TMP1]], i32 [[TMP2]], i16* [[TMP3]])
 // CHECK3-NEXT:    ret void
 //
 //
-// CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..9
+// CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..10
 // CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR3]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -4840,11 +4780,11 @@ int bar(int n){
 // CHECK3-NEXT:    [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8*
 // CHECK3-NEXT:    store i8 [[TMP5]], i8* [[CONV3]], align 1
 // CHECK3-NEXT:    [[TMP6:%.*]] = load i32, i32* [[AAA_CASTED]], align 4
-// CHECK3-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, [10 x i32]*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], [10 x i32]* [[TMP0]])
+// CHECK3-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, [10 x i32]*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], [10 x i32]* [[TMP0]])
 // CHECK3-NEXT:    ret void
 //
 //
-// CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..11
+// CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..13
 // CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -4887,11 +4827,11 @@ int bar(int n){
 // CHECK3-NEXT:    [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16*
 // CHECK3-NEXT:    store i16 [[TMP3]], i16* [[CONV1]], align 2
 // CHECK3-NEXT:    [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4
-// CHECK3-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], [10 x i32]* [[TMP0]])
+// CHECK3-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], [10 x i32]* [[TMP0]])
 // CHECK3-NEXT:    ret void
 //
 //
-// CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..14
+// CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..16
 // CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -4979,7 +4919,7 @@ int bar(int n){
 //
 //
 // CHECK3-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK3-SAME: () #[[ATTR7:[0-9]+]] {
+// CHECK3-SAME: () #[[ATTR8:[0-9]+]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK3-NEXT:    ret void
@@ -5150,14 +5090,14 @@ int bar(int n){
 // CHECK4-NEXT:    [[TMP63:%.*]] = mul nuw i32 5, [[TMP3]]
 // CHECK4-NEXT:    [[TMP64:%.*]] = mul nuw i32 [[TMP63]], 8
 // CHECK4-NEXT:    [[TMP65:%.*]] = sext i32 [[TMP64]] to i64
-// CHECK4-NEXT:    [[TMP66:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 0
-// CHECK4-NEXT:    [[TMP67:%.*]] = bitcast i8** [[TMP66]] to i32*
-// CHECK4-NEXT:    store i32 [[TMP57]], i32* [[TMP67]], align 4
-// CHECK4-NEXT:    [[TMP68:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 0
-// CHECK4-NEXT:    [[TMP69:%.*]] = bitcast i8** [[TMP68]] to i32*
-// CHECK4-NEXT:    store i32 [[TMP57]], i32* [[TMP69]], align 4
-// CHECK4-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK4-NEXT:    store i64 4, i64* [[TMP70]], align 4
+// CHECK4-NEXT:    [[TMP66:%.*]] = bitcast [10 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK4-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP66]], i8* align 4 bitcast ([10 x i64]* @.offload_sizes.8 to i8*), i32 80, i1 false)
+// CHECK4-NEXT:    [[TMP67:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 0
+// CHECK4-NEXT:    [[TMP68:%.*]] = bitcast i8** [[TMP67]] to i32*
+// CHECK4-NEXT:    store i32 [[TMP57]], i32* [[TMP68]], align 4
+// CHECK4-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 0
+// CHECK4-NEXT:    [[TMP70:%.*]] = bitcast i8** [[TMP69]] to i32*
+// CHECK4-NEXT:    store i32 [[TMP57]], i32* [[TMP70]], align 4
 // CHECK4-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 0
 // CHECK4-NEXT:    store i8* null, i8** [[TMP71]], align 4
 // CHECK4-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 1
@@ -5166,96 +5106,82 @@ int bar(int n){
 // CHECK4-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 1
 // CHECK4-NEXT:    [[TMP75:%.*]] = bitcast i8** [[TMP74]] to [10 x float]**
 // CHECK4-NEXT:    store [10 x float]* [[B]], [10 x float]** [[TMP75]], align 4
-// CHECK4-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK4-NEXT:    store i64 40, i64* [[TMP76]], align 4
-// CHECK4-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 1
-// CHECK4-NEXT:    store i8* null, i8** [[TMP77]], align 4
-// CHECK4-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 2
-// CHECK4-NEXT:    [[TMP79:%.*]] = bitcast i8** [[TMP78]] to i32*
-// CHECK4-NEXT:    store i32 [[TMP1]], i32* [[TMP79]], align 4
-// CHECK4-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 2
-// CHECK4-NEXT:    [[TMP81:%.*]] = bitcast i8** [[TMP80]] to i32*
-// CHECK4-NEXT:    store i32 [[TMP1]], i32* [[TMP81]], align 4
-// CHECK4-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK4-NEXT:    store i64 4, i64* [[TMP82]], align 4
-// CHECK4-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 2
-// CHECK4-NEXT:    store i8* null, i8** [[TMP83]], align 4
-// CHECK4-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 3
+// CHECK4-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 1
+// CHECK4-NEXT:    store i8* null, i8** [[TMP76]], align 4
+// CHECK4-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 2
+// CHECK4-NEXT:    [[TMP78:%.*]] = bitcast i8** [[TMP77]] to i32*
+// CHECK4-NEXT:    store i32 [[TMP1]], i32* [[TMP78]], align 4
+// CHECK4-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 2
+// CHECK4-NEXT:    [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i32*
+// CHECK4-NEXT:    store i32 [[TMP1]], i32* [[TMP80]], align 4
+// CHECK4-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 2
+// CHECK4-NEXT:    store i8* null, i8** [[TMP81]], align 4
+// CHECK4-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 3
+// CHECK4-NEXT:    [[TMP83:%.*]] = bitcast i8** [[TMP82]] to float**
+// CHECK4-NEXT:    store float* [[VLA]], float** [[TMP83]], align 4
+// CHECK4-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 3
 // CHECK4-NEXT:    [[TMP85:%.*]] = bitcast i8** [[TMP84]] to float**
 // CHECK4-NEXT:    store float* [[VLA]], float** [[TMP85]], align 4
-// CHECK4-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 3
-// CHECK4-NEXT:    [[TMP87:%.*]] = bitcast i8** [[TMP86]] to float**
-// CHECK4-NEXT:    store float* [[VLA]], float** [[TMP87]], align 4
-// CHECK4-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK4-NEXT:    store i64 [[TMP62]], i64* [[TMP88]], align 4
-// CHECK4-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 3
-// CHECK4-NEXT:    store i8* null, i8** [[TMP89]], align 4
-// CHECK4-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 4
+// CHECK4-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
+// CHECK4-NEXT:    store i64 [[TMP62]], i64* [[TMP86]], align 4
+// CHECK4-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 3
+// CHECK4-NEXT:    store i8* null, i8** [[TMP87]], align 4
+// CHECK4-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 4
+// CHECK4-NEXT:    [[TMP89:%.*]] = bitcast i8** [[TMP88]] to [5 x [10 x double]]**
+// CHECK4-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP89]], align 4
+// CHECK4-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 4
 // CHECK4-NEXT:    [[TMP91:%.*]] = bitcast i8** [[TMP90]] to [5 x [10 x double]]**
 // CHECK4-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP91]], align 4
-// CHECK4-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 4
-// CHECK4-NEXT:    [[TMP93:%.*]] = bitcast i8** [[TMP92]] to [5 x [10 x double]]**
-// CHECK4-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP93]], align 4
-// CHECK4-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK4-NEXT:    store i64 400, i64* [[TMP94]], align 4
-// CHECK4-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 4
-// CHECK4-NEXT:    store i8* null, i8** [[TMP95]], align 4
-// CHECK4-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 5
-// CHECK4-NEXT:    [[TMP97:%.*]] = bitcast i8** [[TMP96]] to i32*
-// CHECK4-NEXT:    store i32 5, i32* [[TMP97]], align 4
-// CHECK4-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 5
+// CHECK4-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 4
+// CHECK4-NEXT:    store i8* null, i8** [[TMP92]], align 4
+// CHECK4-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 5
+// CHECK4-NEXT:    [[TMP94:%.*]] = bitcast i8** [[TMP93]] to i32*
+// CHECK4-NEXT:    store i32 5, i32* [[TMP94]], align 4
+// CHECK4-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 5
+// CHECK4-NEXT:    [[TMP96:%.*]] = bitcast i8** [[TMP95]] to i32*
+// CHECK4-NEXT:    store i32 5, i32* [[TMP96]], align 4
+// CHECK4-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 5
+// CHECK4-NEXT:    store i8* null, i8** [[TMP97]], align 4
+// CHECK4-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 6
 // CHECK4-NEXT:    [[TMP99:%.*]] = bitcast i8** [[TMP98]] to i32*
-// CHECK4-NEXT:    store i32 5, i32* [[TMP99]], align 4
-// CHECK4-NEXT:    [[TMP100:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 5
-// CHECK4-NEXT:    store i64 4, i64* [[TMP100]], align 4
-// CHECK4-NEXT:    [[TMP101:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 5
-// CHECK4-NEXT:    store i8* null, i8** [[TMP101]], align 4
-// CHECK4-NEXT:    [[TMP102:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 6
-// CHECK4-NEXT:    [[TMP103:%.*]] = bitcast i8** [[TMP102]] to i32*
-// CHECK4-NEXT:    store i32 [[TMP3]], i32* [[TMP103]], align 4
-// CHECK4-NEXT:    [[TMP104:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 6
-// CHECK4-NEXT:    [[TMP105:%.*]] = bitcast i8** [[TMP104]] to i32*
-// CHECK4-NEXT:    store i32 [[TMP3]], i32* [[TMP105]], align 4
-// CHECK4-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 6
-// CHECK4-NEXT:    store i64 4, i64* [[TMP106]], align 4
-// CHECK4-NEXT:    [[TMP107:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 6
-// CHECK4-NEXT:    store i8* null, i8** [[TMP107]], align 4
-// CHECK4-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 7
-// CHECK4-NEXT:    [[TMP109:%.*]] = bitcast i8** [[TMP108]] to double**
-// CHECK4-NEXT:    store double* [[VLA1]], double** [[TMP109]], align 4
-// CHECK4-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 7
-// CHECK4-NEXT:    [[TMP111:%.*]] = bitcast i8** [[TMP110]] to double**
-// CHECK4-NEXT:    store double* [[VLA1]], double** [[TMP111]], align 4
-// CHECK4-NEXT:    [[TMP112:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
-// CHECK4-NEXT:    store i64 [[TMP65]], i64* [[TMP112]], align 4
-// CHECK4-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 7
+// CHECK4-NEXT:    store i32 [[TMP3]], i32* [[TMP99]], align 4
+// CHECK4-NEXT:    [[TMP100:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 6
+// CHECK4-NEXT:    [[TMP101:%.*]] = bitcast i8** [[TMP100]] to i32*
+// CHECK4-NEXT:    store i32 [[TMP3]], i32* [[TMP101]], align 4
+// CHECK4-NEXT:    [[TMP102:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 6
+// CHECK4-NEXT:    store i8* null, i8** [[TMP102]], align 4
+// CHECK4-NEXT:    [[TMP103:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 7
+// CHECK4-NEXT:    [[TMP104:%.*]] = bitcast i8** [[TMP103]] to double**
+// CHECK4-NEXT:    store double* [[VLA1]], double** [[TMP104]], align 4
+// CHECK4-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 7
+// CHECK4-NEXT:    [[TMP106:%.*]] = bitcast i8** [[TMP105]] to double**
+// CHECK4-NEXT:    store double* [[VLA1]], double** [[TMP106]], align 4
+// CHECK4-NEXT:    [[TMP107:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
+// CHECK4-NEXT:    store i64 [[TMP65]], i64* [[TMP107]], align 4
+// CHECK4-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 7
+// CHECK4-NEXT:    store i8* null, i8** [[TMP108]], align 4
+// CHECK4-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 8
+// CHECK4-NEXT:    [[TMP110:%.*]] = bitcast i8** [[TMP109]] to %struct.TT**
+// CHECK4-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP110]], align 4
+// CHECK4-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 8
+// CHECK4-NEXT:    [[TMP112:%.*]] = bitcast i8** [[TMP111]] to %struct.TT**
+// CHECK4-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP112]], align 4
+// CHECK4-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 8
 // CHECK4-NEXT:    store i8* null, i8** [[TMP113]], align 4
-// CHECK4-NEXT:    [[TMP114:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 8
-// CHECK4-NEXT:    [[TMP115:%.*]] = bitcast i8** [[TMP114]] to %struct.TT**
-// CHECK4-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP115]], align 4
-// CHECK4-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 8
-// CHECK4-NEXT:    [[TMP117:%.*]] = bitcast i8** [[TMP116]] to %struct.TT**
-// CHECK4-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP117]], align 4
-// CHECK4-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 8
-// CHECK4-NEXT:    store i64 12, i64* [[TMP118]], align 4
-// CHECK4-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 8
-// CHECK4-NEXT:    store i8* null, i8** [[TMP119]], align 4
-// CHECK4-NEXT:    [[TMP120:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 9
-// CHECK4-NEXT:    [[TMP121:%.*]] = bitcast i8** [[TMP120]] to i32*
-// CHECK4-NEXT:    store i32 [[TMP59]], i32* [[TMP121]], align 4
-// CHECK4-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 9
-// CHECK4-NEXT:    [[TMP123:%.*]] = bitcast i8** [[TMP122]] to i32*
-// CHECK4-NEXT:    store i32 [[TMP59]], i32* [[TMP123]], align 4
-// CHECK4-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 9
-// CHECK4-NEXT:    store i64 4, i64* [[TMP124]], align 4
-// CHECK4-NEXT:    [[TMP125:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 9
-// CHECK4-NEXT:    store i8* null, i8** [[TMP125]], align 4
-// CHECK4-NEXT:    [[TMP126:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 0
-// CHECK4-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 0
-// CHECK4-NEXT:    [[TMP128:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK4-NEXT:    [[TMP129:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l140.region_id, i32 10, i8** [[TMP126]], i8** [[TMP127]], i64* [[TMP128]], i64* getelementptr inbounds ([10 x i64], [10 x i64]* @.offload_maptypes.8, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
-// CHECK4-NEXT:    [[TMP130:%.*]] = icmp ne i32 [[TMP129]], 0
-// CHECK4-NEXT:    br i1 [[TMP130]], label [[OMP_OFFLOAD_FAILED17:%.*]], label [[OMP_OFFLOAD_CONT18:%.*]]
+// CHECK4-NEXT:    [[TMP114:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 9
+// CHECK4-NEXT:    [[TMP115:%.*]] = bitcast i8** [[TMP114]] to i32*
+// CHECK4-NEXT:    store i32 [[TMP59]], i32* [[TMP115]], align 4
+// CHECK4-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 9
+// CHECK4-NEXT:    [[TMP117:%.*]] = bitcast i8** [[TMP116]] to i32*
+// CHECK4-NEXT:    store i32 [[TMP59]], i32* [[TMP117]], align 4
+// CHECK4-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 9
+// CHECK4-NEXT:    store i8* null, i8** [[TMP118]], align 4
+// CHECK4-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 0
+// CHECK4-NEXT:    [[TMP120:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 0
+// CHECK4-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK4-NEXT:    [[TMP122:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l140.region_id, i32 10, i8** [[TMP119]], i8** [[TMP120]], i64* [[TMP121]], i64* getelementptr inbounds ([10 x i64], [10 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
+// CHECK4-NEXT:    [[TMP123:%.*]] = icmp ne i32 [[TMP122]], 0
+// CHECK4-NEXT:    br i1 [[TMP123]], label [[OMP_OFFLOAD_FAILED17:%.*]], label [[OMP_OFFLOAD_CONT18:%.*]]
 // CHECK4:       omp_offload.failed17:
 // CHECK4-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l140(i32 [[TMP57]], [10 x float]* [[B]], i32 [[TMP1]], float* [[VLA]], [5 x [10 x double]]* [[C]], i32 5, i32 [[TMP3]], double* [[VLA1]], %struct.TT* [[D]], i32 [[TMP59]]) #[[ATTR4]]
 // CHECK4-NEXT:    br label [[OMP_OFFLOAD_CONT18]]
@@ -5265,10 +5191,10 @@ int bar(int n){
 // CHECK4-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l140(i32 [[TMP57]], [10 x float]* [[B]], i32 [[TMP1]], float* [[VLA]], [5 x [10 x double]]* [[C]], i32 5, i32 [[TMP3]], double* [[VLA1]], %struct.TT* [[D]], i32 [[TMP59]]) #[[ATTR4]]
 // CHECK4-NEXT:    br label [[OMP_IF_END20]]
 // CHECK4:       omp_if.end20:
-// CHECK4-NEXT:    [[TMP131:%.*]] = load i32, i32* [[A]], align 4
-// CHECK4-NEXT:    [[TMP132:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK4-NEXT:    call void @llvm.stackrestore(i8* [[TMP132]])
-// CHECK4-NEXT:    ret i32 [[TMP131]]
+// CHECK4-NEXT:    [[TMP124:%.*]] = load i32, i32* [[A]], align 4
+// CHECK4-NEXT:    [[TMP125:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK4-NEXT:    call void @llvm.stackrestore(i8* [[TMP125]])
+// CHECK4-NEXT:    ret i32 [[TMP124]]
 //
 //
 // CHECK4-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l96
@@ -6000,14 +5926,14 @@ int bar(int n){
 // CHECK4-NEXT:    [[TMP7:%.*]] = mul nuw i32 2, [[TMP1]]
 // CHECK4-NEXT:    [[TMP8:%.*]] = mul nuw i32 [[TMP7]], 2
 // CHECK4-NEXT:    [[TMP9:%.*]] = sext i32 [[TMP8]] to i64
-// CHECK4-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK4-NEXT:    [[TMP11:%.*]] = bitcast i8** [[TMP10]] to %struct.S1**
-// CHECK4-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP11]], align 4
-// CHECK4-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK4-NEXT:    [[TMP13:%.*]] = bitcast i8** [[TMP12]] to double**
-// CHECK4-NEXT:    store double* [[A]], double** [[TMP13]], align 4
-// CHECK4-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK4-NEXT:    store i64 8, i64* [[TMP14]], align 4
+// CHECK4-NEXT:    [[TMP10:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK4-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP10]], i8* align 4 bitcast ([5 x i64]* @.offload_sizes.11 to i8*), i32 40, i1 false)
+// CHECK4-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK4-NEXT:    [[TMP12:%.*]] = bitcast i8** [[TMP11]] to %struct.S1**
+// CHECK4-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP12]], align 4
+// CHECK4-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK4-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to double**
+// CHECK4-NEXT:    store double* [[A]], double** [[TMP14]], align 4
 // CHECK4-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
 // CHECK4-NEXT:    store i8* null, i8** [[TMP15]], align 4
 // CHECK4-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -6016,46 +5942,40 @@ int bar(int n){
 // CHECK4-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK4-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32*
 // CHECK4-NEXT:    store i32 [[TMP5]], i32* [[TMP19]], align 4
-// CHECK4-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK4-NEXT:    store i64 4, i64* [[TMP20]], align 4
-// CHECK4-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK4-NEXT:    store i8* null, i8** [[TMP21]], align 4
-// CHECK4-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK4-NEXT:    [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i32*
-// CHECK4-NEXT:    store i32 2, i32* [[TMP23]], align 4
-// CHECK4-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK4-NEXT:    [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i32*
-// CHECK4-NEXT:    store i32 2, i32* [[TMP25]], align 4
-// CHECK4-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK4-NEXT:    store i64 4, i64* [[TMP26]], align 4
-// CHECK4-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK4-NEXT:    store i8* null, i8** [[TMP27]], align 4
-// CHECK4-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK4-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK4-NEXT:    store i8* null, i8** [[TMP20]], align 4
+// CHECK4-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK4-NEXT:    [[TMP22:%.*]] = bitcast i8** [[TMP21]] to i32*
+// CHECK4-NEXT:    store i32 2, i32* [[TMP22]], align 4
+// CHECK4-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK4-NEXT:    [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i32*
+// CHECK4-NEXT:    store i32 2, i32* [[TMP24]], align 4
+// CHECK4-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK4-NEXT:    store i8* null, i8** [[TMP25]], align 4
+// CHECK4-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK4-NEXT:    [[TMP27:%.*]] = bitcast i8** [[TMP26]] to i32*
+// CHECK4-NEXT:    store i32 [[TMP1]], i32* [[TMP27]], align 4
+// CHECK4-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
 // CHECK4-NEXT:    [[TMP29:%.*]] = bitcast i8** [[TMP28]] to i32*
 // CHECK4-NEXT:    store i32 [[TMP1]], i32* [[TMP29]], align 4
-// CHECK4-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK4-NEXT:    [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i32*
-// CHECK4-NEXT:    store i32 [[TMP1]], i32* [[TMP31]], align 4
-// CHECK4-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK4-NEXT:    store i64 4, i64* [[TMP32]], align 4
-// CHECK4-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
-// CHECK4-NEXT:    store i8* null, i8** [[TMP33]], align 4
-// CHECK4-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK4-NEXT:    [[TMP35:%.*]] = bitcast i8** [[TMP34]] to i16**
-// CHECK4-NEXT:    store i16* [[VLA]], i16** [[TMP35]], align 4
-// CHECK4-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
-// CHECK4-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i16**
-// CHECK4-NEXT:    store i16* [[VLA]], i16** [[TMP37]], align 4
-// CHECK4-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK4-NEXT:    store i64 [[TMP9]], i64* [[TMP38]], align 4
-// CHECK4-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
-// CHECK4-NEXT:    store i8* null, i8** [[TMP39]], align 4
-// CHECK4-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK4-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK4-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK4-NEXT:    [[TMP43:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l216.region_id, i32 5, i8** [[TMP40]], i8** [[TMP41]], i64* [[TMP42]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.10, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
-// CHECK4-NEXT:    [[TMP44:%.*]] = icmp ne i32 [[TMP43]], 0
-// CHECK4-NEXT:    br i1 [[TMP44]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK4-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
+// CHECK4-NEXT:    store i8* null, i8** [[TMP30]], align 4
+// CHECK4-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK4-NEXT:    [[TMP32:%.*]] = bitcast i8** [[TMP31]] to i16**
+// CHECK4-NEXT:    store i16* [[VLA]], i16** [[TMP32]], align 4
+// CHECK4-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK4-NEXT:    [[TMP34:%.*]] = bitcast i8** [[TMP33]] to i16**
+// CHECK4-NEXT:    store i16* [[VLA]], i16** [[TMP34]], align 4
+// CHECK4-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
+// CHECK4-NEXT:    store i64 [[TMP9]], i64* [[TMP35]], align 4
+// CHECK4-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
+// CHECK4-NEXT:    store i8* null, i8** [[TMP36]], align 4
+// CHECK4-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK4-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK4-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK4-NEXT:    [[TMP40:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l216.region_id, i32 5, i8** [[TMP37]], i8** [[TMP38]], i64* [[TMP39]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.12, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
+// CHECK4-NEXT:    [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0
+// CHECK4-NEXT:    br i1 [[TMP41]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK4:       omp_offload.failed:
 // CHECK4-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l216(%struct.S1* [[THIS1]], i32 [[TMP5]], i32 2, i32 [[TMP1]], i16* [[VLA]]) #[[ATTR4]]
 // CHECK4-NEXT:    br label [[OMP_OFFLOAD_CONT]]
@@ -6065,15 +5985,15 @@ int bar(int n){
 // CHECK4-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l216(%struct.S1* [[THIS1]], i32 [[TMP5]], i32 2, i32 [[TMP1]], i16* [[VLA]]) #[[ATTR4]]
 // CHECK4-NEXT:    br label [[OMP_IF_END]]
 // CHECK4:       omp_if.end:
-// CHECK4-NEXT:    [[TMP45:%.*]] = mul nsw i32 1, [[TMP1]]
-// CHECK4-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i32 [[TMP45]]
+// CHECK4-NEXT:    [[TMP42:%.*]] = mul nsw i32 1, [[TMP1]]
+// CHECK4-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i32 [[TMP42]]
 // CHECK4-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1
-// CHECK4-NEXT:    [[TMP46:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
-// CHECK4-NEXT:    [[CONV:%.*]] = sext i16 [[TMP46]] to i32
-// CHECK4-NEXT:    [[TMP47:%.*]] = load i32, i32* [[B]], align 4
-// CHECK4-NEXT:    [[ADD3:%.*]] = add nsw i32 [[CONV]], [[TMP47]]
-// CHECK4-NEXT:    [[TMP48:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK4-NEXT:    call void @llvm.stackrestore(i8* [[TMP48]])
+// CHECK4-NEXT:    [[TMP43:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
+// CHECK4-NEXT:    [[CONV:%.*]] = sext i16 [[TMP43]] to i32
+// CHECK4-NEXT:    [[TMP44:%.*]] = load i32, i32* [[B]], align 4
+// CHECK4-NEXT:    [[ADD3:%.*]] = add nsw i32 [[CONV]], [[TMP44]]
+// CHECK4-NEXT:    [[TMP45:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK4-NEXT:    call void @llvm.stackrestore(i8* [[TMP45]])
 // CHECK4-NEXT:    ret i32 [[ADD3]]
 //
 //
@@ -6144,7 +6064,7 @@ int bar(int n){
 // CHECK4-NEXT:    store i8* null, i8** [[TMP26]], align 4
 // CHECK4-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK4-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK4-NEXT:    [[TMP29:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l195.region_id, i32 4, i8** [[TMP27]], i8** [[TMP28]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.12, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
+// CHECK4-NEXT:    [[TMP29:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l195.region_id, i32 4, i8** [[TMP27]], i8** [[TMP28]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.14, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.15, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
 // CHECK4-NEXT:    [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0
 // CHECK4-NEXT:    br i1 [[TMP30]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK4:       omp_offload.failed:
@@ -6212,7 +6132,7 @@ int bar(int n){
 // CHECK4-NEXT:    store i8* null, i8** [[TMP19]], align 4
 // CHECK4-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK4-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK4-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l178.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.15, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.16, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
+// CHECK4-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l178.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.17, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.18, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
 // CHECK4-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK4-NEXT:    br i1 [[TMP23]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK4:       omp_offload.failed:
@@ -6249,11 +6169,11 @@ int bar(int n){
 // CHECK4-NEXT:    [[TMP4:%.*]] = load i32, i32* [[B_ADDR]], align 4
 // CHECK4-NEXT:    store i32 [[TMP4]], i32* [[B_CASTED]], align 4
 // CHECK4-NEXT:    [[TMP5:%.*]] = load i32, i32* [[B_CASTED]], align 4
-// CHECK4-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i32, i32, i32, i16*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i32 [[TMP5]], i32 [[TMP1]], i32 [[TMP2]], i16* [[TMP3]])
+// CHECK4-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i32, i32, i32, i16*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i32 [[TMP5]], i32 [[TMP1]], i32 [[TMP2]], i16* [[TMP3]])
 // CHECK4-NEXT:    ret void
 //
 //
-// CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..9
+// CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..10
 // CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR3]] {
 // CHECK4-NEXT:  entry:
 // CHECK4-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -6376,11 +6296,11 @@ int bar(int n){
 // CHECK4-NEXT:    [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8*
 // CHECK4-NEXT:    store i8 [[TMP5]], i8* [[CONV3]], align 1
 // CHECK4-NEXT:    [[TMP6:%.*]] = load i32, i32* [[AAA_CASTED]], align 4
-// CHECK4-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, [10 x i32]*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], [10 x i32]* [[TMP0]])
+// CHECK4-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, [10 x i32]*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], [10 x i32]* [[TMP0]])
 // CHECK4-NEXT:    ret void
 //
 //
-// CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..11
+// CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..13
 // CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] {
 // CHECK4-NEXT:  entry:
 // CHECK4-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -6423,11 +6343,11 @@ int bar(int n){
 // CHECK4-NEXT:    [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16*
 // CHECK4-NEXT:    store i16 [[TMP3]], i16* [[CONV1]], align 2
 // CHECK4-NEXT:    [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4
-// CHECK4-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], [10 x i32]* [[TMP0]])
+// CHECK4-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], [10 x i32]* [[TMP0]])
 // CHECK4-NEXT:    ret void
 //
 //
-// CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..14
+// CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..16
 // CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] {
 // CHECK4-NEXT:  entry:
 // CHECK4-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -6515,7 +6435,7 @@ int bar(int n){
 //
 //
 // CHECK4-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK4-SAME: () #[[ATTR7:[0-9]+]] {
+// CHECK4-SAME: () #[[ATTR8:[0-9]+]] {
 // CHECK4-NEXT:  entry:
 // CHECK4-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK4-NEXT:    ret void
@@ -6696,14 +6616,14 @@ int bar(int n){
 // CHECK5-NEXT:    [[TMP65:%.*]] = mul nuw i64 [[TMP2]], 4
 // CHECK5-NEXT:    [[TMP66:%.*]] = mul nuw i64 5, [[TMP5]]
 // CHECK5-NEXT:    [[TMP67:%.*]] = mul nuw i64 [[TMP66]], 8
-// CHECK5-NEXT:    [[TMP68:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0
-// CHECK5-NEXT:    [[TMP69:%.*]] = bitcast i8** [[TMP68]] to i64*
-// CHECK5-NEXT:    store i64 [[TMP61]], i64* [[TMP69]], align 8
-// CHECK5-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 0
-// CHECK5-NEXT:    [[TMP71:%.*]] = bitcast i8** [[TMP70]] to i64*
-// CHECK5-NEXT:    store i64 [[TMP61]], i64* [[TMP71]], align 8
-// CHECK5-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK5-NEXT:    store i64 4, i64* [[TMP72]], align 8
+// CHECK5-NEXT:    [[TMP68:%.*]] = bitcast [10 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK5-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP68]], i8* align 8 bitcast ([10 x i64]* @.offload_sizes.8 to i8*), i64 80, i1 false)
+// CHECK5-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0
+// CHECK5-NEXT:    [[TMP70:%.*]] = bitcast i8** [[TMP69]] to i64*
+// CHECK5-NEXT:    store i64 [[TMP61]], i64* [[TMP70]], align 8
+// CHECK5-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 0
+// CHECK5-NEXT:    [[TMP72:%.*]] = bitcast i8** [[TMP71]] to i64*
+// CHECK5-NEXT:    store i64 [[TMP61]], i64* [[TMP72]], align 8
 // CHECK5-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 0
 // CHECK5-NEXT:    store i8* null, i8** [[TMP73]], align 8
 // CHECK5-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 1
@@ -6712,96 +6632,82 @@ int bar(int n){
 // CHECK5-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 1
 // CHECK5-NEXT:    [[TMP77:%.*]] = bitcast i8** [[TMP76]] to [10 x float]**
 // CHECK5-NEXT:    store [10 x float]* [[B]], [10 x float]** [[TMP77]], align 8
-// CHECK5-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK5-NEXT:    store i64 40, i64* [[TMP78]], align 8
-// CHECK5-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 1
-// CHECK5-NEXT:    store i8* null, i8** [[TMP79]], align 8
-// CHECK5-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 2
-// CHECK5-NEXT:    [[TMP81:%.*]] = bitcast i8** [[TMP80]] to i64*
-// CHECK5-NEXT:    store i64 [[TMP2]], i64* [[TMP81]], align 8
-// CHECK5-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 2
-// CHECK5-NEXT:    [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i64*
-// CHECK5-NEXT:    store i64 [[TMP2]], i64* [[TMP83]], align 8
-// CHECK5-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK5-NEXT:    store i64 8, i64* [[TMP84]], align 8
-// CHECK5-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 2
-// CHECK5-NEXT:    store i8* null, i8** [[TMP85]], align 8
-// CHECK5-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 3
+// CHECK5-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 1
+// CHECK5-NEXT:    store i8* null, i8** [[TMP78]], align 8
+// CHECK5-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 2
+// CHECK5-NEXT:    [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i64*
+// CHECK5-NEXT:    store i64 [[TMP2]], i64* [[TMP80]], align 8
+// CHECK5-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 2
+// CHECK5-NEXT:    [[TMP82:%.*]] = bitcast i8** [[TMP81]] to i64*
+// CHECK5-NEXT:    store i64 [[TMP2]], i64* [[TMP82]], align 8
+// CHECK5-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 2
+// CHECK5-NEXT:    store i8* null, i8** [[TMP83]], align 8
+// CHECK5-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 3
+// CHECK5-NEXT:    [[TMP85:%.*]] = bitcast i8** [[TMP84]] to float**
+// CHECK5-NEXT:    store float* [[VLA]], float** [[TMP85]], align 8
+// CHECK5-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 3
 // CHECK5-NEXT:    [[TMP87:%.*]] = bitcast i8** [[TMP86]] to float**
 // CHECK5-NEXT:    store float* [[VLA]], float** [[TMP87]], align 8
-// CHECK5-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 3
-// CHECK5-NEXT:    [[TMP89:%.*]] = bitcast i8** [[TMP88]] to float**
-// CHECK5-NEXT:    store float* [[VLA]], float** [[TMP89]], align 8
-// CHECK5-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK5-NEXT:    store i64 [[TMP65]], i64* [[TMP90]], align 8
-// CHECK5-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 3
-// CHECK5-NEXT:    store i8* null, i8** [[TMP91]], align 8
-// CHECK5-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 4
+// CHECK5-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
+// CHECK5-NEXT:    store i64 [[TMP65]], i64* [[TMP88]], align 8
+// CHECK5-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 3
+// CHECK5-NEXT:    store i8* null, i8** [[TMP89]], align 8
+// CHECK5-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 4
+// CHECK5-NEXT:    [[TMP91:%.*]] = bitcast i8** [[TMP90]] to [5 x [10 x double]]**
+// CHECK5-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP91]], align 8
+// CHECK5-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 4
 // CHECK5-NEXT:    [[TMP93:%.*]] = bitcast i8** [[TMP92]] to [5 x [10 x double]]**
 // CHECK5-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP93]], align 8
-// CHECK5-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 4
-// CHECK5-NEXT:    [[TMP95:%.*]] = bitcast i8** [[TMP94]] to [5 x [10 x double]]**
-// CHECK5-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP95]], align 8
-// CHECK5-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK5-NEXT:    store i64 400, i64* [[TMP96]], align 8
-// CHECK5-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 4
-// CHECK5-NEXT:    store i8* null, i8** [[TMP97]], align 8
-// CHECK5-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 5
-// CHECK5-NEXT:    [[TMP99:%.*]] = bitcast i8** [[TMP98]] to i64*
-// CHECK5-NEXT:    store i64 5, i64* [[TMP99]], align 8
-// CHECK5-NEXT:    [[TMP100:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 5
+// CHECK5-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 4
+// CHECK5-NEXT:    store i8* null, i8** [[TMP94]], align 8
+// CHECK5-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 5
+// CHECK5-NEXT:    [[TMP96:%.*]] = bitcast i8** [[TMP95]] to i64*
+// CHECK5-NEXT:    store i64 5, i64* [[TMP96]], align 8
+// CHECK5-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 5
+// CHECK5-NEXT:    [[TMP98:%.*]] = bitcast i8** [[TMP97]] to i64*
+// CHECK5-NEXT:    store i64 5, i64* [[TMP98]], align 8
+// CHECK5-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 5
+// CHECK5-NEXT:    store i8* null, i8** [[TMP99]], align 8
+// CHECK5-NEXT:    [[TMP100:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 6
 // CHECK5-NEXT:    [[TMP101:%.*]] = bitcast i8** [[TMP100]] to i64*
-// CHECK5-NEXT:    store i64 5, i64* [[TMP101]], align 8
-// CHECK5-NEXT:    [[TMP102:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 5
-// CHECK5-NEXT:    store i64 8, i64* [[TMP102]], align 8
-// CHECK5-NEXT:    [[TMP103:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 5
-// CHECK5-NEXT:    store i8* null, i8** [[TMP103]], align 8
-// CHECK5-NEXT:    [[TMP104:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 6
-// CHECK5-NEXT:    [[TMP105:%.*]] = bitcast i8** [[TMP104]] to i64*
-// CHECK5-NEXT:    store i64 [[TMP5]], i64* [[TMP105]], align 8
-// CHECK5-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 6
-// CHECK5-NEXT:    [[TMP107:%.*]] = bitcast i8** [[TMP106]] to i64*
-// CHECK5-NEXT:    store i64 [[TMP5]], i64* [[TMP107]], align 8
-// CHECK5-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 6
-// CHECK5-NEXT:    store i64 8, i64* [[TMP108]], align 8
-// CHECK5-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 6
-// CHECK5-NEXT:    store i8* null, i8** [[TMP109]], align 8
-// CHECK5-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 7
-// CHECK5-NEXT:    [[TMP111:%.*]] = bitcast i8** [[TMP110]] to double**
-// CHECK5-NEXT:    store double* [[VLA1]], double** [[TMP111]], align 8
-// CHECK5-NEXT:    [[TMP112:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 7
-// CHECK5-NEXT:    [[TMP113:%.*]] = bitcast i8** [[TMP112]] to double**
-// CHECK5-NEXT:    store double* [[VLA1]], double** [[TMP113]], align 8
-// CHECK5-NEXT:    [[TMP114:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
-// CHECK5-NEXT:    store i64 [[TMP67]], i64* [[TMP114]], align 8
-// CHECK5-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 7
+// CHECK5-NEXT:    store i64 [[TMP5]], i64* [[TMP101]], align 8
+// CHECK5-NEXT:    [[TMP102:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 6
+// CHECK5-NEXT:    [[TMP103:%.*]] = bitcast i8** [[TMP102]] to i64*
+// CHECK5-NEXT:    store i64 [[TMP5]], i64* [[TMP103]], align 8
+// CHECK5-NEXT:    [[TMP104:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 6
+// CHECK5-NEXT:    store i8* null, i8** [[TMP104]], align 8
+// CHECK5-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 7
+// CHECK5-NEXT:    [[TMP106:%.*]] = bitcast i8** [[TMP105]] to double**
+// CHECK5-NEXT:    store double* [[VLA1]], double** [[TMP106]], align 8
+// CHECK5-NEXT:    [[TMP107:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 7
+// CHECK5-NEXT:    [[TMP108:%.*]] = bitcast i8** [[TMP107]] to double**
+// CHECK5-NEXT:    store double* [[VLA1]], double** [[TMP108]], align 8
+// CHECK5-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
+// CHECK5-NEXT:    store i64 [[TMP67]], i64* [[TMP109]], align 8
+// CHECK5-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 7
+// CHECK5-NEXT:    store i8* null, i8** [[TMP110]], align 8
+// CHECK5-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 8
+// CHECK5-NEXT:    [[TMP112:%.*]] = bitcast i8** [[TMP111]] to %struct.TT**
+// CHECK5-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP112]], align 8
+// CHECK5-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 8
+// CHECK5-NEXT:    [[TMP114:%.*]] = bitcast i8** [[TMP113]] to %struct.TT**
+// CHECK5-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP114]], align 8
+// CHECK5-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 8
 // CHECK5-NEXT:    store i8* null, i8** [[TMP115]], align 8
-// CHECK5-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 8
-// CHECK5-NEXT:    [[TMP117:%.*]] = bitcast i8** [[TMP116]] to %struct.TT**
-// CHECK5-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP117]], align 8
-// CHECK5-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 8
-// CHECK5-NEXT:    [[TMP119:%.*]] = bitcast i8** [[TMP118]] to %struct.TT**
-// CHECK5-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP119]], align 8
-// CHECK5-NEXT:    [[TMP120:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 8
-// CHECK5-NEXT:    store i64 16, i64* [[TMP120]], align 8
-// CHECK5-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 8
-// CHECK5-NEXT:    store i8* null, i8** [[TMP121]], align 8
-// CHECK5-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 9
-// CHECK5-NEXT:    [[TMP123:%.*]] = bitcast i8** [[TMP122]] to i64*
-// CHECK5-NEXT:    store i64 [[TMP63]], i64* [[TMP123]], align 8
-// CHECK5-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 9
-// CHECK5-NEXT:    [[TMP125:%.*]] = bitcast i8** [[TMP124]] to i64*
-// CHECK5-NEXT:    store i64 [[TMP63]], i64* [[TMP125]], align 8
-// CHECK5-NEXT:    [[TMP126:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 9
-// CHECK5-NEXT:    store i64 4, i64* [[TMP126]], align 8
-// CHECK5-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 9
-// CHECK5-NEXT:    store i8* null, i8** [[TMP127]], align 8
-// CHECK5-NEXT:    [[TMP128:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0
-// CHECK5-NEXT:    [[TMP129:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 0
-// CHECK5-NEXT:    [[TMP130:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK5-NEXT:    [[TMP131:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l140.region_id, i32 10, i8** [[TMP128]], i8** [[TMP129]], i64* [[TMP130]], i64* getelementptr inbounds ([10 x i64], [10 x i64]* @.offload_maptypes.8, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
-// CHECK5-NEXT:    [[TMP132:%.*]] = icmp ne i32 [[TMP131]], 0
-// CHECK5-NEXT:    br i1 [[TMP132]], label [[OMP_OFFLOAD_FAILED23:%.*]], label [[OMP_OFFLOAD_CONT24:%.*]]
+// CHECK5-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 9
+// CHECK5-NEXT:    [[TMP117:%.*]] = bitcast i8** [[TMP116]] to i64*
+// CHECK5-NEXT:    store i64 [[TMP63]], i64* [[TMP117]], align 8
+// CHECK5-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 9
+// CHECK5-NEXT:    [[TMP119:%.*]] = bitcast i8** [[TMP118]] to i64*
+// CHECK5-NEXT:    store i64 [[TMP63]], i64* [[TMP119]], align 8
+// CHECK5-NEXT:    [[TMP120:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 9
+// CHECK5-NEXT:    store i8* null, i8** [[TMP120]], align 8
+// CHECK5-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0
+// CHECK5-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 0
+// CHECK5-NEXT:    [[TMP123:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK5-NEXT:    [[TMP124:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l140.region_id, i32 10, i8** [[TMP121]], i8** [[TMP122]], i64* [[TMP123]], i64* getelementptr inbounds ([10 x i64], [10 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
+// CHECK5-NEXT:    [[TMP125:%.*]] = icmp ne i32 [[TMP124]], 0
+// CHECK5-NEXT:    br i1 [[TMP125]], label [[OMP_OFFLOAD_FAILED23:%.*]], label [[OMP_OFFLOAD_CONT24:%.*]]
 // CHECK5:       omp_offload.failed23:
 // CHECK5-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l140(i64 [[TMP61]], [10 x float]* [[B]], i64 [[TMP2]], float* [[VLA]], [5 x [10 x double]]* [[C]], i64 5, i64 [[TMP5]], double* [[VLA1]], %struct.TT* [[D]], i64 [[TMP63]]) #[[ATTR4]]
 // CHECK5-NEXT:    br label [[OMP_OFFLOAD_CONT24]]
@@ -6811,10 +6717,10 @@ int bar(int n){
 // CHECK5-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l140(i64 [[TMP61]], [10 x float]* [[B]], i64 [[TMP2]], float* [[VLA]], [5 x [10 x double]]* [[C]], i64 5, i64 [[TMP5]], double* [[VLA1]], %struct.TT* [[D]], i64 [[TMP63]]) #[[ATTR4]]
 // CHECK5-NEXT:    br label [[OMP_IF_END26]]
 // CHECK5:       omp_if.end26:
-// CHECK5-NEXT:    [[TMP133:%.*]] = load i32, i32* [[A]], align 4
-// CHECK5-NEXT:    [[TMP134:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK5-NEXT:    call void @llvm.stackrestore(i8* [[TMP134]])
-// CHECK5-NEXT:    ret i32 [[TMP133]]
+// CHECK5-NEXT:    [[TMP126:%.*]] = load i32, i32* [[A]], align 4
+// CHECK5-NEXT:    [[TMP127:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK5-NEXT:    call void @llvm.stackrestore(i8* [[TMP127]])
+// CHECK5-NEXT:    ret i32 [[TMP126]]
 //
 //
 // CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l96
@@ -7579,14 +7485,14 @@ int bar(int n){
 // CHECK5-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0
 // CHECK5-NEXT:    [[TMP11:%.*]] = mul nuw i64 2, [[TMP2]]
 // CHECK5-NEXT:    [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 2
-// CHECK5-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK5-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to %struct.S1**
-// CHECK5-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP14]], align 8
-// CHECK5-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK5-NEXT:    [[TMP16:%.*]] = bitcast i8** [[TMP15]] to double**
-// CHECK5-NEXT:    store double* [[A]], double** [[TMP16]], align 8
-// CHECK5-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [6 x i64], [6 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK5-NEXT:    store i64 8, i64* [[TMP17]], align 8
+// CHECK5-NEXT:    [[TMP13:%.*]] = bitcast [6 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK5-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP13]], i8* align 8 bitcast ([6 x i64]* @.offload_sizes.11 to i8*), i64 48, i1 false)
+// CHECK5-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK5-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to %struct.S1**
+// CHECK5-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP15]], align 8
+// CHECK5-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK5-NEXT:    [[TMP17:%.*]] = bitcast i8** [[TMP16]] to double**
+// CHECK5-NEXT:    store double* [[A]], double** [[TMP17]], align 8
 // CHECK5-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
 // CHECK5-NEXT:    store i8* null, i8** [[TMP18]], align 8
 // CHECK5-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -7595,59 +7501,51 @@ int bar(int n){
 // CHECK5-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK5-NEXT:    [[TMP22:%.*]] = bitcast i8** [[TMP21]] to i64*
 // CHECK5-NEXT:    store i64 [[TMP7]], i64* [[TMP22]], align 8
-// CHECK5-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [6 x i64], [6 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK5-NEXT:    store i64 4, i64* [[TMP23]], align 8
-// CHECK5-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK5-NEXT:    store i8* null, i8** [[TMP24]], align 8
-// CHECK5-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK5-NEXT:    [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i64*
-// CHECK5-NEXT:    store i64 2, i64* [[TMP26]], align 8
-// CHECK5-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK5-NEXT:    [[TMP28:%.*]] = bitcast i8** [[TMP27]] to i64*
-// CHECK5-NEXT:    store i64 2, i64* [[TMP28]], align 8
-// CHECK5-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [6 x i64], [6 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK5-NEXT:    store i64 8, i64* [[TMP29]], align 8
-// CHECK5-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK5-NEXT:    store i8* null, i8** [[TMP30]], align 8
-// CHECK5-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK5-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK5-NEXT:    store i8* null, i8** [[TMP23]], align 8
+// CHECK5-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK5-NEXT:    [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i64*
+// CHECK5-NEXT:    store i64 2, i64* [[TMP25]], align 8
+// CHECK5-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK5-NEXT:    [[TMP27:%.*]] = bitcast i8** [[TMP26]] to i64*
+// CHECK5-NEXT:    store i64 2, i64* [[TMP27]], align 8
+// CHECK5-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK5-NEXT:    store i8* null, i8** [[TMP28]], align 8
+// CHECK5-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK5-NEXT:    [[TMP30:%.*]] = bitcast i8** [[TMP29]] to i64*
+// CHECK5-NEXT:    store i64 [[TMP2]], i64* [[TMP30]], align 8
+// CHECK5-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
 // CHECK5-NEXT:    [[TMP32:%.*]] = bitcast i8** [[TMP31]] to i64*
 // CHECK5-NEXT:    store i64 [[TMP2]], i64* [[TMP32]], align 8
-// CHECK5-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK5-NEXT:    [[TMP34:%.*]] = bitcast i8** [[TMP33]] to i64*
-// CHECK5-NEXT:    store i64 [[TMP2]], i64* [[TMP34]], align 8
-// CHECK5-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [6 x i64], [6 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK5-NEXT:    store i64 8, i64* [[TMP35]], align 8
-// CHECK5-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
-// CHECK5-NEXT:    store i8* null, i8** [[TMP36]], align 8
-// CHECK5-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK5-NEXT:    [[TMP38:%.*]] = bitcast i8** [[TMP37]] to i16**
-// CHECK5-NEXT:    store i16* [[VLA]], i16** [[TMP38]], align 8
-// CHECK5-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
-// CHECK5-NEXT:    [[TMP40:%.*]] = bitcast i8** [[TMP39]] to i16**
-// CHECK5-NEXT:    store i16* [[VLA]], i16** [[TMP40]], align 8
-// CHECK5-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [6 x i64], [6 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK5-NEXT:    store i64 [[TMP12]], i64* [[TMP41]], align 8
-// CHECK5-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
-// CHECK5-NEXT:    store i8* null, i8** [[TMP42]], align 8
-// CHECK5-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 5
-// CHECK5-NEXT:    [[TMP44:%.*]] = bitcast i8** [[TMP43]] to i64*
-// CHECK5-NEXT:    store i64 [[TMP9]], i64* [[TMP44]], align 8
-// CHECK5-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 5
-// CHECK5-NEXT:    [[TMP46:%.*]] = bitcast i8** [[TMP45]] to i64*
-// CHECK5-NEXT:    store i64 [[TMP9]], i64* [[TMP46]], align 8
-// CHECK5-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [6 x i64], [6 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 5
-// CHECK5-NEXT:    store i64 1, i64* [[TMP47]], align 8
-// CHECK5-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 5
-// CHECK5-NEXT:    store i8* null, i8** [[TMP48]], align 8
-// CHECK5-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK5-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK5-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [6 x i64], [6 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK5-NEXT:    [[TMP52:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1
-// CHECK5-NEXT:    [[TOBOOL5:%.*]] = trunc i8 [[TMP52]] to i1
-// CHECK5-NEXT:    [[TMP53:%.*]] = select i1 [[TOBOOL5]], i32 0, i32 1
-// CHECK5-NEXT:    [[TMP54:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l214.region_id, i32 6, i8** [[TMP49]], i8** [[TMP50]], i64* [[TMP51]], i64* getelementptr inbounds ([6 x i64], [6 x i64]* @.offload_maptypes.10, i32 0, i32 0), i8** null, i8** null, i32 1, i32 [[TMP53]])
-// CHECK5-NEXT:    [[TMP55:%.*]] = icmp ne i32 [[TMP54]], 0
-// CHECK5-NEXT:    br i1 [[TMP55]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK5-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
+// CHECK5-NEXT:    store i8* null, i8** [[TMP33]], align 8
+// CHECK5-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK5-NEXT:    [[TMP35:%.*]] = bitcast i8** [[TMP34]] to i16**
+// CHECK5-NEXT:    store i16* [[VLA]], i16** [[TMP35]], align 8
+// CHECK5-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK5-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i16**
+// CHECK5-NEXT:    store i16* [[VLA]], i16** [[TMP37]], align 8
+// CHECK5-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [6 x i64], [6 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
+// CHECK5-NEXT:    store i64 [[TMP12]], i64* [[TMP38]], align 8
+// CHECK5-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
+// CHECK5-NEXT:    store i8* null, i8** [[TMP39]], align 8
+// CHECK5-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 5
+// CHECK5-NEXT:    [[TMP41:%.*]] = bitcast i8** [[TMP40]] to i64*
+// CHECK5-NEXT:    store i64 [[TMP9]], i64* [[TMP41]], align 8
+// CHECK5-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 5
+// CHECK5-NEXT:    [[TMP43:%.*]] = bitcast i8** [[TMP42]] to i64*
+// CHECK5-NEXT:    store i64 [[TMP9]], i64* [[TMP43]], align 8
+// CHECK5-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 5
+// CHECK5-NEXT:    store i8* null, i8** [[TMP44]], align 8
+// CHECK5-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK5-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK5-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [6 x i64], [6 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK5-NEXT:    [[TMP48:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1
+// CHECK5-NEXT:    [[TOBOOL5:%.*]] = trunc i8 [[TMP48]] to i1
+// CHECK5-NEXT:    [[TMP49:%.*]] = select i1 [[TOBOOL5]], i32 0, i32 1
+// CHECK5-NEXT:    [[TMP50:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l214.region_id, i32 6, i8** [[TMP45]], i8** [[TMP46]], i64* [[TMP47]], i64* getelementptr inbounds ([6 x i64], [6 x i64]* @.offload_maptypes.12, i32 0, i32 0), i8** null, i8** null, i32 1, i32 [[TMP49]])
+// CHECK5-NEXT:    [[TMP51:%.*]] = icmp ne i32 [[TMP50]], 0
+// CHECK5-NEXT:    br i1 [[TMP51]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK5:       omp_offload.failed:
 // CHECK5-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l214(%struct.S1* [[THIS1]], i64 [[TMP7]], i64 2, i64 [[TMP2]], i16* [[VLA]], i64 [[TMP9]]) #[[ATTR4]]
 // CHECK5-NEXT:    br label [[OMP_OFFLOAD_CONT]]
@@ -7657,15 +7555,15 @@ int bar(int n){
 // CHECK5-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l214(%struct.S1* [[THIS1]], i64 [[TMP7]], i64 2, i64 [[TMP2]], i16* [[VLA]], i64 [[TMP9]]) #[[ATTR4]]
 // CHECK5-NEXT:    br label [[OMP_IF_END]]
 // CHECK5:       omp_if.end:
-// CHECK5-NEXT:    [[TMP56:%.*]] = mul nsw i64 1, [[TMP2]]
-// CHECK5-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i64 [[TMP56]]
+// CHECK5-NEXT:    [[TMP52:%.*]] = mul nsw i64 1, [[TMP2]]
+// CHECK5-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i64 [[TMP52]]
 // CHECK5-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1
-// CHECK5-NEXT:    [[TMP57:%.*]] = load i16, i16* [[ARRAYIDX6]], align 2
-// CHECK5-NEXT:    [[CONV7:%.*]] = sext i16 [[TMP57]] to i32
-// CHECK5-NEXT:    [[TMP58:%.*]] = load i32, i32* [[B]], align 4
-// CHECK5-NEXT:    [[ADD8:%.*]] = add nsw i32 [[CONV7]], [[TMP58]]
-// CHECK5-NEXT:    [[TMP59:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK5-NEXT:    call void @llvm.stackrestore(i8* [[TMP59]])
+// CHECK5-NEXT:    [[TMP53:%.*]] = load i16, i16* [[ARRAYIDX6]], align 2
+// CHECK5-NEXT:    [[CONV7:%.*]] = sext i16 [[TMP53]] to i32
+// CHECK5-NEXT:    [[TMP54:%.*]] = load i32, i32* [[B]], align 4
+// CHECK5-NEXT:    [[ADD8:%.*]] = add nsw i32 [[CONV7]], [[TMP54]]
+// CHECK5-NEXT:    [[TMP55:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK5-NEXT:    call void @llvm.stackrestore(i8* [[TMP55]])
 // CHECK5-NEXT:    ret i32 [[ADD8]]
 //
 //
@@ -7737,7 +7635,7 @@ int bar(int n){
 // CHECK5-NEXT:    store i8* null, i8** [[TMP26]], align 8
 // CHECK5-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK5-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK5-NEXT:    [[TMP29:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l195.region_id, i32 4, i8** [[TMP27]], i8** [[TMP28]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.12, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
+// CHECK5-NEXT:    [[TMP29:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l195.region_id, i32 4, i8** [[TMP27]], i8** [[TMP28]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.14, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.15, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
 // CHECK5-NEXT:    [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0
 // CHECK5-NEXT:    br i1 [[TMP30]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK5:       omp_offload.failed:
@@ -7806,7 +7704,7 @@ int bar(int n){
 // CHECK5-NEXT:    store i8* null, i8** [[TMP19]], align 8
 // CHECK5-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK5-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK5-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l178.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.15, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.16, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
+// CHECK5-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l178.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.17, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.18, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
 // CHECK5-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK5-NEXT:    br i1 [[TMP23]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK5:       omp_offload.failed:
@@ -7862,20 +7760,20 @@ int bar(int n){
 // CHECK5-NEXT:    [[TOBOOL6:%.*]] = trunc i8 [[TMP9]] to i1
 // CHECK5-NEXT:    br i1 [[TOBOOL6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
 // CHECK5:       omp_if.then:
-// CHECK5-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i64, i64, i64, i16*, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), %struct.S1* [[TMP1]], i64 [[TMP6]], i64 [[TMP2]], i64 [[TMP3]], i16* [[TMP4]], i64 [[TMP8]])
+// CHECK5-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i64, i64, i64, i16*, i64)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.S1* [[TMP1]], i64 [[TMP6]], i64 [[TMP2]], i64 [[TMP3]], i16* [[TMP4]], i64 [[TMP8]])
 // CHECK5-NEXT:    br label [[OMP_IF_END:%.*]]
 // CHECK5:       omp_if.else:
 // CHECK5-NEXT:    call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
 // CHECK5-NEXT:    store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
 // CHECK5-NEXT:    store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
-// CHECK5-NEXT:    call void @.omp_outlined..9(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]], i64 [[TMP6]], i64 [[TMP2]], i64 [[TMP3]], i16* [[TMP4]], i64 [[TMP8]]) #[[ATTR4]]
+// CHECK5-NEXT:    call void @.omp_outlined..10(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]], i64 [[TMP6]], i64 [[TMP2]], i64 [[TMP3]], i16* [[TMP4]], i64 [[TMP8]]) #[[ATTR4]]
 // CHECK5-NEXT:    call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
 // CHECK5-NEXT:    br label [[OMP_IF_END]]
 // CHECK5:       omp_if.end:
 // CHECK5-NEXT:    ret void
 //
 //
-// CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..9
+// CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..10
 // CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR3]] {
 // CHECK5-NEXT:  entry:
 // CHECK5-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -8064,11 +7962,11 @@ int bar(int n){
 // CHECK5-NEXT:    [[CONV5:%.*]] = bitcast i64* [[AAA_CASTED]] to i8*
 // CHECK5-NEXT:    store i8 [[TMP5]], i8* [[CONV5]], align 1
 // CHECK5-NEXT:    [[TMP6:%.*]] = load i64, i64* [[AAA_CASTED]], align 8
-// CHECK5-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, [10 x i32]*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], [10 x i32]* [[TMP0]])
+// CHECK5-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, [10 x i32]*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], [10 x i32]* [[TMP0]])
 // CHECK5-NEXT:    ret void
 //
 //
-// CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..11
+// CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..13
 // CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] {
 // CHECK5-NEXT:  entry:
 // CHECK5-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -8114,11 +8012,11 @@ int bar(int n){
 // CHECK5-NEXT:    [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16*
 // CHECK5-NEXT:    store i16 [[TMP3]], i16* [[CONV3]], align 2
 // CHECK5-NEXT:    [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8
-// CHECK5-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], [10 x i32]* [[TMP0]])
+// CHECK5-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], [10 x i32]* [[TMP0]])
 // CHECK5-NEXT:    ret void
 //
 //
-// CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..14
+// CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..16
 // CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] {
 // CHECK5-NEXT:  entry:
 // CHECK5-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -8207,7 +8105,7 @@ int bar(int n){
 //
 //
 // CHECK5-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK5-SAME: () #[[ATTR7:[0-9]+]] {
+// CHECK5-SAME: () #[[ATTR8:[0-9]+]] {
 // CHECK5-NEXT:  entry:
 // CHECK5-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK5-NEXT:    ret void
@@ -8388,14 +8286,14 @@ int bar(int n){
 // CHECK6-NEXT:    [[TMP65:%.*]] = mul nuw i64 [[TMP2]], 4
 // CHECK6-NEXT:    [[TMP66:%.*]] = mul nuw i64 5, [[TMP5]]
 // CHECK6-NEXT:    [[TMP67:%.*]] = mul nuw i64 [[TMP66]], 8
-// CHECK6-NEXT:    [[TMP68:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0
-// CHECK6-NEXT:    [[TMP69:%.*]] = bitcast i8** [[TMP68]] to i64*
-// CHECK6-NEXT:    store i64 [[TMP61]], i64* [[TMP69]], align 8
-// CHECK6-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 0
-// CHECK6-NEXT:    [[TMP71:%.*]] = bitcast i8** [[TMP70]] to i64*
-// CHECK6-NEXT:    store i64 [[TMP61]], i64* [[TMP71]], align 8
-// CHECK6-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK6-NEXT:    store i64 4, i64* [[TMP72]], align 8
+// CHECK6-NEXT:    [[TMP68:%.*]] = bitcast [10 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK6-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP68]], i8* align 8 bitcast ([10 x i64]* @.offload_sizes.8 to i8*), i64 80, i1 false)
+// CHECK6-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0
+// CHECK6-NEXT:    [[TMP70:%.*]] = bitcast i8** [[TMP69]] to i64*
+// CHECK6-NEXT:    store i64 [[TMP61]], i64* [[TMP70]], align 8
+// CHECK6-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 0
+// CHECK6-NEXT:    [[TMP72:%.*]] = bitcast i8** [[TMP71]] to i64*
+// CHECK6-NEXT:    store i64 [[TMP61]], i64* [[TMP72]], align 8
 // CHECK6-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 0
 // CHECK6-NEXT:    store i8* null, i8** [[TMP73]], align 8
 // CHECK6-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 1
@@ -8404,96 +8302,82 @@ int bar(int n){
 // CHECK6-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 1
 // CHECK6-NEXT:    [[TMP77:%.*]] = bitcast i8** [[TMP76]] to [10 x float]**
 // CHECK6-NEXT:    store [10 x float]* [[B]], [10 x float]** [[TMP77]], align 8
-// CHECK6-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK6-NEXT:    store i64 40, i64* [[TMP78]], align 8
-// CHECK6-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 1
-// CHECK6-NEXT:    store i8* null, i8** [[TMP79]], align 8
-// CHECK6-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 2
-// CHECK6-NEXT:    [[TMP81:%.*]] = bitcast i8** [[TMP80]] to i64*
-// CHECK6-NEXT:    store i64 [[TMP2]], i64* [[TMP81]], align 8
-// CHECK6-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 2
-// CHECK6-NEXT:    [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i64*
-// CHECK6-NEXT:    store i64 [[TMP2]], i64* [[TMP83]], align 8
-// CHECK6-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK6-NEXT:    store i64 8, i64* [[TMP84]], align 8
-// CHECK6-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 2
-// CHECK6-NEXT:    store i8* null, i8** [[TMP85]], align 8
-// CHECK6-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 3
+// CHECK6-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 1
+// CHECK6-NEXT:    store i8* null, i8** [[TMP78]], align 8
+// CHECK6-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 2
+// CHECK6-NEXT:    [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i64*
+// CHECK6-NEXT:    store i64 [[TMP2]], i64* [[TMP80]], align 8
+// CHECK6-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 2
+// CHECK6-NEXT:    [[TMP82:%.*]] = bitcast i8** [[TMP81]] to i64*
+// CHECK6-NEXT:    store i64 [[TMP2]], i64* [[TMP82]], align 8
+// CHECK6-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 2
+// CHECK6-NEXT:    store i8* null, i8** [[TMP83]], align 8
+// CHECK6-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 3
+// CHECK6-NEXT:    [[TMP85:%.*]] = bitcast i8** [[TMP84]] to float**
+// CHECK6-NEXT:    store float* [[VLA]], float** [[TMP85]], align 8
+// CHECK6-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 3
 // CHECK6-NEXT:    [[TMP87:%.*]] = bitcast i8** [[TMP86]] to float**
 // CHECK6-NEXT:    store float* [[VLA]], float** [[TMP87]], align 8
-// CHECK6-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 3
-// CHECK6-NEXT:    [[TMP89:%.*]] = bitcast i8** [[TMP88]] to float**
-// CHECK6-NEXT:    store float* [[VLA]], float** [[TMP89]], align 8
-// CHECK6-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK6-NEXT:    store i64 [[TMP65]], i64* [[TMP90]], align 8
-// CHECK6-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 3
-// CHECK6-NEXT:    store i8* null, i8** [[TMP91]], align 8
-// CHECK6-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 4
+// CHECK6-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
+// CHECK6-NEXT:    store i64 [[TMP65]], i64* [[TMP88]], align 8
+// CHECK6-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 3
+// CHECK6-NEXT:    store i8* null, i8** [[TMP89]], align 8
+// CHECK6-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 4
+// CHECK6-NEXT:    [[TMP91:%.*]] = bitcast i8** [[TMP90]] to [5 x [10 x double]]**
+// CHECK6-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP91]], align 8
+// CHECK6-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 4
 // CHECK6-NEXT:    [[TMP93:%.*]] = bitcast i8** [[TMP92]] to [5 x [10 x double]]**
 // CHECK6-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP93]], align 8
-// CHECK6-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 4
-// CHECK6-NEXT:    [[TMP95:%.*]] = bitcast i8** [[TMP94]] to [5 x [10 x double]]**
-// CHECK6-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP95]], align 8
-// CHECK6-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK6-NEXT:    store i64 400, i64* [[TMP96]], align 8
-// CHECK6-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 4
-// CHECK6-NEXT:    store i8* null, i8** [[TMP97]], align 8
-// CHECK6-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 5
-// CHECK6-NEXT:    [[TMP99:%.*]] = bitcast i8** [[TMP98]] to i64*
-// CHECK6-NEXT:    store i64 5, i64* [[TMP99]], align 8
-// CHECK6-NEXT:    [[TMP100:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 5
+// CHECK6-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 4
+// CHECK6-NEXT:    store i8* null, i8** [[TMP94]], align 8
+// CHECK6-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 5
+// CHECK6-NEXT:    [[TMP96:%.*]] = bitcast i8** [[TMP95]] to i64*
+// CHECK6-NEXT:    store i64 5, i64* [[TMP96]], align 8
+// CHECK6-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 5
+// CHECK6-NEXT:    [[TMP98:%.*]] = bitcast i8** [[TMP97]] to i64*
+// CHECK6-NEXT:    store i64 5, i64* [[TMP98]], align 8
+// CHECK6-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 5
+// CHECK6-NEXT:    store i8* null, i8** [[TMP99]], align 8
+// CHECK6-NEXT:    [[TMP100:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 6
 // CHECK6-NEXT:    [[TMP101:%.*]] = bitcast i8** [[TMP100]] to i64*
-// CHECK6-NEXT:    store i64 5, i64* [[TMP101]], align 8
-// CHECK6-NEXT:    [[TMP102:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 5
-// CHECK6-NEXT:    store i64 8, i64* [[TMP102]], align 8
-// CHECK6-NEXT:    [[TMP103:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 5
-// CHECK6-NEXT:    store i8* null, i8** [[TMP103]], align 8
-// CHECK6-NEXT:    [[TMP104:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 6
-// CHECK6-NEXT:    [[TMP105:%.*]] = bitcast i8** [[TMP104]] to i64*
-// CHECK6-NEXT:    store i64 [[TMP5]], i64* [[TMP105]], align 8
-// CHECK6-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 6
-// CHECK6-NEXT:    [[TMP107:%.*]] = bitcast i8** [[TMP106]] to i64*
-// CHECK6-NEXT:    store i64 [[TMP5]], i64* [[TMP107]], align 8
-// CHECK6-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 6
-// CHECK6-NEXT:    store i64 8, i64* [[TMP108]], align 8
-// CHECK6-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 6
-// CHECK6-NEXT:    store i8* null, i8** [[TMP109]], align 8
-// CHECK6-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 7
-// CHECK6-NEXT:    [[TMP111:%.*]] = bitcast i8** [[TMP110]] to double**
-// CHECK6-NEXT:    store double* [[VLA1]], double** [[TMP111]], align 8
-// CHECK6-NEXT:    [[TMP112:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 7
-// CHECK6-NEXT:    [[TMP113:%.*]] = bitcast i8** [[TMP112]] to double**
-// CHECK6-NEXT:    store double* [[VLA1]], double** [[TMP113]], align 8
-// CHECK6-NEXT:    [[TMP114:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
-// CHECK6-NEXT:    store i64 [[TMP67]], i64* [[TMP114]], align 8
-// CHECK6-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 7
+// CHECK6-NEXT:    store i64 [[TMP5]], i64* [[TMP101]], align 8
+// CHECK6-NEXT:    [[TMP102:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 6
+// CHECK6-NEXT:    [[TMP103:%.*]] = bitcast i8** [[TMP102]] to i64*
+// CHECK6-NEXT:    store i64 [[TMP5]], i64* [[TMP103]], align 8
+// CHECK6-NEXT:    [[TMP104:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 6
+// CHECK6-NEXT:    store i8* null, i8** [[TMP104]], align 8
+// CHECK6-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 7
+// CHECK6-NEXT:    [[TMP106:%.*]] = bitcast i8** [[TMP105]] to double**
+// CHECK6-NEXT:    store double* [[VLA1]], double** [[TMP106]], align 8
+// CHECK6-NEXT:    [[TMP107:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 7
+// CHECK6-NEXT:    [[TMP108:%.*]] = bitcast i8** [[TMP107]] to double**
+// CHECK6-NEXT:    store double* [[VLA1]], double** [[TMP108]], align 8
+// CHECK6-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
+// CHECK6-NEXT:    store i64 [[TMP67]], i64* [[TMP109]], align 8
+// CHECK6-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 7
+// CHECK6-NEXT:    store i8* null, i8** [[TMP110]], align 8
+// CHECK6-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 8
+// CHECK6-NEXT:    [[TMP112:%.*]] = bitcast i8** [[TMP111]] to %struct.TT**
+// CHECK6-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP112]], align 8
+// CHECK6-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 8
+// CHECK6-NEXT:    [[TMP114:%.*]] = bitcast i8** [[TMP113]] to %struct.TT**
+// CHECK6-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP114]], align 8
+// CHECK6-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 8
 // CHECK6-NEXT:    store i8* null, i8** [[TMP115]], align 8
-// CHECK6-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 8
-// CHECK6-NEXT:    [[TMP117:%.*]] = bitcast i8** [[TMP116]] to %struct.TT**
-// CHECK6-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP117]], align 8
-// CHECK6-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 8
-// CHECK6-NEXT:    [[TMP119:%.*]] = bitcast i8** [[TMP118]] to %struct.TT**
-// CHECK6-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP119]], align 8
-// CHECK6-NEXT:    [[TMP120:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 8
-// CHECK6-NEXT:    store i64 16, i64* [[TMP120]], align 8
-// CHECK6-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 8
-// CHECK6-NEXT:    store i8* null, i8** [[TMP121]], align 8
-// CHECK6-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 9
-// CHECK6-NEXT:    [[TMP123:%.*]] = bitcast i8** [[TMP122]] to i64*
-// CHECK6-NEXT:    store i64 [[TMP63]], i64* [[TMP123]], align 8
-// CHECK6-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 9
-// CHECK6-NEXT:    [[TMP125:%.*]] = bitcast i8** [[TMP124]] to i64*
-// CHECK6-NEXT:    store i64 [[TMP63]], i64* [[TMP125]], align 8
-// CHECK6-NEXT:    [[TMP126:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 9
-// CHECK6-NEXT:    store i64 4, i64* [[TMP126]], align 8
-// CHECK6-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 9
-// CHECK6-NEXT:    store i8* null, i8** [[TMP127]], align 8
-// CHECK6-NEXT:    [[TMP128:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0
-// CHECK6-NEXT:    [[TMP129:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 0
-// CHECK6-NEXT:    [[TMP130:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK6-NEXT:    [[TMP131:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l140.region_id, i32 10, i8** [[TMP128]], i8** [[TMP129]], i64* [[TMP130]], i64* getelementptr inbounds ([10 x i64], [10 x i64]* @.offload_maptypes.8, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
-// CHECK6-NEXT:    [[TMP132:%.*]] = icmp ne i32 [[TMP131]], 0
-// CHECK6-NEXT:    br i1 [[TMP132]], label [[OMP_OFFLOAD_FAILED23:%.*]], label [[OMP_OFFLOAD_CONT24:%.*]]
+// CHECK6-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 9
+// CHECK6-NEXT:    [[TMP117:%.*]] = bitcast i8** [[TMP116]] to i64*
+// CHECK6-NEXT:    store i64 [[TMP63]], i64* [[TMP117]], align 8
+// CHECK6-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 9
+// CHECK6-NEXT:    [[TMP119:%.*]] = bitcast i8** [[TMP118]] to i64*
+// CHECK6-NEXT:    store i64 [[TMP63]], i64* [[TMP119]], align 8
+// CHECK6-NEXT:    [[TMP120:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 9
+// CHECK6-NEXT:    store i8* null, i8** [[TMP120]], align 8
+// CHECK6-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0
+// CHECK6-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 0
+// CHECK6-NEXT:    [[TMP123:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK6-NEXT:    [[TMP124:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l140.region_id, i32 10, i8** [[TMP121]], i8** [[TMP122]], i64* [[TMP123]], i64* getelementptr inbounds ([10 x i64], [10 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
+// CHECK6-NEXT:    [[TMP125:%.*]] = icmp ne i32 [[TMP124]], 0
+// CHECK6-NEXT:    br i1 [[TMP125]], label [[OMP_OFFLOAD_FAILED23:%.*]], label [[OMP_OFFLOAD_CONT24:%.*]]
 // CHECK6:       omp_offload.failed23:
 // CHECK6-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l140(i64 [[TMP61]], [10 x float]* [[B]], i64 [[TMP2]], float* [[VLA]], [5 x [10 x double]]* [[C]], i64 5, i64 [[TMP5]], double* [[VLA1]], %struct.TT* [[D]], i64 [[TMP63]]) #[[ATTR4]]
 // CHECK6-NEXT:    br label [[OMP_OFFLOAD_CONT24]]
@@ -8503,10 +8387,10 @@ int bar(int n){
 // CHECK6-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l140(i64 [[TMP61]], [10 x float]* [[B]], i64 [[TMP2]], float* [[VLA]], [5 x [10 x double]]* [[C]], i64 5, i64 [[TMP5]], double* [[VLA1]], %struct.TT* [[D]], i64 [[TMP63]]) #[[ATTR4]]
 // CHECK6-NEXT:    br label [[OMP_IF_END26]]
 // CHECK6:       omp_if.end26:
-// CHECK6-NEXT:    [[TMP133:%.*]] = load i32, i32* [[A]], align 4
-// CHECK6-NEXT:    [[TMP134:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK6-NEXT:    call void @llvm.stackrestore(i8* [[TMP134]])
-// CHECK6-NEXT:    ret i32 [[TMP133]]
+// CHECK6-NEXT:    [[TMP126:%.*]] = load i32, i32* [[A]], align 4
+// CHECK6-NEXT:    [[TMP127:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK6-NEXT:    call void @llvm.stackrestore(i8* [[TMP127]])
+// CHECK6-NEXT:    ret i32 [[TMP126]]
 //
 //
 // CHECK6-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l96
@@ -9271,14 +9155,14 @@ int bar(int n){
 // CHECK6-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0
 // CHECK6-NEXT:    [[TMP11:%.*]] = mul nuw i64 2, [[TMP2]]
 // CHECK6-NEXT:    [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 2
-// CHECK6-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK6-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to %struct.S1**
-// CHECK6-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP14]], align 8
-// CHECK6-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK6-NEXT:    [[TMP16:%.*]] = bitcast i8** [[TMP15]] to double**
-// CHECK6-NEXT:    store double* [[A]], double** [[TMP16]], align 8
-// CHECK6-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [6 x i64], [6 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK6-NEXT:    store i64 8, i64* [[TMP17]], align 8
+// CHECK6-NEXT:    [[TMP13:%.*]] = bitcast [6 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK6-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP13]], i8* align 8 bitcast ([6 x i64]* @.offload_sizes.11 to i8*), i64 48, i1 false)
+// CHECK6-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK6-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to %struct.S1**
+// CHECK6-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP15]], align 8
+// CHECK6-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK6-NEXT:    [[TMP17:%.*]] = bitcast i8** [[TMP16]] to double**
+// CHECK6-NEXT:    store double* [[A]], double** [[TMP17]], align 8
 // CHECK6-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
 // CHECK6-NEXT:    store i8* null, i8** [[TMP18]], align 8
 // CHECK6-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -9287,59 +9171,51 @@ int bar(int n){
 // CHECK6-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK6-NEXT:    [[TMP22:%.*]] = bitcast i8** [[TMP21]] to i64*
 // CHECK6-NEXT:    store i64 [[TMP7]], i64* [[TMP22]], align 8
-// CHECK6-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [6 x i64], [6 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK6-NEXT:    store i64 4, i64* [[TMP23]], align 8
-// CHECK6-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK6-NEXT:    store i8* null, i8** [[TMP24]], align 8
-// CHECK6-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK6-NEXT:    [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i64*
-// CHECK6-NEXT:    store i64 2, i64* [[TMP26]], align 8
-// CHECK6-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK6-NEXT:    [[TMP28:%.*]] = bitcast i8** [[TMP27]] to i64*
-// CHECK6-NEXT:    store i64 2, i64* [[TMP28]], align 8
-// CHECK6-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [6 x i64], [6 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK6-NEXT:    store i64 8, i64* [[TMP29]], align 8
-// CHECK6-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK6-NEXT:    store i8* null, i8** [[TMP30]], align 8
-// CHECK6-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK6-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK6-NEXT:    store i8* null, i8** [[TMP23]], align 8
+// CHECK6-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK6-NEXT:    [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i64*
+// CHECK6-NEXT:    store i64 2, i64* [[TMP25]], align 8
+// CHECK6-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK6-NEXT:    [[TMP27:%.*]] = bitcast i8** [[TMP26]] to i64*
+// CHECK6-NEXT:    store i64 2, i64* [[TMP27]], align 8
+// CHECK6-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK6-NEXT:    store i8* null, i8** [[TMP28]], align 8
+// CHECK6-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK6-NEXT:    [[TMP30:%.*]] = bitcast i8** [[TMP29]] to i64*
+// CHECK6-NEXT:    store i64 [[TMP2]], i64* [[TMP30]], align 8
+// CHECK6-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
 // CHECK6-NEXT:    [[TMP32:%.*]] = bitcast i8** [[TMP31]] to i64*
 // CHECK6-NEXT:    store i64 [[TMP2]], i64* [[TMP32]], align 8
-// CHECK6-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK6-NEXT:    [[TMP34:%.*]] = bitcast i8** [[TMP33]] to i64*
-// CHECK6-NEXT:    store i64 [[TMP2]], i64* [[TMP34]], align 8
-// CHECK6-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [6 x i64], [6 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK6-NEXT:    store i64 8, i64* [[TMP35]], align 8
-// CHECK6-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
-// CHECK6-NEXT:    store i8* null, i8** [[TMP36]], align 8
-// CHECK6-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK6-NEXT:    [[TMP38:%.*]] = bitcast i8** [[TMP37]] to i16**
-// CHECK6-NEXT:    store i16* [[VLA]], i16** [[TMP38]], align 8
-// CHECK6-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
-// CHECK6-NEXT:    [[TMP40:%.*]] = bitcast i8** [[TMP39]] to i16**
-// CHECK6-NEXT:    store i16* [[VLA]], i16** [[TMP40]], align 8
-// CHECK6-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [6 x i64], [6 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK6-NEXT:    store i64 [[TMP12]], i64* [[TMP41]], align 8
-// CHECK6-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
-// CHECK6-NEXT:    store i8* null, i8** [[TMP42]], align 8
-// CHECK6-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 5
-// CHECK6-NEXT:    [[TMP44:%.*]] = bitcast i8** [[TMP43]] to i64*
-// CHECK6-NEXT:    store i64 [[TMP9]], i64* [[TMP44]], align 8
-// CHECK6-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 5
-// CHECK6-NEXT:    [[TMP46:%.*]] = bitcast i8** [[TMP45]] to i64*
-// CHECK6-NEXT:    store i64 [[TMP9]], i64* [[TMP46]], align 8
-// CHECK6-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [6 x i64], [6 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 5
-// CHECK6-NEXT:    store i64 1, i64* [[TMP47]], align 8
-// CHECK6-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 5
-// CHECK6-NEXT:    store i8* null, i8** [[TMP48]], align 8
-// CHECK6-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK6-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK6-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [6 x i64], [6 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK6-NEXT:    [[TMP52:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1
-// CHECK6-NEXT:    [[TOBOOL5:%.*]] = trunc i8 [[TMP52]] to i1
-// CHECK6-NEXT:    [[TMP53:%.*]] = select i1 [[TOBOOL5]], i32 0, i32 1
-// CHECK6-NEXT:    [[TMP54:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l214.region_id, i32 6, i8** [[TMP49]], i8** [[TMP50]], i64* [[TMP51]], i64* getelementptr inbounds ([6 x i64], [6 x i64]* @.offload_maptypes.10, i32 0, i32 0), i8** null, i8** null, i32 1, i32 [[TMP53]])
-// CHECK6-NEXT:    [[TMP55:%.*]] = icmp ne i32 [[TMP54]], 0
-// CHECK6-NEXT:    br i1 [[TMP55]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK6-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
+// CHECK6-NEXT:    store i8* null, i8** [[TMP33]], align 8
+// CHECK6-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK6-NEXT:    [[TMP35:%.*]] = bitcast i8** [[TMP34]] to i16**
+// CHECK6-NEXT:    store i16* [[VLA]], i16** [[TMP35]], align 8
+// CHECK6-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK6-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i16**
+// CHECK6-NEXT:    store i16* [[VLA]], i16** [[TMP37]], align 8
+// CHECK6-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [6 x i64], [6 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
+// CHECK6-NEXT:    store i64 [[TMP12]], i64* [[TMP38]], align 8
+// CHECK6-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
+// CHECK6-NEXT:    store i8* null, i8** [[TMP39]], align 8
+// CHECK6-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 5
+// CHECK6-NEXT:    [[TMP41:%.*]] = bitcast i8** [[TMP40]] to i64*
+// CHECK6-NEXT:    store i64 [[TMP9]], i64* [[TMP41]], align 8
+// CHECK6-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 5
+// CHECK6-NEXT:    [[TMP43:%.*]] = bitcast i8** [[TMP42]] to i64*
+// CHECK6-NEXT:    store i64 [[TMP9]], i64* [[TMP43]], align 8
+// CHECK6-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 5
+// CHECK6-NEXT:    store i8* null, i8** [[TMP44]], align 8
+// CHECK6-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK6-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK6-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [6 x i64], [6 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK6-NEXT:    [[TMP48:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1
+// CHECK6-NEXT:    [[TOBOOL5:%.*]] = trunc i8 [[TMP48]] to i1
+// CHECK6-NEXT:    [[TMP49:%.*]] = select i1 [[TOBOOL5]], i32 0, i32 1
+// CHECK6-NEXT:    [[TMP50:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l214.region_id, i32 6, i8** [[TMP45]], i8** [[TMP46]], i64* [[TMP47]], i64* getelementptr inbounds ([6 x i64], [6 x i64]* @.offload_maptypes.12, i32 0, i32 0), i8** null, i8** null, i32 1, i32 [[TMP49]])
+// CHECK6-NEXT:    [[TMP51:%.*]] = icmp ne i32 [[TMP50]], 0
+// CHECK6-NEXT:    br i1 [[TMP51]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK6:       omp_offload.failed:
 // CHECK6-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l214(%struct.S1* [[THIS1]], i64 [[TMP7]], i64 2, i64 [[TMP2]], i16* [[VLA]], i64 [[TMP9]]) #[[ATTR4]]
 // CHECK6-NEXT:    br label [[OMP_OFFLOAD_CONT]]
@@ -9349,15 +9225,15 @@ int bar(int n){
 // CHECK6-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l214(%struct.S1* [[THIS1]], i64 [[TMP7]], i64 2, i64 [[TMP2]], i16* [[VLA]], i64 [[TMP9]]) #[[ATTR4]]
 // CHECK6-NEXT:    br label [[OMP_IF_END]]
 // CHECK6:       omp_if.end:
-// CHECK6-NEXT:    [[TMP56:%.*]] = mul nsw i64 1, [[TMP2]]
-// CHECK6-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i64 [[TMP56]]
+// CHECK6-NEXT:    [[TMP52:%.*]] = mul nsw i64 1, [[TMP2]]
+// CHECK6-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i64 [[TMP52]]
 // CHECK6-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1
-// CHECK6-NEXT:    [[TMP57:%.*]] = load i16, i16* [[ARRAYIDX6]], align 2
-// CHECK6-NEXT:    [[CONV7:%.*]] = sext i16 [[TMP57]] to i32
-// CHECK6-NEXT:    [[TMP58:%.*]] = load i32, i32* [[B]], align 4
-// CHECK6-NEXT:    [[ADD8:%.*]] = add nsw i32 [[CONV7]], [[TMP58]]
-// CHECK6-NEXT:    [[TMP59:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK6-NEXT:    call void @llvm.stackrestore(i8* [[TMP59]])
+// CHECK6-NEXT:    [[TMP53:%.*]] = load i16, i16* [[ARRAYIDX6]], align 2
+// CHECK6-NEXT:    [[CONV7:%.*]] = sext i16 [[TMP53]] to i32
+// CHECK6-NEXT:    [[TMP54:%.*]] = load i32, i32* [[B]], align 4
+// CHECK6-NEXT:    [[ADD8:%.*]] = add nsw i32 [[CONV7]], [[TMP54]]
+// CHECK6-NEXT:    [[TMP55:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK6-NEXT:    call void @llvm.stackrestore(i8* [[TMP55]])
 // CHECK6-NEXT:    ret i32 [[ADD8]]
 //
 //
@@ -9429,7 +9305,7 @@ int bar(int n){
 // CHECK6-NEXT:    store i8* null, i8** [[TMP26]], align 8
 // CHECK6-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK6-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK6-NEXT:    [[TMP29:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l195.region_id, i32 4, i8** [[TMP27]], i8** [[TMP28]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.12, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
+// CHECK6-NEXT:    [[TMP29:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l195.region_id, i32 4, i8** [[TMP27]], i8** [[TMP28]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.14, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.15, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
 // CHECK6-NEXT:    [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0
 // CHECK6-NEXT:    br i1 [[TMP30]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK6:       omp_offload.failed:
@@ -9498,7 +9374,7 @@ int bar(int n){
 // CHECK6-NEXT:    store i8* null, i8** [[TMP19]], align 8
 // CHECK6-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK6-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK6-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l178.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.15, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.16, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
+// CHECK6-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l178.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.17, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.18, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
 // CHECK6-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK6-NEXT:    br i1 [[TMP23]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK6:       omp_offload.failed:
@@ -9554,20 +9430,20 @@ int bar(int n){
 // CHECK6-NEXT:    [[TOBOOL6:%.*]] = trunc i8 [[TMP9]] to i1
 // CHECK6-NEXT:    br i1 [[TOBOOL6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
 // CHECK6:       omp_if.then:
-// CHECK6-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i64, i64, i64, i16*, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), %struct.S1* [[TMP1]], i64 [[TMP6]], i64 [[TMP2]], i64 [[TMP3]], i16* [[TMP4]], i64 [[TMP8]])
+// CHECK6-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i64, i64, i64, i16*, i64)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.S1* [[TMP1]], i64 [[TMP6]], i64 [[TMP2]], i64 [[TMP3]], i16* [[TMP4]], i64 [[TMP8]])
 // CHECK6-NEXT:    br label [[OMP_IF_END:%.*]]
 // CHECK6:       omp_if.else:
 // CHECK6-NEXT:    call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
 // CHECK6-NEXT:    store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
 // CHECK6-NEXT:    store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
-// CHECK6-NEXT:    call void @.omp_outlined..9(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]], i64 [[TMP6]], i64 [[TMP2]], i64 [[TMP3]], i16* [[TMP4]], i64 [[TMP8]]) #[[ATTR4]]
+// CHECK6-NEXT:    call void @.omp_outlined..10(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]], i64 [[TMP6]], i64 [[TMP2]], i64 [[TMP3]], i16* [[TMP4]], i64 [[TMP8]]) #[[ATTR4]]
 // CHECK6-NEXT:    call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
 // CHECK6-NEXT:    br label [[OMP_IF_END]]
 // CHECK6:       omp_if.end:
 // CHECK6-NEXT:    ret void
 //
 //
-// CHECK6-LABEL: define {{[^@]+}}@.omp_outlined..9
+// CHECK6-LABEL: define {{[^@]+}}@.omp_outlined..10
 // CHECK6-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR3]] {
 // CHECK6-NEXT:  entry:
 // CHECK6-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -9756,11 +9632,11 @@ int bar(int n){
 // CHECK6-NEXT:    [[CONV5:%.*]] = bitcast i64* [[AAA_CASTED]] to i8*
 // CHECK6-NEXT:    store i8 [[TMP5]], i8* [[CONV5]], align 1
 // CHECK6-NEXT:    [[TMP6:%.*]] = load i64, i64* [[AAA_CASTED]], align 8
-// CHECK6-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, [10 x i32]*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], [10 x i32]* [[TMP0]])
+// CHECK6-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, [10 x i32]*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], [10 x i32]* [[TMP0]])
 // CHECK6-NEXT:    ret void
 //
 //
-// CHECK6-LABEL: define {{[^@]+}}@.omp_outlined..11
+// CHECK6-LABEL: define {{[^@]+}}@.omp_outlined..13
 // CHECK6-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] {
 // CHECK6-NEXT:  entry:
 // CHECK6-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -9806,11 +9682,11 @@ int bar(int n){
 // CHECK6-NEXT:    [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16*
 // CHECK6-NEXT:    store i16 [[TMP3]], i16* [[CONV3]], align 2
 // CHECK6-NEXT:    [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8
-// CHECK6-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], [10 x i32]* [[TMP0]])
+// CHECK6-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], [10 x i32]* [[TMP0]])
 // CHECK6-NEXT:    ret void
 //
 //
-// CHECK6-LABEL: define {{[^@]+}}@.omp_outlined..14
+// CHECK6-LABEL: define {{[^@]+}}@.omp_outlined..16
 // CHECK6-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] {
 // CHECK6-NEXT:  entry:
 // CHECK6-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -9899,7 +9775,7 @@ int bar(int n){
 //
 //
 // CHECK6-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK6-SAME: () #[[ATTR7:[0-9]+]] {
+// CHECK6-SAME: () #[[ATTR8:[0-9]+]] {
 // CHECK6-NEXT:  entry:
 // CHECK6-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK6-NEXT:    ret void
@@ -10070,14 +9946,14 @@ int bar(int n){
 // CHECK7-NEXT:    [[TMP63:%.*]] = mul nuw i32 5, [[TMP3]]
 // CHECK7-NEXT:    [[TMP64:%.*]] = mul nuw i32 [[TMP63]], 8
 // CHECK7-NEXT:    [[TMP65:%.*]] = sext i32 [[TMP64]] to i64
-// CHECK7-NEXT:    [[TMP66:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 0
-// CHECK7-NEXT:    [[TMP67:%.*]] = bitcast i8** [[TMP66]] to i32*
-// CHECK7-NEXT:    store i32 [[TMP57]], i32* [[TMP67]], align 4
-// CHECK7-NEXT:    [[TMP68:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 0
-// CHECK7-NEXT:    [[TMP69:%.*]] = bitcast i8** [[TMP68]] to i32*
-// CHECK7-NEXT:    store i32 [[TMP57]], i32* [[TMP69]], align 4
-// CHECK7-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK7-NEXT:    store i64 4, i64* [[TMP70]], align 4
+// CHECK7-NEXT:    [[TMP66:%.*]] = bitcast [10 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK7-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP66]], i8* align 4 bitcast ([10 x i64]* @.offload_sizes.8 to i8*), i32 80, i1 false)
+// CHECK7-NEXT:    [[TMP67:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 0
+// CHECK7-NEXT:    [[TMP68:%.*]] = bitcast i8** [[TMP67]] to i32*
+// CHECK7-NEXT:    store i32 [[TMP57]], i32* [[TMP68]], align 4
+// CHECK7-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 0
+// CHECK7-NEXT:    [[TMP70:%.*]] = bitcast i8** [[TMP69]] to i32*
+// CHECK7-NEXT:    store i32 [[TMP57]], i32* [[TMP70]], align 4
 // CHECK7-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 0
 // CHECK7-NEXT:    store i8* null, i8** [[TMP71]], align 4
 // CHECK7-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 1
@@ -10086,96 +9962,82 @@ int bar(int n){
 // CHECK7-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 1
 // CHECK7-NEXT:    [[TMP75:%.*]] = bitcast i8** [[TMP74]] to [10 x float]**
 // CHECK7-NEXT:    store [10 x float]* [[B]], [10 x float]** [[TMP75]], align 4
-// CHECK7-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK7-NEXT:    store i64 40, i64* [[TMP76]], align 4
-// CHECK7-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 1
-// CHECK7-NEXT:    store i8* null, i8** [[TMP77]], align 4
-// CHECK7-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 2
-// CHECK7-NEXT:    [[TMP79:%.*]] = bitcast i8** [[TMP78]] to i32*
-// CHECK7-NEXT:    store i32 [[TMP1]], i32* [[TMP79]], align 4
-// CHECK7-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 2
-// CHECK7-NEXT:    [[TMP81:%.*]] = bitcast i8** [[TMP80]] to i32*
-// CHECK7-NEXT:    store i32 [[TMP1]], i32* [[TMP81]], align 4
-// CHECK7-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK7-NEXT:    store i64 4, i64* [[TMP82]], align 4
-// CHECK7-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 2
-// CHECK7-NEXT:    store i8* null, i8** [[TMP83]], align 4
-// CHECK7-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 3
+// CHECK7-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 1
+// CHECK7-NEXT:    store i8* null, i8** [[TMP76]], align 4
+// CHECK7-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 2
+// CHECK7-NEXT:    [[TMP78:%.*]] = bitcast i8** [[TMP77]] to i32*
+// CHECK7-NEXT:    store i32 [[TMP1]], i32* [[TMP78]], align 4
+// CHECK7-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 2
+// CHECK7-NEXT:    [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i32*
+// CHECK7-NEXT:    store i32 [[TMP1]], i32* [[TMP80]], align 4
+// CHECK7-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 2
+// CHECK7-NEXT:    store i8* null, i8** [[TMP81]], align 4
+// CHECK7-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 3
+// CHECK7-NEXT:    [[TMP83:%.*]] = bitcast i8** [[TMP82]] to float**
+// CHECK7-NEXT:    store float* [[VLA]], float** [[TMP83]], align 4
+// CHECK7-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 3
 // CHECK7-NEXT:    [[TMP85:%.*]] = bitcast i8** [[TMP84]] to float**
 // CHECK7-NEXT:    store float* [[VLA]], float** [[TMP85]], align 4
-// CHECK7-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 3
-// CHECK7-NEXT:    [[TMP87:%.*]] = bitcast i8** [[TMP86]] to float**
-// CHECK7-NEXT:    store float* [[VLA]], float** [[TMP87]], align 4
-// CHECK7-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK7-NEXT:    store i64 [[TMP62]], i64* [[TMP88]], align 4
-// CHECK7-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 3
-// CHECK7-NEXT:    store i8* null, i8** [[TMP89]], align 4
-// CHECK7-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 4
+// CHECK7-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
+// CHECK7-NEXT:    store i64 [[TMP62]], i64* [[TMP86]], align 4
+// CHECK7-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 3
+// CHECK7-NEXT:    store i8* null, i8** [[TMP87]], align 4
+// CHECK7-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 4
+// CHECK7-NEXT:    [[TMP89:%.*]] = bitcast i8** [[TMP88]] to [5 x [10 x double]]**
+// CHECK7-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP89]], align 4
+// CHECK7-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 4
 // CHECK7-NEXT:    [[TMP91:%.*]] = bitcast i8** [[TMP90]] to [5 x [10 x double]]**
 // CHECK7-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP91]], align 4
-// CHECK7-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 4
-// CHECK7-NEXT:    [[TMP93:%.*]] = bitcast i8** [[TMP92]] to [5 x [10 x double]]**
-// CHECK7-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP93]], align 4
-// CHECK7-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK7-NEXT:    store i64 400, i64* [[TMP94]], align 4
-// CHECK7-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 4
-// CHECK7-NEXT:    store i8* null, i8** [[TMP95]], align 4
-// CHECK7-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 5
-// CHECK7-NEXT:    [[TMP97:%.*]] = bitcast i8** [[TMP96]] to i32*
-// CHECK7-NEXT:    store i32 5, i32* [[TMP97]], align 4
-// CHECK7-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 5
+// CHECK7-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 4
+// CHECK7-NEXT:    store i8* null, i8** [[TMP92]], align 4
+// CHECK7-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 5
+// CHECK7-NEXT:    [[TMP94:%.*]] = bitcast i8** [[TMP93]] to i32*
+// CHECK7-NEXT:    store i32 5, i32* [[TMP94]], align 4
+// CHECK7-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 5
+// CHECK7-NEXT:    [[TMP96:%.*]] = bitcast i8** [[TMP95]] to i32*
+// CHECK7-NEXT:    store i32 5, i32* [[TMP96]], align 4
+// CHECK7-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 5
+// CHECK7-NEXT:    store i8* null, i8** [[TMP97]], align 4
+// CHECK7-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 6
 // CHECK7-NEXT:    [[TMP99:%.*]] = bitcast i8** [[TMP98]] to i32*
-// CHECK7-NEXT:    store i32 5, i32* [[TMP99]], align 4
-// CHECK7-NEXT:    [[TMP100:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 5
-// CHECK7-NEXT:    store i64 4, i64* [[TMP100]], align 4
-// CHECK7-NEXT:    [[TMP101:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 5
-// CHECK7-NEXT:    store i8* null, i8** [[TMP101]], align 4
-// CHECK7-NEXT:    [[TMP102:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 6
-// CHECK7-NEXT:    [[TMP103:%.*]] = bitcast i8** [[TMP102]] to i32*
-// CHECK7-NEXT:    store i32 [[TMP3]], i32* [[TMP103]], align 4
-// CHECK7-NEXT:    [[TMP104:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 6
-// CHECK7-NEXT:    [[TMP105:%.*]] = bitcast i8** [[TMP104]] to i32*
-// CHECK7-NEXT:    store i32 [[TMP3]], i32* [[TMP105]], align 4
-// CHECK7-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 6
-// CHECK7-NEXT:    store i64 4, i64* [[TMP106]], align 4
-// CHECK7-NEXT:    [[TMP107:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 6
-// CHECK7-NEXT:    store i8* null, i8** [[TMP107]], align 4
-// CHECK7-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 7
-// CHECK7-NEXT:    [[TMP109:%.*]] = bitcast i8** [[TMP108]] to double**
-// CHECK7-NEXT:    store double* [[VLA1]], double** [[TMP109]], align 4
-// CHECK7-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 7
-// CHECK7-NEXT:    [[TMP111:%.*]] = bitcast i8** [[TMP110]] to double**
-// CHECK7-NEXT:    store double* [[VLA1]], double** [[TMP111]], align 4
-// CHECK7-NEXT:    [[TMP112:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
-// CHECK7-NEXT:    store i64 [[TMP65]], i64* [[TMP112]], align 4
-// CHECK7-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 7
+// CHECK7-NEXT:    store i32 [[TMP3]], i32* [[TMP99]], align 4
+// CHECK7-NEXT:    [[TMP100:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 6
+// CHECK7-NEXT:    [[TMP101:%.*]] = bitcast i8** [[TMP100]] to i32*
+// CHECK7-NEXT:    store i32 [[TMP3]], i32* [[TMP101]], align 4
+// CHECK7-NEXT:    [[TMP102:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 6
+// CHECK7-NEXT:    store i8* null, i8** [[TMP102]], align 4
+// CHECK7-NEXT:    [[TMP103:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 7
+// CHECK7-NEXT:    [[TMP104:%.*]] = bitcast i8** [[TMP103]] to double**
+// CHECK7-NEXT:    store double* [[VLA1]], double** [[TMP104]], align 4
+// CHECK7-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 7
+// CHECK7-NEXT:    [[TMP106:%.*]] = bitcast i8** [[TMP105]] to double**
+// CHECK7-NEXT:    store double* [[VLA1]], double** [[TMP106]], align 4
+// CHECK7-NEXT:    [[TMP107:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
+// CHECK7-NEXT:    store i64 [[TMP65]], i64* [[TMP107]], align 4
+// CHECK7-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 7
+// CHECK7-NEXT:    store i8* null, i8** [[TMP108]], align 4
+// CHECK7-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 8
+// CHECK7-NEXT:    [[TMP110:%.*]] = bitcast i8** [[TMP109]] to %struct.TT**
+// CHECK7-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP110]], align 4
+// CHECK7-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 8
+// CHECK7-NEXT:    [[TMP112:%.*]] = bitcast i8** [[TMP111]] to %struct.TT**
+// CHECK7-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP112]], align 4
+// CHECK7-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 8
 // CHECK7-NEXT:    store i8* null, i8** [[TMP113]], align 4
-// CHECK7-NEXT:    [[TMP114:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 8
-// CHECK7-NEXT:    [[TMP115:%.*]] = bitcast i8** [[TMP114]] to %struct.TT**
-// CHECK7-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP115]], align 4
-// CHECK7-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 8
-// CHECK7-NEXT:    [[TMP117:%.*]] = bitcast i8** [[TMP116]] to %struct.TT**
-// CHECK7-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP117]], align 4
-// CHECK7-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 8
-// CHECK7-NEXT:    store i64 12, i64* [[TMP118]], align 4
-// CHECK7-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 8
-// CHECK7-NEXT:    store i8* null, i8** [[TMP119]], align 4
-// CHECK7-NEXT:    [[TMP120:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 9
-// CHECK7-NEXT:    [[TMP121:%.*]] = bitcast i8** [[TMP120]] to i32*
-// CHECK7-NEXT:    store i32 [[TMP59]], i32* [[TMP121]], align 4
-// CHECK7-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 9
-// CHECK7-NEXT:    [[TMP123:%.*]] = bitcast i8** [[TMP122]] to i32*
-// CHECK7-NEXT:    store i32 [[TMP59]], i32* [[TMP123]], align 4
-// CHECK7-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 9
-// CHECK7-NEXT:    store i64 4, i64* [[TMP124]], align 4
-// CHECK7-NEXT:    [[TMP125:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 9
-// CHECK7-NEXT:    store i8* null, i8** [[TMP125]], align 4
-// CHECK7-NEXT:    [[TMP126:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 0
-// CHECK7-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 0
-// CHECK7-NEXT:    [[TMP128:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK7-NEXT:    [[TMP129:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l140.region_id, i32 10, i8** [[TMP126]], i8** [[TMP127]], i64* [[TMP128]], i64* getelementptr inbounds ([10 x i64], [10 x i64]* @.offload_maptypes.8, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
-// CHECK7-NEXT:    [[TMP130:%.*]] = icmp ne i32 [[TMP129]], 0
-// CHECK7-NEXT:    br i1 [[TMP130]], label [[OMP_OFFLOAD_FAILED17:%.*]], label [[OMP_OFFLOAD_CONT18:%.*]]
+// CHECK7-NEXT:    [[TMP114:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 9
+// CHECK7-NEXT:    [[TMP115:%.*]] = bitcast i8** [[TMP114]] to i32*
+// CHECK7-NEXT:    store i32 [[TMP59]], i32* [[TMP115]], align 4
+// CHECK7-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 9
+// CHECK7-NEXT:    [[TMP117:%.*]] = bitcast i8** [[TMP116]] to i32*
+// CHECK7-NEXT:    store i32 [[TMP59]], i32* [[TMP117]], align 4
+// CHECK7-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 9
+// CHECK7-NEXT:    store i8* null, i8** [[TMP118]], align 4
+// CHECK7-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 0
+// CHECK7-NEXT:    [[TMP120:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 0
+// CHECK7-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK7-NEXT:    [[TMP122:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l140.region_id, i32 10, i8** [[TMP119]], i8** [[TMP120]], i64* [[TMP121]], i64* getelementptr inbounds ([10 x i64], [10 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
+// CHECK7-NEXT:    [[TMP123:%.*]] = icmp ne i32 [[TMP122]], 0
+// CHECK7-NEXT:    br i1 [[TMP123]], label [[OMP_OFFLOAD_FAILED17:%.*]], label [[OMP_OFFLOAD_CONT18:%.*]]
 // CHECK7:       omp_offload.failed17:
 // CHECK7-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l140(i32 [[TMP57]], [10 x float]* [[B]], i32 [[TMP1]], float* [[VLA]], [5 x [10 x double]]* [[C]], i32 5, i32 [[TMP3]], double* [[VLA1]], %struct.TT* [[D]], i32 [[TMP59]]) #[[ATTR4]]
 // CHECK7-NEXT:    br label [[OMP_OFFLOAD_CONT18]]
@@ -10185,10 +10047,10 @@ int bar(int n){
 // CHECK7-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l140(i32 [[TMP57]], [10 x float]* [[B]], i32 [[TMP1]], float* [[VLA]], [5 x [10 x double]]* [[C]], i32 5, i32 [[TMP3]], double* [[VLA1]], %struct.TT* [[D]], i32 [[TMP59]]) #[[ATTR4]]
 // CHECK7-NEXT:    br label [[OMP_IF_END20]]
 // CHECK7:       omp_if.end20:
-// CHECK7-NEXT:    [[TMP131:%.*]] = load i32, i32* [[A]], align 4
-// CHECK7-NEXT:    [[TMP132:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK7-NEXT:    call void @llvm.stackrestore(i8* [[TMP132]])
-// CHECK7-NEXT:    ret i32 [[TMP131]]
+// CHECK7-NEXT:    [[TMP124:%.*]] = load i32, i32* [[A]], align 4
+// CHECK7-NEXT:    [[TMP125:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK7-NEXT:    call void @llvm.stackrestore(i8* [[TMP125]])
+// CHECK7-NEXT:    ret i32 [[TMP124]]
 //
 //
 // CHECK7-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l96
@@ -10932,14 +10794,14 @@ int bar(int n){
 // CHECK7-NEXT:    [[TMP10:%.*]] = mul nuw i32 2, [[TMP1]]
 // CHECK7-NEXT:    [[TMP11:%.*]] = mul nuw i32 [[TMP10]], 2
 // CHECK7-NEXT:    [[TMP12:%.*]] = sext i32 [[TMP11]] to i64
-// CHECK7-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK7-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to %struct.S1**
-// CHECK7-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP14]], align 4
-// CHECK7-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK7-NEXT:    [[TMP16:%.*]] = bitcast i8** [[TMP15]] to double**
-// CHECK7-NEXT:    store double* [[A]], double** [[TMP16]], align 4
-// CHECK7-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [6 x i64], [6 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK7-NEXT:    store i64 8, i64* [[TMP17]], align 4
+// CHECK7-NEXT:    [[TMP13:%.*]] = bitcast [6 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK7-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP13]], i8* align 4 bitcast ([6 x i64]* @.offload_sizes.11 to i8*), i32 48, i1 false)
+// CHECK7-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK7-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to %struct.S1**
+// CHECK7-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP15]], align 4
+// CHECK7-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK7-NEXT:    [[TMP17:%.*]] = bitcast i8** [[TMP16]] to double**
+// CHECK7-NEXT:    store double* [[A]], double** [[TMP17]], align 4
 // CHECK7-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
 // CHECK7-NEXT:    store i8* null, i8** [[TMP18]], align 4
 // CHECK7-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -10948,59 +10810,51 @@ int bar(int n){
 // CHECK7-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK7-NEXT:    [[TMP22:%.*]] = bitcast i8** [[TMP21]] to i32*
 // CHECK7-NEXT:    store i32 [[TMP6]], i32* [[TMP22]], align 4
-// CHECK7-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [6 x i64], [6 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK7-NEXT:    store i64 4, i64* [[TMP23]], align 4
-// CHECK7-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK7-NEXT:    store i8* null, i8** [[TMP24]], align 4
-// CHECK7-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK7-NEXT:    [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i32*
-// CHECK7-NEXT:    store i32 2, i32* [[TMP26]], align 4
-// CHECK7-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK7-NEXT:    [[TMP28:%.*]] = bitcast i8** [[TMP27]] to i32*
-// CHECK7-NEXT:    store i32 2, i32* [[TMP28]], align 4
-// CHECK7-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [6 x i64], [6 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK7-NEXT:    store i64 4, i64* [[TMP29]], align 4
-// CHECK7-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK7-NEXT:    store i8* null, i8** [[TMP30]], align 4
-// CHECK7-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK7-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK7-NEXT:    store i8* null, i8** [[TMP23]], align 4
+// CHECK7-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK7-NEXT:    [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i32*
+// CHECK7-NEXT:    store i32 2, i32* [[TMP25]], align 4
+// CHECK7-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK7-NEXT:    [[TMP27:%.*]] = bitcast i8** [[TMP26]] to i32*
+// CHECK7-NEXT:    store i32 2, i32* [[TMP27]], align 4
+// CHECK7-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK7-NEXT:    store i8* null, i8** [[TMP28]], align 4
+// CHECK7-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK7-NEXT:    [[TMP30:%.*]] = bitcast i8** [[TMP29]] to i32*
+// CHECK7-NEXT:    store i32 [[TMP1]], i32* [[TMP30]], align 4
+// CHECK7-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
 // CHECK7-NEXT:    [[TMP32:%.*]] = bitcast i8** [[TMP31]] to i32*
 // CHECK7-NEXT:    store i32 [[TMP1]], i32* [[TMP32]], align 4
-// CHECK7-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK7-NEXT:    [[TMP34:%.*]] = bitcast i8** [[TMP33]] to i32*
-// CHECK7-NEXT:    store i32 [[TMP1]], i32* [[TMP34]], align 4
-// CHECK7-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [6 x i64], [6 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK7-NEXT:    store i64 4, i64* [[TMP35]], align 4
-// CHECK7-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
-// CHECK7-NEXT:    store i8* null, i8** [[TMP36]], align 4
-// CHECK7-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK7-NEXT:    [[TMP38:%.*]] = bitcast i8** [[TMP37]] to i16**
-// CHECK7-NEXT:    store i16* [[VLA]], i16** [[TMP38]], align 4
-// CHECK7-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
-// CHECK7-NEXT:    [[TMP40:%.*]] = bitcast i8** [[TMP39]] to i16**
-// CHECK7-NEXT:    store i16* [[VLA]], i16** [[TMP40]], align 4
-// CHECK7-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [6 x i64], [6 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK7-NEXT:    store i64 [[TMP12]], i64* [[TMP41]], align 4
-// CHECK7-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
-// CHECK7-NEXT:    store i8* null, i8** [[TMP42]], align 4
-// CHECK7-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 5
-// CHECK7-NEXT:    [[TMP44:%.*]] = bitcast i8** [[TMP43]] to i32*
-// CHECK7-NEXT:    store i32 [[TMP8]], i32* [[TMP44]], align 4
-// CHECK7-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 5
-// CHECK7-NEXT:    [[TMP46:%.*]] = bitcast i8** [[TMP45]] to i32*
-// CHECK7-NEXT:    store i32 [[TMP8]], i32* [[TMP46]], align 4
-// CHECK7-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [6 x i64], [6 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 5
-// CHECK7-NEXT:    store i64 1, i64* [[TMP47]], align 4
-// CHECK7-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 5
-// CHECK7-NEXT:    store i8* null, i8** [[TMP48]], align 4
-// CHECK7-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK7-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK7-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [6 x i64], [6 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK7-NEXT:    [[TMP52:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1
-// CHECK7-NEXT:    [[TOBOOL4:%.*]] = trunc i8 [[TMP52]] to i1
-// CHECK7-NEXT:    [[TMP53:%.*]] = select i1 [[TOBOOL4]], i32 0, i32 1
-// CHECK7-NEXT:    [[TMP54:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l214.region_id, i32 6, i8** [[TMP49]], i8** [[TMP50]], i64* [[TMP51]], i64* getelementptr inbounds ([6 x i64], [6 x i64]* @.offload_maptypes.10, i32 0, i32 0), i8** null, i8** null, i32 1, i32 [[TMP53]])
-// CHECK7-NEXT:    [[TMP55:%.*]] = icmp ne i32 [[TMP54]], 0
-// CHECK7-NEXT:    br i1 [[TMP55]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK7-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
+// CHECK7-NEXT:    store i8* null, i8** [[TMP33]], align 4
+// CHECK7-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK7-NEXT:    [[TMP35:%.*]] = bitcast i8** [[TMP34]] to i16**
+// CHECK7-NEXT:    store i16* [[VLA]], i16** [[TMP35]], align 4
+// CHECK7-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK7-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i16**
+// CHECK7-NEXT:    store i16* [[VLA]], i16** [[TMP37]], align 4
+// CHECK7-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [6 x i64], [6 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
+// CHECK7-NEXT:    store i64 [[TMP12]], i64* [[TMP38]], align 4
+// CHECK7-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
+// CHECK7-NEXT:    store i8* null, i8** [[TMP39]], align 4
+// CHECK7-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 5
+// CHECK7-NEXT:    [[TMP41:%.*]] = bitcast i8** [[TMP40]] to i32*
+// CHECK7-NEXT:    store i32 [[TMP8]], i32* [[TMP41]], align 4
+// CHECK7-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 5
+// CHECK7-NEXT:    [[TMP43:%.*]] = bitcast i8** [[TMP42]] to i32*
+// CHECK7-NEXT:    store i32 [[TMP8]], i32* [[TMP43]], align 4
+// CHECK7-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 5
+// CHECK7-NEXT:    store i8* null, i8** [[TMP44]], align 4
+// CHECK7-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK7-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK7-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [6 x i64], [6 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK7-NEXT:    [[TMP48:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1
+// CHECK7-NEXT:    [[TOBOOL4:%.*]] = trunc i8 [[TMP48]] to i1
+// CHECK7-NEXT:    [[TMP49:%.*]] = select i1 [[TOBOOL4]], i32 0, i32 1
+// CHECK7-NEXT:    [[TMP50:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l214.region_id, i32 6, i8** [[TMP45]], i8** [[TMP46]], i64* [[TMP47]], i64* getelementptr inbounds ([6 x i64], [6 x i64]* @.offload_maptypes.12, i32 0, i32 0), i8** null, i8** null, i32 1, i32 [[TMP49]])
+// CHECK7-NEXT:    [[TMP51:%.*]] = icmp ne i32 [[TMP50]], 0
+// CHECK7-NEXT:    br i1 [[TMP51]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK7:       omp_offload.failed:
 // CHECK7-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l214(%struct.S1* [[THIS1]], i32 [[TMP6]], i32 2, i32 [[TMP1]], i16* [[VLA]], i32 [[TMP8]]) #[[ATTR4]]
 // CHECK7-NEXT:    br label [[OMP_OFFLOAD_CONT]]
@@ -11010,15 +10864,15 @@ int bar(int n){
 // CHECK7-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l214(%struct.S1* [[THIS1]], i32 [[TMP6]], i32 2, i32 [[TMP1]], i16* [[VLA]], i32 [[TMP8]]) #[[ATTR4]]
 // CHECK7-NEXT:    br label [[OMP_IF_END]]
 // CHECK7:       omp_if.end:
-// CHECK7-NEXT:    [[TMP56:%.*]] = mul nsw i32 1, [[TMP1]]
-// CHECK7-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i32 [[TMP56]]
+// CHECK7-NEXT:    [[TMP52:%.*]] = mul nsw i32 1, [[TMP1]]
+// CHECK7-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i32 [[TMP52]]
 // CHECK7-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1
-// CHECK7-NEXT:    [[TMP57:%.*]] = load i16, i16* [[ARRAYIDX5]], align 2
-// CHECK7-NEXT:    [[CONV6:%.*]] = sext i16 [[TMP57]] to i32
-// CHECK7-NEXT:    [[TMP58:%.*]] = load i32, i32* [[B]], align 4
-// CHECK7-NEXT:    [[ADD7:%.*]] = add nsw i32 [[CONV6]], [[TMP58]]
-// CHECK7-NEXT:    [[TMP59:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK7-NEXT:    call void @llvm.stackrestore(i8* [[TMP59]])
+// CHECK7-NEXT:    [[TMP53:%.*]] = load i16, i16* [[ARRAYIDX5]], align 2
+// CHECK7-NEXT:    [[CONV6:%.*]] = sext i16 [[TMP53]] to i32
+// CHECK7-NEXT:    [[TMP54:%.*]] = load i32, i32* [[B]], align 4
+// CHECK7-NEXT:    [[ADD7:%.*]] = add nsw i32 [[CONV6]], [[TMP54]]
+// CHECK7-NEXT:    [[TMP55:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK7-NEXT:    call void @llvm.stackrestore(i8* [[TMP55]])
 // CHECK7-NEXT:    ret i32 [[ADD7]]
 //
 //
@@ -11089,7 +10943,7 @@ int bar(int n){
 // CHECK7-NEXT:    store i8* null, i8** [[TMP26]], align 4
 // CHECK7-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK7-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK7-NEXT:    [[TMP29:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l195.region_id, i32 4, i8** [[TMP27]], i8** [[TMP28]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.12, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
+// CHECK7-NEXT:    [[TMP29:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l195.region_id, i32 4, i8** [[TMP27]], i8** [[TMP28]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.14, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.15, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
 // CHECK7-NEXT:    [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0
 // CHECK7-NEXT:    br i1 [[TMP30]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK7:       omp_offload.failed:
@@ -11157,7 +11011,7 @@ int bar(int n){
 // CHECK7-NEXT:    store i8* null, i8** [[TMP19]], align 4
 // CHECK7-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK7-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK7-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l178.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.15, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.16, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
+// CHECK7-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l178.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.17, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.18, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
 // CHECK7-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK7-NEXT:    br i1 [[TMP23]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK7:       omp_offload.failed:
@@ -11211,20 +11065,20 @@ int bar(int n){
 // CHECK7-NEXT:    [[TOBOOL4:%.*]] = trunc i8 [[TMP9]] to i1
 // CHECK7-NEXT:    br i1 [[TOBOOL4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
 // CHECK7:       omp_if.then:
-// CHECK7-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i32, i32, i32, i16*, i32)* @.omp_outlined..9 to void (i32*, i32*, ...)*), %struct.S1* [[TMP1]], i32 [[TMP6]], i32 [[TMP2]], i32 [[TMP3]], i16* [[TMP4]], i32 [[TMP8]])
+// CHECK7-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i32, i32, i32, i16*, i32)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.S1* [[TMP1]], i32 [[TMP6]], i32 [[TMP2]], i32 [[TMP3]], i16* [[TMP4]], i32 [[TMP8]])
 // CHECK7-NEXT:    br label [[OMP_IF_END:%.*]]
 // CHECK7:       omp_if.else:
 // CHECK7-NEXT:    call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
 // CHECK7-NEXT:    store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
 // CHECK7-NEXT:    store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
-// CHECK7-NEXT:    call void @.omp_outlined..9(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]], i32 [[TMP6]], i32 [[TMP2]], i32 [[TMP3]], i16* [[TMP4]], i32 [[TMP8]]) #[[ATTR4]]
+// CHECK7-NEXT:    call void @.omp_outlined..10(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]], i32 [[TMP6]], i32 [[TMP2]], i32 [[TMP3]], i16* [[TMP4]], i32 [[TMP8]]) #[[ATTR4]]
 // CHECK7-NEXT:    call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
 // CHECK7-NEXT:    br label [[OMP_IF_END]]
 // CHECK7:       omp_if.end:
 // CHECK7-NEXT:    ret void
 //
 //
-// CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..9
+// CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..10
 // CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR3]] {
 // CHECK7-NEXT:  entry:
 // CHECK7-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -11410,11 +11264,11 @@ int bar(int n){
 // CHECK7-NEXT:    [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8*
 // CHECK7-NEXT:    store i8 [[TMP5]], i8* [[CONV3]], align 1
 // CHECK7-NEXT:    [[TMP6:%.*]] = load i32, i32* [[AAA_CASTED]], align 4
-// CHECK7-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, [10 x i32]*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], [10 x i32]* [[TMP0]])
+// CHECK7-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, [10 x i32]*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], [10 x i32]* [[TMP0]])
 // CHECK7-NEXT:    ret void
 //
 //
-// CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..11
+// CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..13
 // CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] {
 // CHECK7-NEXT:  entry:
 // CHECK7-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -11457,11 +11311,11 @@ int bar(int n){
 // CHECK7-NEXT:    [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16*
 // CHECK7-NEXT:    store i16 [[TMP3]], i16* [[CONV1]], align 2
 // CHECK7-NEXT:    [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4
-// CHECK7-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], [10 x i32]* [[TMP0]])
+// CHECK7-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], [10 x i32]* [[TMP0]])
 // CHECK7-NEXT:    ret void
 //
 //
-// CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..14
+// CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..16
 // CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] {
 // CHECK7-NEXT:  entry:
 // CHECK7-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -11549,7 +11403,7 @@ int bar(int n){
 //
 //
 // CHECK7-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK7-SAME: () #[[ATTR7:[0-9]+]] {
+// CHECK7-SAME: () #[[ATTR8:[0-9]+]] {
 // CHECK7-NEXT:  entry:
 // CHECK7-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK7-NEXT:    ret void
@@ -11720,14 +11574,14 @@ int bar(int n){
 // CHECK8-NEXT:    [[TMP63:%.*]] = mul nuw i32 5, [[TMP3]]
 // CHECK8-NEXT:    [[TMP64:%.*]] = mul nuw i32 [[TMP63]], 8
 // CHECK8-NEXT:    [[TMP65:%.*]] = sext i32 [[TMP64]] to i64
-// CHECK8-NEXT:    [[TMP66:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 0
-// CHECK8-NEXT:    [[TMP67:%.*]] = bitcast i8** [[TMP66]] to i32*
-// CHECK8-NEXT:    store i32 [[TMP57]], i32* [[TMP67]], align 4
-// CHECK8-NEXT:    [[TMP68:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 0
-// CHECK8-NEXT:    [[TMP69:%.*]] = bitcast i8** [[TMP68]] to i32*
-// CHECK8-NEXT:    store i32 [[TMP57]], i32* [[TMP69]], align 4
-// CHECK8-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK8-NEXT:    store i64 4, i64* [[TMP70]], align 4
+// CHECK8-NEXT:    [[TMP66:%.*]] = bitcast [10 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK8-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP66]], i8* align 4 bitcast ([10 x i64]* @.offload_sizes.8 to i8*), i32 80, i1 false)
+// CHECK8-NEXT:    [[TMP67:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 0
+// CHECK8-NEXT:    [[TMP68:%.*]] = bitcast i8** [[TMP67]] to i32*
+// CHECK8-NEXT:    store i32 [[TMP57]], i32* [[TMP68]], align 4
+// CHECK8-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 0
+// CHECK8-NEXT:    [[TMP70:%.*]] = bitcast i8** [[TMP69]] to i32*
+// CHECK8-NEXT:    store i32 [[TMP57]], i32* [[TMP70]], align 4
 // CHECK8-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 0
 // CHECK8-NEXT:    store i8* null, i8** [[TMP71]], align 4
 // CHECK8-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 1
@@ -11736,96 +11590,82 @@ int bar(int n){
 // CHECK8-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 1
 // CHECK8-NEXT:    [[TMP75:%.*]] = bitcast i8** [[TMP74]] to [10 x float]**
 // CHECK8-NEXT:    store [10 x float]* [[B]], [10 x float]** [[TMP75]], align 4
-// CHECK8-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK8-NEXT:    store i64 40, i64* [[TMP76]], align 4
-// CHECK8-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 1
-// CHECK8-NEXT:    store i8* null, i8** [[TMP77]], align 4
-// CHECK8-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 2
-// CHECK8-NEXT:    [[TMP79:%.*]] = bitcast i8** [[TMP78]] to i32*
-// CHECK8-NEXT:    store i32 [[TMP1]], i32* [[TMP79]], align 4
-// CHECK8-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 2
-// CHECK8-NEXT:    [[TMP81:%.*]] = bitcast i8** [[TMP80]] to i32*
-// CHECK8-NEXT:    store i32 [[TMP1]], i32* [[TMP81]], align 4
-// CHECK8-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK8-NEXT:    store i64 4, i64* [[TMP82]], align 4
-// CHECK8-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 2
-// CHECK8-NEXT:    store i8* null, i8** [[TMP83]], align 4
-// CHECK8-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 3
+// CHECK8-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 1
+// CHECK8-NEXT:    store i8* null, i8** [[TMP76]], align 4
+// CHECK8-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 2
+// CHECK8-NEXT:    [[TMP78:%.*]] = bitcast i8** [[TMP77]] to i32*
+// CHECK8-NEXT:    store i32 [[TMP1]], i32* [[TMP78]], align 4
+// CHECK8-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 2
+// CHECK8-NEXT:    [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i32*
+// CHECK8-NEXT:    store i32 [[TMP1]], i32* [[TMP80]], align 4
+// CHECK8-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 2
+// CHECK8-NEXT:    store i8* null, i8** [[TMP81]], align 4
+// CHECK8-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 3
+// CHECK8-NEXT:    [[TMP83:%.*]] = bitcast i8** [[TMP82]] to float**
+// CHECK8-NEXT:    store float* [[VLA]], float** [[TMP83]], align 4
+// CHECK8-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 3
 // CHECK8-NEXT:    [[TMP85:%.*]] = bitcast i8** [[TMP84]] to float**
 // CHECK8-NEXT:    store float* [[VLA]], float** [[TMP85]], align 4
-// CHECK8-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 3
-// CHECK8-NEXT:    [[TMP87:%.*]] = bitcast i8** [[TMP86]] to float**
-// CHECK8-NEXT:    store float* [[VLA]], float** [[TMP87]], align 4
-// CHECK8-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK8-NEXT:    store i64 [[TMP62]], i64* [[TMP88]], align 4
-// CHECK8-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 3
-// CHECK8-NEXT:    store i8* null, i8** [[TMP89]], align 4
-// CHECK8-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 4
+// CHECK8-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
+// CHECK8-NEXT:    store i64 [[TMP62]], i64* [[TMP86]], align 4
+// CHECK8-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 3
+// CHECK8-NEXT:    store i8* null, i8** [[TMP87]], align 4
+// CHECK8-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 4
+// CHECK8-NEXT:    [[TMP89:%.*]] = bitcast i8** [[TMP88]] to [5 x [10 x double]]**
+// CHECK8-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP89]], align 4
+// CHECK8-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 4
 // CHECK8-NEXT:    [[TMP91:%.*]] = bitcast i8** [[TMP90]] to [5 x [10 x double]]**
 // CHECK8-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP91]], align 4
-// CHECK8-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 4
-// CHECK8-NEXT:    [[TMP93:%.*]] = bitcast i8** [[TMP92]] to [5 x [10 x double]]**
-// CHECK8-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP93]], align 4
-// CHECK8-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK8-NEXT:    store i64 400, i64* [[TMP94]], align 4
-// CHECK8-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 4
-// CHECK8-NEXT:    store i8* null, i8** [[TMP95]], align 4
-// CHECK8-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 5
-// CHECK8-NEXT:    [[TMP97:%.*]] = bitcast i8** [[TMP96]] to i32*
-// CHECK8-NEXT:    store i32 5, i32* [[TMP97]], align 4
-// CHECK8-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 5
+// CHECK8-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 4
+// CHECK8-NEXT:    store i8* null, i8** [[TMP92]], align 4
+// CHECK8-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 5
+// CHECK8-NEXT:    [[TMP94:%.*]] = bitcast i8** [[TMP93]] to i32*
+// CHECK8-NEXT:    store i32 5, i32* [[TMP94]], align 4
+// CHECK8-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 5
+// CHECK8-NEXT:    [[TMP96:%.*]] = bitcast i8** [[TMP95]] to i32*
+// CHECK8-NEXT:    store i32 5, i32* [[TMP96]], align 4
+// CHECK8-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 5
+// CHECK8-NEXT:    store i8* null, i8** [[TMP97]], align 4
+// CHECK8-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 6
 // CHECK8-NEXT:    [[TMP99:%.*]] = bitcast i8** [[TMP98]] to i32*
-// CHECK8-NEXT:    store i32 5, i32* [[TMP99]], align 4
-// CHECK8-NEXT:    [[TMP100:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 5
-// CHECK8-NEXT:    store i64 4, i64* [[TMP100]], align 4
-// CHECK8-NEXT:    [[TMP101:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 5
-// CHECK8-NEXT:    store i8* null, i8** [[TMP101]], align 4
-// CHECK8-NEXT:    [[TMP102:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 6
-// CHECK8-NEXT:    [[TMP103:%.*]] = bitcast i8** [[TMP102]] to i32*
-// CHECK8-NEXT:    store i32 [[TMP3]], i32* [[TMP103]], align 4
-// CHECK8-NEXT:    [[TMP104:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 6
-// CHECK8-NEXT:    [[TMP105:%.*]] = bitcast i8** [[TMP104]] to i32*
-// CHECK8-NEXT:    store i32 [[TMP3]], i32* [[TMP105]], align 4
-// CHECK8-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 6
-// CHECK8-NEXT:    store i64 4, i64* [[TMP106]], align 4
-// CHECK8-NEXT:    [[TMP107:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 6
-// CHECK8-NEXT:    store i8* null, i8** [[TMP107]], align 4
-// CHECK8-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 7
-// CHECK8-NEXT:    [[TMP109:%.*]] = bitcast i8** [[TMP108]] to double**
-// CHECK8-NEXT:    store double* [[VLA1]], double** [[TMP109]], align 4
-// CHECK8-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 7
-// CHECK8-NEXT:    [[TMP111:%.*]] = bitcast i8** [[TMP110]] to double**
-// CHECK8-NEXT:    store double* [[VLA1]], double** [[TMP111]], align 4
-// CHECK8-NEXT:    [[TMP112:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
-// CHECK8-NEXT:    store i64 [[TMP65]], i64* [[TMP112]], align 4
-// CHECK8-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 7
+// CHECK8-NEXT:    store i32 [[TMP3]], i32* [[TMP99]], align 4
+// CHECK8-NEXT:    [[TMP100:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 6
+// CHECK8-NEXT:    [[TMP101:%.*]] = bitcast i8** [[TMP100]] to i32*
+// CHECK8-NEXT:    store i32 [[TMP3]], i32* [[TMP101]], align 4
+// CHECK8-NEXT:    [[TMP102:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 6
+// CHECK8-NEXT:    store i8* null, i8** [[TMP102]], align 4
+// CHECK8-NEXT:    [[TMP103:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 7
+// CHECK8-NEXT:    [[TMP104:%.*]] = bitcast i8** [[TMP103]] to double**
+// CHECK8-NEXT:    store double* [[VLA1]], double** [[TMP104]], align 4
+// CHECK8-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 7
+// CHECK8-NEXT:    [[TMP106:%.*]] = bitcast i8** [[TMP105]] to double**
+// CHECK8-NEXT:    store double* [[VLA1]], double** [[TMP106]], align 4
+// CHECK8-NEXT:    [[TMP107:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
+// CHECK8-NEXT:    store i64 [[TMP65]], i64* [[TMP107]], align 4
+// CHECK8-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 7
+// CHECK8-NEXT:    store i8* null, i8** [[TMP108]], align 4
+// CHECK8-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 8
+// CHECK8-NEXT:    [[TMP110:%.*]] = bitcast i8** [[TMP109]] to %struct.TT**
+// CHECK8-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP110]], align 4
+// CHECK8-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 8
+// CHECK8-NEXT:    [[TMP112:%.*]] = bitcast i8** [[TMP111]] to %struct.TT**
+// CHECK8-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP112]], align 4
+// CHECK8-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 8
 // CHECK8-NEXT:    store i8* null, i8** [[TMP113]], align 4
-// CHECK8-NEXT:    [[TMP114:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 8
-// CHECK8-NEXT:    [[TMP115:%.*]] = bitcast i8** [[TMP114]] to %struct.TT**
-// CHECK8-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP115]], align 4
-// CHECK8-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 8
-// CHECK8-NEXT:    [[TMP117:%.*]] = bitcast i8** [[TMP116]] to %struct.TT**
-// CHECK8-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP117]], align 4
-// CHECK8-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 8
-// CHECK8-NEXT:    store i64 12, i64* [[TMP118]], align 4
-// CHECK8-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 8
-// CHECK8-NEXT:    store i8* null, i8** [[TMP119]], align 4
-// CHECK8-NEXT:    [[TMP120:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 9
-// CHECK8-NEXT:    [[TMP121:%.*]] = bitcast i8** [[TMP120]] to i32*
-// CHECK8-NEXT:    store i32 [[TMP59]], i32* [[TMP121]], align 4
-// CHECK8-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 9
-// CHECK8-NEXT:    [[TMP123:%.*]] = bitcast i8** [[TMP122]] to i32*
-// CHECK8-NEXT:    store i32 [[TMP59]], i32* [[TMP123]], align 4
-// CHECK8-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 9
-// CHECK8-NEXT:    store i64 4, i64* [[TMP124]], align 4
-// CHECK8-NEXT:    [[TMP125:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 9
-// CHECK8-NEXT:    store i8* null, i8** [[TMP125]], align 4
-// CHECK8-NEXT:    [[TMP126:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 0
-// CHECK8-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 0
-// CHECK8-NEXT:    [[TMP128:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK8-NEXT:    [[TMP129:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l140.region_id, i32 10, i8** [[TMP126]], i8** [[TMP127]], i64* [[TMP128]], i64* getelementptr inbounds ([10 x i64], [10 x i64]* @.offload_maptypes.8, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
-// CHECK8-NEXT:    [[TMP130:%.*]] = icmp ne i32 [[TMP129]], 0
-// CHECK8-NEXT:    br i1 [[TMP130]], label [[OMP_OFFLOAD_FAILED17:%.*]], label [[OMP_OFFLOAD_CONT18:%.*]]
+// CHECK8-NEXT:    [[TMP114:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 9
+// CHECK8-NEXT:    [[TMP115:%.*]] = bitcast i8** [[TMP114]] to i32*
+// CHECK8-NEXT:    store i32 [[TMP59]], i32* [[TMP115]], align 4
+// CHECK8-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 9
+// CHECK8-NEXT:    [[TMP117:%.*]] = bitcast i8** [[TMP116]] to i32*
+// CHECK8-NEXT:    store i32 [[TMP59]], i32* [[TMP117]], align 4
+// CHECK8-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS16]], i32 0, i32 9
+// CHECK8-NEXT:    store i8* null, i8** [[TMP118]], align 4
+// CHECK8-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 0
+// CHECK8-NEXT:    [[TMP120:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS15]], i32 0, i32 0
+// CHECK8-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK8-NEXT:    [[TMP122:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l140.region_id, i32 10, i8** [[TMP119]], i8** [[TMP120]], i64* [[TMP121]], i64* getelementptr inbounds ([10 x i64], [10 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
+// CHECK8-NEXT:    [[TMP123:%.*]] = icmp ne i32 [[TMP122]], 0
+// CHECK8-NEXT:    br i1 [[TMP123]], label [[OMP_OFFLOAD_FAILED17:%.*]], label [[OMP_OFFLOAD_CONT18:%.*]]
 // CHECK8:       omp_offload.failed17:
 // CHECK8-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l140(i32 [[TMP57]], [10 x float]* [[B]], i32 [[TMP1]], float* [[VLA]], [5 x [10 x double]]* [[C]], i32 5, i32 [[TMP3]], double* [[VLA1]], %struct.TT* [[D]], i32 [[TMP59]]) #[[ATTR4]]
 // CHECK8-NEXT:    br label [[OMP_OFFLOAD_CONT18]]
@@ -11835,10 +11675,10 @@ int bar(int n){
 // CHECK8-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l140(i32 [[TMP57]], [10 x float]* [[B]], i32 [[TMP1]], float* [[VLA]], [5 x [10 x double]]* [[C]], i32 5, i32 [[TMP3]], double* [[VLA1]], %struct.TT* [[D]], i32 [[TMP59]]) #[[ATTR4]]
 // CHECK8-NEXT:    br label [[OMP_IF_END20]]
 // CHECK8:       omp_if.end20:
-// CHECK8-NEXT:    [[TMP131:%.*]] = load i32, i32* [[A]], align 4
-// CHECK8-NEXT:    [[TMP132:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK8-NEXT:    call void @llvm.stackrestore(i8* [[TMP132]])
-// CHECK8-NEXT:    ret i32 [[TMP131]]
+// CHECK8-NEXT:    [[TMP124:%.*]] = load i32, i32* [[A]], align 4
+// CHECK8-NEXT:    [[TMP125:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK8-NEXT:    call void @llvm.stackrestore(i8* [[TMP125]])
+// CHECK8-NEXT:    ret i32 [[TMP124]]
 //
 //
 // CHECK8-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l96
@@ -12582,14 +12422,14 @@ int bar(int n){
 // CHECK8-NEXT:    [[TMP10:%.*]] = mul nuw i32 2, [[TMP1]]
 // CHECK8-NEXT:    [[TMP11:%.*]] = mul nuw i32 [[TMP10]], 2
 // CHECK8-NEXT:    [[TMP12:%.*]] = sext i32 [[TMP11]] to i64
-// CHECK8-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK8-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to %struct.S1**
-// CHECK8-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP14]], align 4
-// CHECK8-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK8-NEXT:    [[TMP16:%.*]] = bitcast i8** [[TMP15]] to double**
-// CHECK8-NEXT:    store double* [[A]], double** [[TMP16]], align 4
-// CHECK8-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [6 x i64], [6 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK8-NEXT:    store i64 8, i64* [[TMP17]], align 4
+// CHECK8-NEXT:    [[TMP13:%.*]] = bitcast [6 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK8-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP13]], i8* align 4 bitcast ([6 x i64]* @.offload_sizes.11 to i8*), i32 48, i1 false)
+// CHECK8-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK8-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to %struct.S1**
+// CHECK8-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP15]], align 4
+// CHECK8-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK8-NEXT:    [[TMP17:%.*]] = bitcast i8** [[TMP16]] to double**
+// CHECK8-NEXT:    store double* [[A]], double** [[TMP17]], align 4
 // CHECK8-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
 // CHECK8-NEXT:    store i8* null, i8** [[TMP18]], align 4
 // CHECK8-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -12598,59 +12438,51 @@ int bar(int n){
 // CHECK8-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK8-NEXT:    [[TMP22:%.*]] = bitcast i8** [[TMP21]] to i32*
 // CHECK8-NEXT:    store i32 [[TMP6]], i32* [[TMP22]], align 4
-// CHECK8-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [6 x i64], [6 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK8-NEXT:    store i64 4, i64* [[TMP23]], align 4
-// CHECK8-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK8-NEXT:    store i8* null, i8** [[TMP24]], align 4
-// CHECK8-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK8-NEXT:    [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i32*
-// CHECK8-NEXT:    store i32 2, i32* [[TMP26]], align 4
-// CHECK8-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK8-NEXT:    [[TMP28:%.*]] = bitcast i8** [[TMP27]] to i32*
-// CHECK8-NEXT:    store i32 2, i32* [[TMP28]], align 4
-// CHECK8-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [6 x i64], [6 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK8-NEXT:    store i64 4, i64* [[TMP29]], align 4
-// CHECK8-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK8-NEXT:    store i8* null, i8** [[TMP30]], align 4
-// CHECK8-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK8-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK8-NEXT:    store i8* null, i8** [[TMP23]], align 4
+// CHECK8-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK8-NEXT:    [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i32*
+// CHECK8-NEXT:    store i32 2, i32* [[TMP25]], align 4
+// CHECK8-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK8-NEXT:    [[TMP27:%.*]] = bitcast i8** [[TMP26]] to i32*
+// CHECK8-NEXT:    store i32 2, i32* [[TMP27]], align 4
+// CHECK8-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK8-NEXT:    store i8* null, i8** [[TMP28]], align 4
+// CHECK8-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK8-NEXT:    [[TMP30:%.*]] = bitcast i8** [[TMP29]] to i32*
+// CHECK8-NEXT:    store i32 [[TMP1]], i32* [[TMP30]], align 4
+// CHECK8-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
 // CHECK8-NEXT:    [[TMP32:%.*]] = bitcast i8** [[TMP31]] to i32*
 // CHECK8-NEXT:    store i32 [[TMP1]], i32* [[TMP32]], align 4
-// CHECK8-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK8-NEXT:    [[TMP34:%.*]] = bitcast i8** [[TMP33]] to i32*
-// CHECK8-NEXT:    store i32 [[TMP1]], i32* [[TMP34]], align 4
-// CHECK8-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [6 x i64], [6 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK8-NEXT:    store i64 4, i64* [[TMP35]], align 4
-// CHECK8-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
-// CHECK8-NEXT:    store i8* null, i8** [[TMP36]], align 4
-// CHECK8-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK8-NEXT:    [[TMP38:%.*]] = bitcast i8** [[TMP37]] to i16**
-// CHECK8-NEXT:    store i16* [[VLA]], i16** [[TMP38]], align 4
-// CHECK8-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
-// CHECK8-NEXT:    [[TMP40:%.*]] = bitcast i8** [[TMP39]] to i16**
-// CHECK8-NEXT:    store i16* [[VLA]], i16** [[TMP40]], align 4
-// CHECK8-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [6 x i64], [6 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK8-NEXT:    store i64 [[TMP12]], i64* [[TMP41]], align 4
-// CHECK8-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
-// CHECK8-NEXT:    store i8* null, i8** [[TMP42]], align 4
-// CHECK8-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 5
-// CHECK8-NEXT:    [[TMP44:%.*]] = bitcast i8** [[TMP43]] to i32*
-// CHECK8-NEXT:    store i32 [[TMP8]], i32* [[TMP44]], align 4
-// CHECK8-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 5
-// CHECK8-NEXT:    [[TMP46:%.*]] = bitcast i8** [[TMP45]] to i32*
-// CHECK8-NEXT:    store i32 [[TMP8]], i32* [[TMP46]], align 4
-// CHECK8-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [6 x i64], [6 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 5
-// CHECK8-NEXT:    store i64 1, i64* [[TMP47]], align 4
-// CHECK8-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 5
-// CHECK8-NEXT:    store i8* null, i8** [[TMP48]], align 4
-// CHECK8-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK8-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK8-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [6 x i64], [6 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK8-NEXT:    [[TMP52:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1
-// CHECK8-NEXT:    [[TOBOOL4:%.*]] = trunc i8 [[TMP52]] to i1
-// CHECK8-NEXT:    [[TMP53:%.*]] = select i1 [[TOBOOL4]], i32 0, i32 1
-// CHECK8-NEXT:    [[TMP54:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l214.region_id, i32 6, i8** [[TMP49]], i8** [[TMP50]], i64* [[TMP51]], i64* getelementptr inbounds ([6 x i64], [6 x i64]* @.offload_maptypes.10, i32 0, i32 0), i8** null, i8** null, i32 1, i32 [[TMP53]])
-// CHECK8-NEXT:    [[TMP55:%.*]] = icmp ne i32 [[TMP54]], 0
-// CHECK8-NEXT:    br i1 [[TMP55]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK8-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
+// CHECK8-NEXT:    store i8* null, i8** [[TMP33]], align 4
+// CHECK8-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK8-NEXT:    [[TMP35:%.*]] = bitcast i8** [[TMP34]] to i16**
+// CHECK8-NEXT:    store i16* [[VLA]], i16** [[TMP35]], align 4
+// CHECK8-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK8-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i16**
+// CHECK8-NEXT:    store i16* [[VLA]], i16** [[TMP37]], align 4
+// CHECK8-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [6 x i64], [6 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
+// CHECK8-NEXT:    store i64 [[TMP12]], i64* [[TMP38]], align 4
+// CHECK8-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
+// CHECK8-NEXT:    store i8* null, i8** [[TMP39]], align 4
+// CHECK8-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 5
+// CHECK8-NEXT:    [[TMP41:%.*]] = bitcast i8** [[TMP40]] to i32*
+// CHECK8-NEXT:    store i32 [[TMP8]], i32* [[TMP41]], align 4
+// CHECK8-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 5
+// CHECK8-NEXT:    [[TMP43:%.*]] = bitcast i8** [[TMP42]] to i32*
+// CHECK8-NEXT:    store i32 [[TMP8]], i32* [[TMP43]], align 4
+// CHECK8-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 5
+// CHECK8-NEXT:    store i8* null, i8** [[TMP44]], align 4
+// CHECK8-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK8-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK8-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [6 x i64], [6 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK8-NEXT:    [[TMP48:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1
+// CHECK8-NEXT:    [[TOBOOL4:%.*]] = trunc i8 [[TMP48]] to i1
+// CHECK8-NEXT:    [[TMP49:%.*]] = select i1 [[TOBOOL4]], i32 0, i32 1
+// CHECK8-NEXT:    [[TMP50:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l214.region_id, i32 6, i8** [[TMP45]], i8** [[TMP46]], i64* [[TMP47]], i64* getelementptr inbounds ([6 x i64], [6 x i64]* @.offload_maptypes.12, i32 0, i32 0), i8** null, i8** null, i32 1, i32 [[TMP49]])
+// CHECK8-NEXT:    [[TMP51:%.*]] = icmp ne i32 [[TMP50]], 0
+// CHECK8-NEXT:    br i1 [[TMP51]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK8:       omp_offload.failed:
 // CHECK8-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l214(%struct.S1* [[THIS1]], i32 [[TMP6]], i32 2, i32 [[TMP1]], i16* [[VLA]], i32 [[TMP8]]) #[[ATTR4]]
 // CHECK8-NEXT:    br label [[OMP_OFFLOAD_CONT]]
@@ -12660,15 +12492,15 @@ int bar(int n){
 // CHECK8-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l214(%struct.S1* [[THIS1]], i32 [[TMP6]], i32 2, i32 [[TMP1]], i16* [[VLA]], i32 [[TMP8]]) #[[ATTR4]]
 // CHECK8-NEXT:    br label [[OMP_IF_END]]
 // CHECK8:       omp_if.end:
-// CHECK8-NEXT:    [[TMP56:%.*]] = mul nsw i32 1, [[TMP1]]
-// CHECK8-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i32 [[TMP56]]
+// CHECK8-NEXT:    [[TMP52:%.*]] = mul nsw i32 1, [[TMP1]]
+// CHECK8-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i32 [[TMP52]]
 // CHECK8-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1
-// CHECK8-NEXT:    [[TMP57:%.*]] = load i16, i16* [[ARRAYIDX5]], align 2
-// CHECK8-NEXT:    [[CONV6:%.*]] = sext i16 [[TMP57]] to i32
-// CHECK8-NEXT:    [[TMP58:%.*]] = load i32, i32* [[B]], align 4
-// CHECK8-NEXT:    [[ADD7:%.*]] = add nsw i32 [[CONV6]], [[TMP58]]
-// CHECK8-NEXT:    [[TMP59:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK8-NEXT:    call void @llvm.stackrestore(i8* [[TMP59]])
+// CHECK8-NEXT:    [[TMP53:%.*]] = load i16, i16* [[ARRAYIDX5]], align 2
+// CHECK8-NEXT:    [[CONV6:%.*]] = sext i16 [[TMP53]] to i32
+// CHECK8-NEXT:    [[TMP54:%.*]] = load i32, i32* [[B]], align 4
+// CHECK8-NEXT:    [[ADD7:%.*]] = add nsw i32 [[CONV6]], [[TMP54]]
+// CHECK8-NEXT:    [[TMP55:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK8-NEXT:    call void @llvm.stackrestore(i8* [[TMP55]])
 // CHECK8-NEXT:    ret i32 [[ADD7]]
 //
 //
@@ -12739,7 +12571,7 @@ int bar(int n){
 // CHECK8-NEXT:    store i8* null, i8** [[TMP26]], align 4
 // CHECK8-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK8-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK8-NEXT:    [[TMP29:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l195.region_id, i32 4, i8** [[TMP27]], i8** [[TMP28]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.12, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
+// CHECK8-NEXT:    [[TMP29:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l195.region_id, i32 4, i8** [[TMP27]], i8** [[TMP28]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.14, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.15, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
 // CHECK8-NEXT:    [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0
 // CHECK8-NEXT:    br i1 [[TMP30]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK8:       omp_offload.failed:
@@ -12807,7 +12639,7 @@ int bar(int n){
 // CHECK8-NEXT:    store i8* null, i8** [[TMP19]], align 4
 // CHECK8-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK8-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK8-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l178.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.15, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.16, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
+// CHECK8-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l178.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.17, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.18, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
 // CHECK8-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK8-NEXT:    br i1 [[TMP23]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK8:       omp_offload.failed:
@@ -12861,20 +12693,20 @@ int bar(int n){
 // CHECK8-NEXT:    [[TOBOOL4:%.*]] = trunc i8 [[TMP9]] to i1
 // CHECK8-NEXT:    br i1 [[TOBOOL4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
 // CHECK8:       omp_if.then:
-// CHECK8-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i32, i32, i32, i16*, i32)* @.omp_outlined..9 to void (i32*, i32*, ...)*), %struct.S1* [[TMP1]], i32 [[TMP6]], i32 [[TMP2]], i32 [[TMP3]], i16* [[TMP4]], i32 [[TMP8]])
+// CHECK8-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i32, i32, i32, i16*, i32)* @.omp_outlined..10 to void (i32*, i32*, ...)*), %struct.S1* [[TMP1]], i32 [[TMP6]], i32 [[TMP2]], i32 [[TMP3]], i16* [[TMP4]], i32 [[TMP8]])
 // CHECK8-NEXT:    br label [[OMP_IF_END:%.*]]
 // CHECK8:       omp_if.else:
 // CHECK8-NEXT:    call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
 // CHECK8-NEXT:    store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4
 // CHECK8-NEXT:    store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4
-// CHECK8-NEXT:    call void @.omp_outlined..9(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]], i32 [[TMP6]], i32 [[TMP2]], i32 [[TMP3]], i16* [[TMP4]], i32 [[TMP8]]) #[[ATTR4]]
+// CHECK8-NEXT:    call void @.omp_outlined..10(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]], i32 [[TMP6]], i32 [[TMP2]], i32 [[TMP3]], i16* [[TMP4]], i32 [[TMP8]]) #[[ATTR4]]
 // CHECK8-NEXT:    call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
 // CHECK8-NEXT:    br label [[OMP_IF_END]]
 // CHECK8:       omp_if.end:
 // CHECK8-NEXT:    ret void
 //
 //
-// CHECK8-LABEL: define {{[^@]+}}@.omp_outlined..9
+// CHECK8-LABEL: define {{[^@]+}}@.omp_outlined..10
 // CHECK8-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR3]] {
 // CHECK8-NEXT:  entry:
 // CHECK8-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -13060,11 +12892,11 @@ int bar(int n){
 // CHECK8-NEXT:    [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8*
 // CHECK8-NEXT:    store i8 [[TMP5]], i8* [[CONV3]], align 1
 // CHECK8-NEXT:    [[TMP6:%.*]] = load i32, i32* [[AAA_CASTED]], align 4
-// CHECK8-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, [10 x i32]*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], [10 x i32]* [[TMP0]])
+// CHECK8-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, [10 x i32]*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], [10 x i32]* [[TMP0]])
 // CHECK8-NEXT:    ret void
 //
 //
-// CHECK8-LABEL: define {{[^@]+}}@.omp_outlined..11
+// CHECK8-LABEL: define {{[^@]+}}@.omp_outlined..13
 // CHECK8-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] {
 // CHECK8-NEXT:  entry:
 // CHECK8-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -13107,11 +12939,11 @@ int bar(int n){
 // CHECK8-NEXT:    [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16*
 // CHECK8-NEXT:    store i16 [[TMP3]], i16* [[CONV1]], align 2
 // CHECK8-NEXT:    [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4
-// CHECK8-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], [10 x i32]* [[TMP0]])
+// CHECK8-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], [10 x i32]* [[TMP0]])
 // CHECK8-NEXT:    ret void
 //
 //
-// CHECK8-LABEL: define {{[^@]+}}@.omp_outlined..14
+// CHECK8-LABEL: define {{[^@]+}}@.omp_outlined..16
 // CHECK8-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] {
 // CHECK8-NEXT:  entry:
 // CHECK8-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -13199,7 +13031,7 @@ int bar(int n){
 //
 //
 // CHECK8-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK8-SAME: () #[[ATTR7:[0-9]+]] {
+// CHECK8-SAME: () #[[ATTR8:[0-9]+]] {
 // CHECK8-NEXT:  entry:
 // CHECK8-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK8-NEXT:    ret void

diff  --git a/clang/test/OpenMP/target_simd_codegen.cpp b/clang/test/OpenMP/target_simd_codegen.cpp
index ad1ad44e2ddf6..65ab50bfa4966 100644
--- a/clang/test/OpenMP/target_simd_codegen.cpp
+++ b/clang/test/OpenMP/target_simd_codegen.cpp
@@ -83,11 +83,14 @@
 // CHECK-DAG: [[MAPT2:@.+]] = private unnamed_addr constant [3 x i64] [i64 800, i64 800, i64 800]
 // CHECK-DAG: [[SIZET3:@.+]] = private unnamed_addr constant [2 x i64] [i64 4, i64 2]
 // CHECK-DAG: [[MAPT3:@.+]] = private unnamed_addr constant [2 x i64] [i64 800, i64 800]
+// CHECK-DAG: [[SIZET4:@.+]] = private unnamed_addr constant [9 x i64] [i64 4, i64 40, i64 {{8|4}}, i64 0, i64 400, i64 {{8|4}}, i64 {{8|4}}, i64 0, i64 {{16|12}}]
 // CHECK-DAG: [[MAPT4:@.+]] = private unnamed_addr constant [9 x i64] [i64 800, i64 547, i64 800, i64 547, i64 547, i64 800, i64 800, i64 547, i64 547]
 // CHECK-DAG: [[SIZET5:@.+]] = private unnamed_addr constant [3 x i64] [i64 4, i64 2, i64 40]
 // CHECK-DAG: [[MAPT5:@.+]] = private unnamed_addr constant [3 x i64] [i64 800, i64 800, i64 547]
 // CHECK-DAG: [[SIZET6:@.+]] = private unnamed_addr constant [4 x i64] [i64 4, i64 2, i64 1, i64 40]
 // CHECK-DAG: [[MAPT6:@.+]] = private unnamed_addr constant [4 x i64] [i64 800, i64 800, i64 800, i64 547]
+// OMP45-DAG: [[SIZET7:@.+]] = private unnamed_addr constant [5 x i64] [i64 {{8|4}}, i64 4, i64 {{8|4}}, i64 {{8|4}}, i64 0]
+// OMP50-DAG: [[SIZET7:@.+]] = private unnamed_addr constant [6 x i64] [i64 {{8|4}}, i64 4, i64 {{8|4}}, i64 {{8|4}}, i64 0, i64 1]
 // OMP45-DAG: [[MAPT7:@.+]] = private unnamed_addr constant [5 x i64] [i64 547, i64 800, i64 800, i64 800, i64 547]
 // OMP50-DAG: [[MAPT7:@.+]] = private unnamed_addr constant [6 x i64] [i64 547, i64 800, i64 800, i64 800, i64 547, i64 800]
 // CHECK-DAG: @{{.*}} = weak constant i8 0
@@ -242,32 +245,25 @@ int foo(int n) {
   // CHECK-DAG:   [[PR]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P:%[^,]+]], i32 0, i32 0
   // CHECK-DAG:   [[SR]] = getelementptr inbounds [9 x i64], [9 x i64]* [[S:%[^,]+]], i32 0, i32 0
 
-  // CHECK-DAG:   [[SADDR0:%.+]] = getelementptr inbounds [9 x i64], [9 x i64]* [[S]], i32 0, i32 [[IDX0:[0-9]+]]
-  // CHECK-DAG:   [[BPADDR0:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP]], i32 0, i32 [[IDX0]]
+  // CHECK-DAG:   [[BPADDR0:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP]], i32 0, i32 [[IDX0:[0-9]+]]
   // CHECK-DAG:   [[PADDR0:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P]], i32 0, i32 [[IDX0]]
-  // CHECK-DAG:   [[SADDR1:%.+]] = getelementptr inbounds [9 x i64], [9 x i64]* [[S]], i32 0, i32 [[IDX1:[0-9]+]]
-  // CHECK-DAG:   [[BPADDR1:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP]], i32 0, i32 [[IDX1]]
+  // CHECK-DAG:   [[BPADDR1:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP]], i32 0, i32 [[IDX1:[0-9]+]]
   // CHECK-DAG:   [[PADDR1:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P]], i32 0, i32 [[IDX1]]
-  // CHECK-DAG:   [[SADDR2:%.+]] = getelementptr inbounds [9 x i64], [9 x i64]* [[S]], i32 0, i32 [[IDX2:[0-9]+]]
-  // CHECK-DAG:   [[BPADDR2:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP]], i32 0, i32 [[IDX2]]
+  // CHECK-DAG:   [[BPADDR2:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP]], i32 0, i32 [[IDX2:[0-9]+]]
   // CHECK-DAG:   [[PADDR2:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P]], i32 0, i32 [[IDX2]]
   // CHECK-DAG:   [[SADDR3:%.+]] = getelementptr inbounds [9 x i64], [9 x i64]* [[S]], i32 0, i32 [[IDX3:[0-9]+]]
   // CHECK-DAG:   [[BPADDR3:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP]], i32 0, i32 [[IDX3]]
   // CHECK-DAG:   [[PADDR3:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P]], i32 0, i32 [[IDX3]]
-  // CHECK-DAG:   [[SADDR4:%.+]] = getelementptr inbounds [9 x i64], [9 x i64]* [[S]], i32 0, i32 [[IDX4:[0-9]+]]
-  // CHECK-DAG:   [[BPADDR4:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP]], i32 0, i32 [[IDX4]]
+  // CHECK-DAG:   [[BPADDR4:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP]], i32 0, i32 [[IDX4:[0-9]+]]
   // CHECK-DAG:   [[PADDR4:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P]], i32 0, i32 [[IDX4]]
-  // CHECK-DAG:   [[SADDR5:%.+]] = getelementptr inbounds [9 x i64], [9 x i64]* [[S]], i32 0, i32 [[IDX5:[0-9]+]]
-  // CHECK-DAG:   [[BPADDR5:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP]], i32 0, i32 [[IDX5]]
+  // CHECK-DAG:   [[BPADDR5:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP]], i32 0, i32 [[IDX5:[0-9]+]]
   // CHECK-DAG:   [[PADDR5:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P]], i32 0, i32 [[IDX5]]
-  // CHECK-DAG:   [[SADDR6:%.+]] = getelementptr inbounds [9 x i64], [9 x i64]* [[S]], i32 0, i32 [[IDX6:[0-9]+]]
-  // CHECK-DAG:   [[BPADDR6:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP]], i32 0, i32 [[IDX6]]
+  // CHECK-DAG:   [[BPADDR6:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP]], i32 0, i32 [[IDX6:[0-9]+]]
   // CHECK-DAG:   [[PADDR6:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P]], i32 0, i32 [[IDX6]]
   // CHECK-DAG:   [[SADDR7:%.+]] = getelementptr inbounds [9 x i64], [9 x i64]* [[S]], i32 0, i32 [[IDX7:[0-9]+]]
   // CHECK-DAG:   [[BPADDR7:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP]], i32 0, i32 [[IDX7]]
   // CHECK-DAG:   [[PADDR7:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P]], i32 0, i32 [[IDX7]]
-  // CHECK-DAG:   [[SADDR8:%.+]] = getelementptr inbounds [9 x i64], [9 x i64]* [[S]], i32 0, i32 [[IDX8:[0-9]+]]
-  // CHECK-DAG:   [[BPADDR8:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP]], i32 0, i32 [[IDX8]]
+  // CHECK-DAG:   [[BPADDR8:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP]], i32 0, i32 [[IDX8:[0-9]+]]
   // CHECK-DAG:   [[PADDR8:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P]], i32 0, i32 [[IDX8]]
 
   // The names below are not necessarily consistent with the names used for the
@@ -276,19 +272,16 @@ int foo(int n) {
   // CHECK-DAG:   store i[[SZ]] [[A_CVAL]], i[[SZ]]* [[CPADDR0:%.+]],
   // CHECK-DAG:   [[CBPADDR0]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
   // CHECK-DAG:   [[CPADDR0]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
-  // CHECK-DAG:   store i64 4, i64* {{%[^,]+}}
 
   // CHECK-DAG:   store [10 x float]* %{{.+}}, [10 x float]** [[CBPADDR1:%.+]],
   // CHECK-DAG:   store [10 x float]* %{{.+}}, [10 x float]** [[CPADDR1:%.+]],
   // CHECK-DAG:   [[CBPADDR1]] = bitcast i8** {{%[^,]+}} to [10 x float]**
   // CHECK-DAG:   [[CPADDR1]] = bitcast i8** {{%[^,]+}} to [10 x float]**
-  // CHECK-DAG:   store i64 40, i64* {{%[^,]+}}
 
   // CHECK-DAG:   store i[[SZ]] %{{.+}}, i[[SZ]]* [[CBPADDR2:%.+]],
   // CHECK-DAG:   store i[[SZ]] %{{.+}}, i[[SZ]]* [[CPADDR2:%.+]],
   // CHECK-DAG:   [[CBPADDR2]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
   // CHECK-DAG:   [[CPADDR2]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
-  // CHECK-DAG:   store i64 {{4|8}}, i64* {{%[^,]+}}
 
   // CHECK-DAG:   store float* %{{.+}}, float** [[CBPADDR3:%.+]],
   // CHECK-DAG:   store float* %{{.+}}, float** [[CPADDR3:%.+]],
@@ -300,19 +293,16 @@ int foo(int n) {
   // CHECK-DAG:   store [5 x [10 x double]]* %{{.+}}, [5 x [10 x double]]** [[CPADDR4:%.+]],
   // CHECK-DAG:   [[CBPADDR4]] = bitcast i8** {{%[^,]+}} to [5 x [10 x double]]**
   // CHECK-DAG:   [[CPADDR4]] = bitcast i8** {{%[^,]+}} to [5 x [10 x double]]**
-  // CHECK-DAG:   store i64 400, i64* {{%[^,]+}}
 
   // CHECK-DAG:   store i[[SZ]] 5, i[[SZ]]* [[CBPADDR5:%.+]],
   // CHECK-DAG:   store i[[SZ]] 5, i[[SZ]]* [[CPADDR5:%.+]],
   // CHECK-DAG:   [[CBPADDR5]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
   // CHECK-DAG:   [[CPADDR5]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
-  // CHECK-DAG:   store i64 {{4|8}}, i64* {{%[^,]+}}
 
   // CHECK-DAG:   store i[[SZ]] %{{.+}}, i[[SZ]]* [[CBPADDR6:%.+]],
   // CHECK-DAG:   store i[[SZ]] %{{.+}}, i[[SZ]]* [[CPADDR6:%.+]],
   // CHECK-DAG:   [[CBPADDR6]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
   // CHECK-DAG:   [[CPADDR6]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
-  // CHECK-DAG:   store i64 {{4|8}}, i64* {{%[^,]+}}
 
   // CHECK-DAG:   store double* %{{.+}}, double** [[CBPADDR7:%.+]],
   // CHECK-DAG:   store double* %{{.+}}, double** [[CPADDR7:%.+]],
@@ -324,7 +314,6 @@ int foo(int n) {
   // CHECK-DAG:   store [[TT]]* %{{.+}}, [[TT]]** [[CPADDR8:%.+]],
   // CHECK-DAG:   [[CBPADDR8]] = bitcast i8** {{%[^,]+}} to [[TT]]**
   // CHECK-DAG:   [[CPADDR8]] = bitcast i8** {{%[^,]+}} to [[TT]]**
-  // CHECK-DAG:   store i64 {{12|16}}, i64* {{%[^,]+}}
 
   // CHECK-NEXT:  [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
   // CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]]
@@ -536,42 +525,31 @@ int bar(int n){
 // OMP50-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @{{.+}}, i64 -1, i8* @{{[^,]+}}, i32 6, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* [[SR:%[^,]+]], i64* getelementptr inbounds ([6 x  i64], [6 x  i64]* [[MAPT7]], i32 0, i32 0), i8** null, i8** null, i32 1, i32 1)
 // OMP45-DAG:   [[BPR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP:%.+]], i32 0, i32 0
 // OMP45-DAG:   [[PR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P:%.+]], i32 0, i32 0
-// OMP45-DAG:   [[SR]] = getelementptr inbounds [5 x i64], [5 x i64]* [[S:%.+]], i32 0, i32 0
-// OMP45-DAG:   [[SADDR0:%.+]] = getelementptr inbounds [5 x i64], [5 x i64]* [[S]], i32 [[IDX0:[0-9]+]]
-// OMP45-DAG:   [[BPADDR0:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP]], i32 [[IDX0]]
+// OMP45-DAG:   [[BPADDR0:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP]], i32 [[IDX0:[0-9]+]]
 // OMP45-DAG:   [[PADDR0:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P]], i32 [[IDX0]]
-// OMP45-DAG:   [[SADDR1:%.+]] = getelementptr inbounds [5 x i64], [5 x i64]* [[S]], i32 [[IDX1:[0-9]+]]
-// OMP45-DAG:   [[BPADDR1:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP]], i32 [[IDX1]]
+// OMP45-DAG:   [[BPADDR1:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP]], i32 [[IDX1:[0-9]+]]
 // OMP45-DAG:   [[PADDR1:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P]], i32 [[IDX1]]
-// OMP45-DAG:   [[SADDR2:%.+]] = getelementptr inbounds [5 x i64], [5 x i64]* [[S]], i32 [[IDX2:[0-9]+]]
-// OMP45-DAG:   [[BPADDR2:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP]], i32 [[IDX2]]
+// OMP45-DAG:   [[BPADDR2:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP]], i32 [[IDX2:[0-9]+]]
 // OMP45-DAG:   [[PADDR2:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P]], i32 [[IDX2]]
-// OMP45-DAG:   [[SADDR3:%.+]] = getelementptr inbounds [5 x i64], [5 x i64]* [[S]], i32 [[IDX3:[0-9]+]]
-// OMP45-DAG:   [[BPADDR3:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP]], i32 [[IDX3]]
+// OMP45-DAG:   [[BPADDR3:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP]], i32 [[IDX3:[0-9]+]]
 // OMP45-DAG:   [[PADDR3:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P]], i32 [[IDX3]]
 // OMP45-DAG:   [[SADDR4:%.+]] = getelementptr inbounds [5 x i64], [5 x i64]* [[S]], i32 [[IDX4:[0-9]+]]
 // OMP45-DAG:   [[BPADDR4:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP]], i32 [[IDX4]]
 // OMP45-DAG:   [[PADDR4:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P]], i32 [[IDX4]]
 // OMP50-DAG:   [[BPR]] = getelementptr inbounds [6 x  i8*], [6 x  i8*]* [[BP:%.+]], i32 0, i32 0
 // OMP50-DAG:   [[PR]] = getelementptr inbounds [6 x  i8*], [6 x  i8*]* [[P:%.+]], i32 0, i32 0
-// OMP50-DAG:   [[SR]] = getelementptr inbounds [6 x  i64], [6 x  i64]* [[S:%.+]], i32 0, i32 0
-// OMP50-DAG:   [[SADDR0:%.+]] = getelementptr inbounds [6 x  i64], [6 x  i64]* [[S]], i32 [[IDX0:[0-9]+]]
-// OMP50-DAG:   [[BPADDR0:%.+]] = getelementptr inbounds [6 x  i8*], [6 x  i8*]* [[BP]], i32 [[IDX0]]
+// OMP50-DAG:   [[BPADDR0:%.+]] = getelementptr inbounds [6 x  i8*], [6 x  i8*]* [[BP]], i32 [[IDX0:[0-9]+]]
 // OMP50-DAG:   [[PADDR0:%.+]] = getelementptr inbounds [6 x  i8*], [6 x  i8*]* [[P]], i32 [[IDX0]]
-// OMP50-DAG:   [[SADDR1:%.+]] = getelementptr inbounds [6 x  i64], [6 x  i64]* [[S]], i32 [[IDX1:[0-9]+]]
-// OMP50-DAG:   [[BPADDR1:%.+]] = getelementptr inbounds [6 x  i8*], [6 x  i8*]* [[BP]], i32 [[IDX1]]
+// OMP50-DAG:   [[BPADDR1:%.+]] = getelementptr inbounds [6 x  i8*], [6 x  i8*]* [[BP]], i32 [[IDX1:[0-9]+]]
 // OMP50-DAG:   [[PADDR1:%.+]] = getelementptr inbounds [6 x  i8*], [6 x  i8*]* [[P]], i32 [[IDX1]]
-// OMP50-DAG:   [[SADDR2:%.+]] = getelementptr inbounds [6 x  i64], [6 x  i64]* [[S]], i32 [[IDX2:[0-9]+]]
-// OMP50-DAG:   [[BPADDR2:%.+]] = getelementptr inbounds [6 x  i8*], [6 x  i8*]* [[BP]], i32 [[IDX2]]
+// OMP50-DAG:   [[BPADDR2:%.+]] = getelementptr inbounds [6 x  i8*], [6 x  i8*]* [[BP]], i32 [[IDX2:[0-9]+]]
 // OMP50-DAG:   [[PADDR2:%.+]] = getelementptr inbounds [6 x  i8*], [6 x  i8*]* [[P]], i32 [[IDX2]]
-// OMP50-DAG:   [[SADDR3:%.+]] = getelementptr inbounds [6 x  i64], [6 x  i64]* [[S]], i32 [[IDX3:[0-9]+]]
-// OMP50-DAG:   [[BPADDR3:%.+]] = getelementptr inbounds [6 x  i8*], [6 x  i8*]* [[BP]], i32 [[IDX3]]
+// OMP50-DAG:   [[BPADDR3:%.+]] = getelementptr inbounds [6 x  i8*], [6 x  i8*]* [[BP]], i32 [[IDX3:[0-9]+]]
 // OMP50-DAG:   [[PADDR3:%.+]] = getelementptr inbounds [6 x  i8*], [6 x  i8*]* [[P]], i32 [[IDX3]]
 // OMP50-DAG:   [[SADDR4:%.+]] = getelementptr inbounds [6 x  i64], [6 x  i64]* [[S]], i32 [[IDX4:[0-9]+]]
 // OMP50-DAG:   [[BPADDR4:%.+]] = getelementptr inbounds [6 x  i8*], [6 x  i8*]* [[BP]], i32 [[IDX4]]
 // OMP50-DAG:   [[PADDR4:%.+]] = getelementptr inbounds [6 x  i8*], [6 x  i8*]* [[P]], i32 [[IDX4]]
-// OMP50-DAG:   [[SADDR5:%.+]] = getelementptr inbounds [6 x  i64], [6 x  i64]* [[S]], i32 [[IDX5:[0-9]+]]
-// OMP50-DAG:   [[BPADDR5:%.+]] = getelementptr inbounds [6 x  i8*], [6 x  i8*]* [[BP]], i32 [[IDX5]]
+// OMP50-DAG:   [[BPADDR5:%.+]] = getelementptr inbounds [6 x  i8*], [6 x  i8*]* [[BP]], i32 [[IDX5:[0-9]+]]
 // OMP50-DAG:   [[PADDR5:%.+]] = getelementptr inbounds [6 x  i8*], [6 x  i8*]* [[P]], i32 [[IDX5]]
 
 // The names below are not necessarily consistent with the names used for the
@@ -580,25 +558,21 @@ int bar(int n){
 // CHECK-DAG:   store [[S1]]* %{{.+}}, [[S1]]** [[CPADDR0:%.+]],
 // CHECK-DAG:   [[CBPADDR0]] = bitcast i8** {{%[^,]+}} to [[S1]]**
 // CHECK-DAG:   [[CPADDR0]] = bitcast i8** {{%[^,]+}} to [[S1]]**
-// CHECK-DAG:   store i64 {{4|8}}, i64* {{%[^,]+}}
 
 // CHECK-DAG:   store i[[SZ]] [[B_CVAL]], i[[SZ]]* [[CBPADDR1:%.+]],
 // CHECK-DAG:   store i[[SZ]] [[B_CVAL]], i[[SZ]]* [[CPADDR1:%.+]],
 // CHECK-DAG:   [[CBPADDR1]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
 // CHECK-DAG:   [[CPADDR1]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
-// CHECK-DAG:   store i64 4, i64* {{%[^,]+}}
 
 // CHECK-DAG:   store i[[SZ]] 2, i[[SZ]]* [[CBPADDR2:%.+]],
 // CHECK-DAG:   store i[[SZ]] 2, i[[SZ]]* [[CPADDR2:%.+]],
 // CHECK-DAG:   [[CBPADDR2]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
 // CHECK-DAG:   [[CPADDR2]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
-// CHECK-DAG:   store i64 {{4|8}}, i64* {{%[^,]+}}
 
 // CHECK-DAG:   store i[[SZ]] %{{.+}}, i[[SZ]]* [[CBPADDR3:%.+]],
 // CHECK-DAG:   store i[[SZ]] %{{.+}}, i[[SZ]]* [[CPADDR3:%.+]],
 // CHECK-DAG:   [[CBPADDR3]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
 // CHECK-DAG:   [[CPADDR3]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
-// CHECK-DAG:   store i64 {{4|8}}, i64* {{%[^,]+}}
 
 // CHECK-DAG:   store i16* %{{.+}}, i16** [[CBPADDR4:%.+]],
 // CHECK-DAG:   store i16* %{{.+}}, i16** [[CPADDR4:%.+]],
@@ -610,7 +584,6 @@ int bar(int n){
 // OMP50-DAG:   store i[[SZ]] [[SIMD_COND]], i[[SZ]]* [[CPADDR5:%.+]],
 // OMP50-DAG:   [[CBPADDR5]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
 // OMP50-DAG:   [[CPADDR5]] = bitcast i8** {{%[^,]+}} to i[[SZ]]*
-// OMP50-DAG:   store i64 1, i64* {{%[^,]+}}
 
 // CHECK-NEXT:  [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
 // CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]]

diff  --git a/clang/test/OpenMP/target_teams_codegen.cpp b/clang/test/OpenMP/target_teams_codegen.cpp
index 9291aa1e658cd..903bfa537ead9 100644
--- a/clang/test/OpenMP/target_teams_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_codegen.cpp
@@ -516,14 +516,14 @@ int bar(int n){
 // CHECK1-NEXT:    [[TMP92:%.*]] = mul nuw i64 [[TMP2]], 4
 // CHECK1-NEXT:    [[TMP93:%.*]] = mul nuw i64 5, [[TMP5]]
 // CHECK1-NEXT:    [[TMP94:%.*]] = mul nuw i64 [[TMP93]], 8
-// CHECK1-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS25]], i32 0, i32 0
-// CHECK1-NEXT:    [[TMP96:%.*]] = bitcast i8** [[TMP95]] to i64*
-// CHECK1-NEXT:    store i64 [[TMP90]], i64* [[TMP96]], align 8
-// CHECK1-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS26]], i32 0, i32 0
-// CHECK1-NEXT:    [[TMP98:%.*]] = bitcast i8** [[TMP97]] to i64*
-// CHECK1-NEXT:    store i64 [[TMP90]], i64* [[TMP98]], align 8
-// CHECK1-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK1-NEXT:    store i64 4, i64* [[TMP99]], align 8
+// CHECK1-NEXT:    [[TMP95:%.*]] = bitcast [9 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK1-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP95]], i8* align 8 bitcast ([9 x i64]* @.offload_sizes.10 to i8*), i64 72, i1 false)
+// CHECK1-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS25]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP97:%.*]] = bitcast i8** [[TMP96]] to i64*
+// CHECK1-NEXT:    store i64 [[TMP90]], i64* [[TMP97]], align 8
+// CHECK1-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS26]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP99:%.*]] = bitcast i8** [[TMP98]] to i64*
+// CHECK1-NEXT:    store i64 [[TMP90]], i64* [[TMP99]], align 8
 // CHECK1-NEXT:    [[TMP100:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS27]], i64 0, i64 0
 // CHECK1-NEXT:    store i8* null, i8** [[TMP100]], align 8
 // CHECK1-NEXT:    [[TMP101:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS25]], i32 0, i32 1
@@ -532,86 +532,74 @@ int bar(int n){
 // CHECK1-NEXT:    [[TMP103:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS26]], i32 0, i32 1
 // CHECK1-NEXT:    [[TMP104:%.*]] = bitcast i8** [[TMP103]] to [10 x float]**
 // CHECK1-NEXT:    store [10 x float]* [[B]], [10 x float]** [[TMP104]], align 8
-// CHECK1-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK1-NEXT:    store i64 40, i64* [[TMP105]], align 8
-// CHECK1-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS27]], i64 0, i64 1
-// CHECK1-NEXT:    store i8* null, i8** [[TMP106]], align 8
-// CHECK1-NEXT:    [[TMP107:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS25]], i32 0, i32 2
-// CHECK1-NEXT:    [[TMP108:%.*]] = bitcast i8** [[TMP107]] to i64*
-// CHECK1-NEXT:    store i64 [[TMP2]], i64* [[TMP108]], align 8
-// CHECK1-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS26]], i32 0, i32 2
-// CHECK1-NEXT:    [[TMP110:%.*]] = bitcast i8** [[TMP109]] to i64*
-// CHECK1-NEXT:    store i64 [[TMP2]], i64* [[TMP110]], align 8
-// CHECK1-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK1-NEXT:    store i64 8, i64* [[TMP111]], align 8
-// CHECK1-NEXT:    [[TMP112:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS27]], i64 0, i64 2
-// CHECK1-NEXT:    store i8* null, i8** [[TMP112]], align 8
-// CHECK1-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS25]], i32 0, i32 3
+// CHECK1-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS27]], i64 0, i64 1
+// CHECK1-NEXT:    store i8* null, i8** [[TMP105]], align 8
+// CHECK1-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS25]], i32 0, i32 2
+// CHECK1-NEXT:    [[TMP107:%.*]] = bitcast i8** [[TMP106]] to i64*
+// CHECK1-NEXT:    store i64 [[TMP2]], i64* [[TMP107]], align 8
+// CHECK1-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS26]], i32 0, i32 2
+// CHECK1-NEXT:    [[TMP109:%.*]] = bitcast i8** [[TMP108]] to i64*
+// CHECK1-NEXT:    store i64 [[TMP2]], i64* [[TMP109]], align 8
+// CHECK1-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS27]], i64 0, i64 2
+// CHECK1-NEXT:    store i8* null, i8** [[TMP110]], align 8
+// CHECK1-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS25]], i32 0, i32 3
+// CHECK1-NEXT:    [[TMP112:%.*]] = bitcast i8** [[TMP111]] to float**
+// CHECK1-NEXT:    store float* [[VLA]], float** [[TMP112]], align 8
+// CHECK1-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS26]], i32 0, i32 3
 // CHECK1-NEXT:    [[TMP114:%.*]] = bitcast i8** [[TMP113]] to float**
 // CHECK1-NEXT:    store float* [[VLA]], float** [[TMP114]], align 8
-// CHECK1-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS26]], i32 0, i32 3
-// CHECK1-NEXT:    [[TMP116:%.*]] = bitcast i8** [[TMP115]] to float**
-// CHECK1-NEXT:    store float* [[VLA]], float** [[TMP116]], align 8
-// CHECK1-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK1-NEXT:    store i64 [[TMP92]], i64* [[TMP117]], align 8
-// CHECK1-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS27]], i64 0, i64 3
-// CHECK1-NEXT:    store i8* null, i8** [[TMP118]], align 8
-// CHECK1-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS25]], i32 0, i32 4
+// CHECK1-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
+// CHECK1-NEXT:    store i64 [[TMP92]], i64* [[TMP115]], align 8
+// CHECK1-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS27]], i64 0, i64 3
+// CHECK1-NEXT:    store i8* null, i8** [[TMP116]], align 8
+// CHECK1-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS25]], i32 0, i32 4
+// CHECK1-NEXT:    [[TMP118:%.*]] = bitcast i8** [[TMP117]] to [5 x [10 x double]]**
+// CHECK1-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP118]], align 8
+// CHECK1-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS26]], i32 0, i32 4
 // CHECK1-NEXT:    [[TMP120:%.*]] = bitcast i8** [[TMP119]] to [5 x [10 x double]]**
 // CHECK1-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP120]], align 8
-// CHECK1-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS26]], i32 0, i32 4
-// CHECK1-NEXT:    [[TMP122:%.*]] = bitcast i8** [[TMP121]] to [5 x [10 x double]]**
-// CHECK1-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP122]], align 8
-// CHECK1-NEXT:    [[TMP123:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK1-NEXT:    store i64 400, i64* [[TMP123]], align 8
-// CHECK1-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS27]], i64 0, i64 4
-// CHECK1-NEXT:    store i8* null, i8** [[TMP124]], align 8
-// CHECK1-NEXT:    [[TMP125:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS25]], i32 0, i32 5
-// CHECK1-NEXT:    [[TMP126:%.*]] = bitcast i8** [[TMP125]] to i64*
-// CHECK1-NEXT:    store i64 5, i64* [[TMP126]], align 8
-// CHECK1-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS26]], i32 0, i32 5
+// CHECK1-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS27]], i64 0, i64 4
+// CHECK1-NEXT:    store i8* null, i8** [[TMP121]], align 8
+// CHECK1-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS25]], i32 0, i32 5
+// CHECK1-NEXT:    [[TMP123:%.*]] = bitcast i8** [[TMP122]] to i64*
+// CHECK1-NEXT:    store i64 5, i64* [[TMP123]], align 8
+// CHECK1-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS26]], i32 0, i32 5
+// CHECK1-NEXT:    [[TMP125:%.*]] = bitcast i8** [[TMP124]] to i64*
+// CHECK1-NEXT:    store i64 5, i64* [[TMP125]], align 8
+// CHECK1-NEXT:    [[TMP126:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS27]], i64 0, i64 5
+// CHECK1-NEXT:    store i8* null, i8** [[TMP126]], align 8
+// CHECK1-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS25]], i32 0, i32 6
 // CHECK1-NEXT:    [[TMP128:%.*]] = bitcast i8** [[TMP127]] to i64*
-// CHECK1-NEXT:    store i64 5, i64* [[TMP128]], align 8
-// CHECK1-NEXT:    [[TMP129:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 5
-// CHECK1-NEXT:    store i64 8, i64* [[TMP129]], align 8
-// CHECK1-NEXT:    [[TMP130:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS27]], i64 0, i64 5
-// CHECK1-NEXT:    store i8* null, i8** [[TMP130]], align 8
-// CHECK1-NEXT:    [[TMP131:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS25]], i32 0, i32 6
-// CHECK1-NEXT:    [[TMP132:%.*]] = bitcast i8** [[TMP131]] to i64*
-// CHECK1-NEXT:    store i64 [[TMP5]], i64* [[TMP132]], align 8
-// CHECK1-NEXT:    [[TMP133:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS26]], i32 0, i32 6
-// CHECK1-NEXT:    [[TMP134:%.*]] = bitcast i8** [[TMP133]] to i64*
-// CHECK1-NEXT:    store i64 [[TMP5]], i64* [[TMP134]], align 8
-// CHECK1-NEXT:    [[TMP135:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 6
-// CHECK1-NEXT:    store i64 8, i64* [[TMP135]], align 8
-// CHECK1-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS27]], i64 0, i64 6
-// CHECK1-NEXT:    store i8* null, i8** [[TMP136]], align 8
-// CHECK1-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS25]], i32 0, i32 7
-// CHECK1-NEXT:    [[TMP138:%.*]] = bitcast i8** [[TMP137]] to double**
-// CHECK1-NEXT:    store double* [[VLA1]], double** [[TMP138]], align 8
-// CHECK1-NEXT:    [[TMP139:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS26]], i32 0, i32 7
-// CHECK1-NEXT:    [[TMP140:%.*]] = bitcast i8** [[TMP139]] to double**
-// CHECK1-NEXT:    store double* [[VLA1]], double** [[TMP140]], align 8
-// CHECK1-NEXT:    [[TMP141:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
-// CHECK1-NEXT:    store i64 [[TMP94]], i64* [[TMP141]], align 8
-// CHECK1-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS27]], i64 0, i64 7
+// CHECK1-NEXT:    store i64 [[TMP5]], i64* [[TMP128]], align 8
+// CHECK1-NEXT:    [[TMP129:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS26]], i32 0, i32 6
+// CHECK1-NEXT:    [[TMP130:%.*]] = bitcast i8** [[TMP129]] to i64*
+// CHECK1-NEXT:    store i64 [[TMP5]], i64* [[TMP130]], align 8
+// CHECK1-NEXT:    [[TMP131:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS27]], i64 0, i64 6
+// CHECK1-NEXT:    store i8* null, i8** [[TMP131]], align 8
+// CHECK1-NEXT:    [[TMP132:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS25]], i32 0, i32 7
+// CHECK1-NEXT:    [[TMP133:%.*]] = bitcast i8** [[TMP132]] to double**
+// CHECK1-NEXT:    store double* [[VLA1]], double** [[TMP133]], align 8
+// CHECK1-NEXT:    [[TMP134:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS26]], i32 0, i32 7
+// CHECK1-NEXT:    [[TMP135:%.*]] = bitcast i8** [[TMP134]] to double**
+// CHECK1-NEXT:    store double* [[VLA1]], double** [[TMP135]], align 8
+// CHECK1-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
+// CHECK1-NEXT:    store i64 [[TMP94]], i64* [[TMP136]], align 8
+// CHECK1-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS27]], i64 0, i64 7
+// CHECK1-NEXT:    store i8* null, i8** [[TMP137]], align 8
+// CHECK1-NEXT:    [[TMP138:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS25]], i32 0, i32 8
+// CHECK1-NEXT:    [[TMP139:%.*]] = bitcast i8** [[TMP138]] to %struct.TT**
+// CHECK1-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP139]], align 8
+// CHECK1-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS26]], i32 0, i32 8
+// CHECK1-NEXT:    [[TMP141:%.*]] = bitcast i8** [[TMP140]] to %struct.TT**
+// CHECK1-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP141]], align 8
+// CHECK1-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS27]], i64 0, i64 8
 // CHECK1-NEXT:    store i8* null, i8** [[TMP142]], align 8
-// CHECK1-NEXT:    [[TMP143:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS25]], i32 0, i32 8
-// CHECK1-NEXT:    [[TMP144:%.*]] = bitcast i8** [[TMP143]] to %struct.TT**
-// CHECK1-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP144]], align 8
-// CHECK1-NEXT:    [[TMP145:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS26]], i32 0, i32 8
-// CHECK1-NEXT:    [[TMP146:%.*]] = bitcast i8** [[TMP145]] to %struct.TT**
-// CHECK1-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP146]], align 8
-// CHECK1-NEXT:    [[TMP147:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 8
-// CHECK1-NEXT:    store i64 16, i64* [[TMP147]], align 8
-// CHECK1-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS27]], i64 0, i64 8
-// CHECK1-NEXT:    store i8* null, i8** [[TMP148]], align 8
-// CHECK1-NEXT:    [[TMP149:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS25]], i32 0, i32 0
-// CHECK1-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS26]], i32 0, i32 0
-// CHECK1-NEXT:    [[TMP151:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK1-NEXT:    [[TMP152:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l142.region_id, i32 9, i8** [[TMP149]], i8** [[TMP150]], i64* [[TMP151]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.10, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK1-NEXT:    [[TMP153:%.*]] = icmp ne i32 [[TMP152]], 0
-// CHECK1-NEXT:    br i1 [[TMP153]], label [[OMP_OFFLOAD_FAILED28:%.*]], label [[OMP_OFFLOAD_CONT29:%.*]]
+// CHECK1-NEXT:    [[TMP143:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS25]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP144:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS26]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP145:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP146:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l142.region_id, i32 9, i8** [[TMP143]], i8** [[TMP144]], i64* [[TMP145]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.11, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK1-NEXT:    [[TMP147:%.*]] = icmp ne i32 [[TMP146]], 0
+// CHECK1-NEXT:    br i1 [[TMP147]], label [[OMP_OFFLOAD_FAILED28:%.*]], label [[OMP_OFFLOAD_CONT29:%.*]]
 // CHECK1:       omp_offload.failed28:
 // CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l142(i64 [[TMP90]], [10 x float]* [[B]], i64 [[TMP2]], float* [[VLA]], [5 x [10 x double]]* [[C]], i64 5, i64 [[TMP5]], double* [[VLA1]], %struct.TT* [[D]]) #[[ATTR3]]
 // CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT29]]
@@ -622,52 +610,52 @@ int bar(int n){
 // CHECK1-NEXT:    br label [[OMP_IF_END31]]
 // CHECK1:       omp_if.end31:
 // CHECK1-NEXT:    store i32 0, i32* [[NN]], align 4
-// CHECK1-NEXT:    [[TMP154:%.*]] = load i32, i32* [[NN]], align 4
+// CHECK1-NEXT:    [[TMP148:%.*]] = load i32, i32* [[NN]], align 4
 // CHECK1-NEXT:    [[CONV32:%.*]] = bitcast i64* [[NN_CASTED]] to i32*
-// CHECK1-NEXT:    store i32 [[TMP154]], i32* [[CONV32]], align 4
-// CHECK1-NEXT:    [[TMP155:%.*]] = load i64, i64* [[NN_CASTED]], align 8
-// CHECK1-NEXT:    [[TMP156:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS33]], i32 0, i32 0
-// CHECK1-NEXT:    [[TMP157:%.*]] = bitcast i8** [[TMP156]] to i64*
-// CHECK1-NEXT:    store i64 [[TMP155]], i64* [[TMP157]], align 8
-// CHECK1-NEXT:    [[TMP158:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS34]], i32 0, i32 0
-// CHECK1-NEXT:    [[TMP159:%.*]] = bitcast i8** [[TMP158]] to i64*
-// CHECK1-NEXT:    store i64 [[TMP155]], i64* [[TMP159]], align 8
-// CHECK1-NEXT:    [[TMP160:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_MAPPERS35]], i64 0, i64 0
-// CHECK1-NEXT:    store i8* null, i8** [[TMP160]], align 8
-// CHECK1-NEXT:    [[TMP161:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS33]], i32 0, i32 0
-// CHECK1-NEXT:    [[TMP162:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS34]], i32 0, i32 0
-// CHECK1-NEXT:    [[TMP163:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l154.region_id, i32 1, i8** [[TMP161]], i8** [[TMP162]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.13, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.14, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK1-NEXT:    [[TMP164:%.*]] = icmp ne i32 [[TMP163]], 0
-// CHECK1-NEXT:    br i1 [[TMP164]], label [[OMP_OFFLOAD_FAILED36:%.*]], label [[OMP_OFFLOAD_CONT37:%.*]]
+// CHECK1-NEXT:    store i32 [[TMP148]], i32* [[CONV32]], align 4
+// CHECK1-NEXT:    [[TMP149:%.*]] = load i64, i64* [[NN_CASTED]], align 8
+// CHECK1-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS33]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP151:%.*]] = bitcast i8** [[TMP150]] to i64*
+// CHECK1-NEXT:    store i64 [[TMP149]], i64* [[TMP151]], align 8
+// CHECK1-NEXT:    [[TMP152:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS34]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP153:%.*]] = bitcast i8** [[TMP152]] to i64*
+// CHECK1-NEXT:    store i64 [[TMP149]], i64* [[TMP153]], align 8
+// CHECK1-NEXT:    [[TMP154:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_MAPPERS35]], i64 0, i64 0
+// CHECK1-NEXT:    store i8* null, i8** [[TMP154]], align 8
+// CHECK1-NEXT:    [[TMP155:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS33]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP156:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS34]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP157:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l154.region_id, i32 1, i8** [[TMP155]], i8** [[TMP156]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.14, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.15, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK1-NEXT:    [[TMP158:%.*]] = icmp ne i32 [[TMP157]], 0
+// CHECK1-NEXT:    br i1 [[TMP158]], label [[OMP_OFFLOAD_FAILED36:%.*]], label [[OMP_OFFLOAD_CONT37:%.*]]
 // CHECK1:       omp_offload.failed36:
-// CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l154(i64 [[TMP155]]) #[[ATTR3]]
+// CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l154(i64 [[TMP149]]) #[[ATTR3]]
 // CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT37]]
 // CHECK1:       omp_offload.cont37:
-// CHECK1-NEXT:    [[TMP165:%.*]] = load i32, i32* [[NN]], align 4
+// CHECK1-NEXT:    [[TMP159:%.*]] = load i32, i32* [[NN]], align 4
 // CHECK1-NEXT:    [[CONV39:%.*]] = bitcast i64* [[NN_CASTED38]] to i32*
-// CHECK1-NEXT:    store i32 [[TMP165]], i32* [[CONV39]], align 4
-// CHECK1-NEXT:    [[TMP166:%.*]] = load i64, i64* [[NN_CASTED38]], align 8
-// CHECK1-NEXT:    [[TMP167:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS40]], i32 0, i32 0
-// CHECK1-NEXT:    [[TMP168:%.*]] = bitcast i8** [[TMP167]] to i64*
-// CHECK1-NEXT:    store i64 [[TMP166]], i64* [[TMP168]], align 8
-// CHECK1-NEXT:    [[TMP169:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS41]], i32 0, i32 0
-// CHECK1-NEXT:    [[TMP170:%.*]] = bitcast i8** [[TMP169]] to i64*
-// CHECK1-NEXT:    store i64 [[TMP166]], i64* [[TMP170]], align 8
-// CHECK1-NEXT:    [[TMP171:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_MAPPERS42]], i64 0, i64 0
-// CHECK1-NEXT:    store i8* null, i8** [[TMP171]], align 8
-// CHECK1-NEXT:    [[TMP172:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS40]], i32 0, i32 0
-// CHECK1-NEXT:    [[TMP173:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS41]], i32 0, i32 0
-// CHECK1-NEXT:    [[TMP174:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l157.region_id, i32 1, i8** [[TMP172]], i8** [[TMP173]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.17, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.18, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK1-NEXT:    [[TMP175:%.*]] = icmp ne i32 [[TMP174]], 0
-// CHECK1-NEXT:    br i1 [[TMP175]], label [[OMP_OFFLOAD_FAILED43:%.*]], label [[OMP_OFFLOAD_CONT44:%.*]]
+// CHECK1-NEXT:    store i32 [[TMP159]], i32* [[CONV39]], align 4
+// CHECK1-NEXT:    [[TMP160:%.*]] = load i64, i64* [[NN_CASTED38]], align 8
+// CHECK1-NEXT:    [[TMP161:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS40]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP162:%.*]] = bitcast i8** [[TMP161]] to i64*
+// CHECK1-NEXT:    store i64 [[TMP160]], i64* [[TMP162]], align 8
+// CHECK1-NEXT:    [[TMP163:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS41]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP164:%.*]] = bitcast i8** [[TMP163]] to i64*
+// CHECK1-NEXT:    store i64 [[TMP160]], i64* [[TMP164]], align 8
+// CHECK1-NEXT:    [[TMP165:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_MAPPERS42]], i64 0, i64 0
+// CHECK1-NEXT:    store i8* null, i8** [[TMP165]], align 8
+// CHECK1-NEXT:    [[TMP166:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS40]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP167:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS41]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP168:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l157.region_id, i32 1, i8** [[TMP166]], i8** [[TMP167]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.18, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.19, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK1-NEXT:    [[TMP169:%.*]] = icmp ne i32 [[TMP168]], 0
+// CHECK1-NEXT:    br i1 [[TMP169]], label [[OMP_OFFLOAD_FAILED43:%.*]], label [[OMP_OFFLOAD_CONT44:%.*]]
 // CHECK1:       omp_offload.failed43:
-// CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l157(i64 [[TMP166]]) #[[ATTR3]]
+// CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l157(i64 [[TMP160]]) #[[ATTR3]]
 // CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT44]]
 // CHECK1:       omp_offload.cont44:
-// CHECK1-NEXT:    [[TMP176:%.*]] = load i32, i32* [[A]], align 4
-// CHECK1-NEXT:    [[TMP177:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK1-NEXT:    call void @llvm.stackrestore(i8* [[TMP177]])
-// CHECK1-NEXT:    ret i32 [[TMP176]]
+// CHECK1-NEXT:    [[TMP170:%.*]] = load i32, i32* [[A]], align 4
+// CHECK1-NEXT:    [[TMP171:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK1-NEXT:    call void @llvm.stackrestore(i8* [[TMP171]])
+// CHECK1-NEXT:    ret i32 [[TMP170]]
 //
 //
 // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l101
@@ -1049,11 +1037,11 @@ int bar(int n){
 // CHECK1-NEXT:    [[CONV1:%.*]] = bitcast i64* [[NN_CASTED]] to i32*
 // CHECK1-NEXT:    store i32 [[TMP0]], i32* [[CONV1]], align 4
 // CHECK1-NEXT:    [[TMP1:%.*]] = load i64, i64* [[NN_CASTED]], align 8
-// CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP1]])
+// CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP1]])
 // CHECK1-NEXT:    ret void
 //
 //
-// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..11
+// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..12
 // CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[NN:%.*]]) #[[ATTR2]] {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -1068,11 +1056,11 @@ int bar(int n){
 // CHECK1-NEXT:    [[CONV1:%.*]] = bitcast i64* [[NN_CASTED]] to i32*
 // CHECK1-NEXT:    store i32 [[TMP0]], i32* [[CONV1]], align 4
 // CHECK1-NEXT:    [[TMP1:%.*]] = load i64, i64* [[NN_CASTED]], align 8
-// CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP1]])
+// CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i64 [[TMP1]])
 // CHECK1-NEXT:    ret void
 //
 //
-// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..12
+// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..13
 // CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[NN:%.*]]) #[[ATTR2]] {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -1096,11 +1084,11 @@ int bar(int n){
 // CHECK1-NEXT:    [[CONV1:%.*]] = bitcast i64* [[NN_CASTED]] to i32*
 // CHECK1-NEXT:    store i32 [[TMP0]], i32* [[CONV1]], align 4
 // CHECK1-NEXT:    [[TMP1:%.*]] = load i64, i64* [[NN_CASTED]], align 8
-// CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP1]])
+// CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP1]])
 // CHECK1-NEXT:    ret void
 //
 //
-// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..15
+// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..16
 // CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[NN:%.*]]) #[[ATTR2]] {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -1110,11 +1098,11 @@ int bar(int n){
 // CHECK1-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
 // CHECK1-NEXT:    store i64 [[NN]], i64* [[NN_ADDR]], align 8
 // CHECK1-NEXT:    [[CONV:%.*]] = bitcast i64* [[NN_ADDR]] to i32*
-// CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i32* [[CONV]])
+// CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..17 to void (i32*, i32*, ...)*), i32* [[CONV]])
 // CHECK1-NEXT:    ret void
 //
 //
-// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..16
+// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..17
 // CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[NN:%.*]]) #[[ATTR2]] {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -1149,7 +1137,7 @@ int bar(int n){
 // CHECK1-NEXT:    store i8* null, i8** [[TMP6]], align 8
 // CHECK1-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[TMP9:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6bazzzziPi_l182.region_id, i32 1, i8** [[TMP7]], i8** [[TMP8]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.20, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.21, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK1-NEXT:    [[TMP9:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6bazzzziPi_l182.region_id, i32 1, i8** [[TMP7]], i8** [[TMP8]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.21, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.22, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK1-NEXT:    [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0
 // CHECK1-NEXT:    br i1 [[TMP10]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK1:       omp_offload.failed:
@@ -1165,11 +1153,11 @@ int bar(int n){
 // CHECK1-NEXT:    [[VLA_ADDR:%.*]] = alloca i64, align 8
 // CHECK1-NEXT:    store i64 [[VLA]], i64* [[VLA_ADDR]], align 8
 // CHECK1-NEXT:    [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8
-// CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i64 [[TMP0]])
+// CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..20 to void (i32*, i32*, ...)*), i64 [[TMP0]])
 // CHECK1-NEXT:    ret void
 //
 //
-// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..19
+// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..20
 // CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]]) #[[ATTR2]] {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -1252,14 +1240,14 @@ int bar(int n){
 // CHECK1-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP8:%.*]] = mul nuw i64 2, [[TMP2]]
 // CHECK1-NEXT:    [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 2
-// CHECK1-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[TMP11:%.*]] = bitcast i8** [[TMP10]] to %struct.S1**
-// CHECK1-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP11]], align 8
-// CHECK1-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[TMP13:%.*]] = bitcast i8** [[TMP12]] to double**
-// CHECK1-NEXT:    store double* [[A]], double** [[TMP13]], align 8
-// CHECK1-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK1-NEXT:    store i64 8, i64* [[TMP14]], align 8
+// CHECK1-NEXT:    [[TMP10:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK1-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP10]], i8* align 8 bitcast ([5 x i64]* @.offload_sizes.24 to i8*), i64 40, i1 false)
+// CHECK1-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP12:%.*]] = bitcast i8** [[TMP11]] to %struct.S1**
+// CHECK1-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP12]], align 8
+// CHECK1-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to double**
+// CHECK1-NEXT:    store double* [[A]], double** [[TMP14]], align 8
 // CHECK1-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
 // CHECK1-NEXT:    store i8* null, i8** [[TMP15]], align 8
 // CHECK1-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -1268,46 +1256,40 @@ int bar(int n){
 // CHECK1-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK1-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i64*
 // CHECK1-NEXT:    store i64 [[TMP6]], i64* [[TMP19]], align 8
-// CHECK1-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK1-NEXT:    store i64 4, i64* [[TMP20]], align 8
-// CHECK1-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK1-NEXT:    store i8* null, i8** [[TMP21]], align 8
-// CHECK1-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK1-NEXT:    [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i64*
-// CHECK1-NEXT:    store i64 2, i64* [[TMP23]], align 8
-// CHECK1-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK1-NEXT:    [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i64*
-// CHECK1-NEXT:    store i64 2, i64* [[TMP25]], align 8
-// CHECK1-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK1-NEXT:    store i64 8, i64* [[TMP26]], align 8
-// CHECK1-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK1-NEXT:    store i8* null, i8** [[TMP27]], align 8
-// CHECK1-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK1-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK1-NEXT:    store i8* null, i8** [[TMP20]], align 8
+// CHECK1-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK1-NEXT:    [[TMP22:%.*]] = bitcast i8** [[TMP21]] to i64*
+// CHECK1-NEXT:    store i64 2, i64* [[TMP22]], align 8
+// CHECK1-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK1-NEXT:    [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i64*
+// CHECK1-NEXT:    store i64 2, i64* [[TMP24]], align 8
+// CHECK1-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK1-NEXT:    store i8* null, i8** [[TMP25]], align 8
+// CHECK1-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK1-NEXT:    [[TMP27:%.*]] = bitcast i8** [[TMP26]] to i64*
+// CHECK1-NEXT:    store i64 [[TMP2]], i64* [[TMP27]], align 8
+// CHECK1-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
 // CHECK1-NEXT:    [[TMP29:%.*]] = bitcast i8** [[TMP28]] to i64*
 // CHECK1-NEXT:    store i64 [[TMP2]], i64* [[TMP29]], align 8
-// CHECK1-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK1-NEXT:    [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i64*
-// CHECK1-NEXT:    store i64 [[TMP2]], i64* [[TMP31]], align 8
-// CHECK1-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK1-NEXT:    store i64 8, i64* [[TMP32]], align 8
-// CHECK1-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
-// CHECK1-NEXT:    store i8* null, i8** [[TMP33]], align 8
-// CHECK1-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK1-NEXT:    [[TMP35:%.*]] = bitcast i8** [[TMP34]] to i16**
-// CHECK1-NEXT:    store i16* [[VLA]], i16** [[TMP35]], align 8
-// CHECK1-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
-// CHECK1-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i16**
-// CHECK1-NEXT:    store i16* [[VLA]], i16** [[TMP37]], align 8
-// CHECK1-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK1-NEXT:    store i64 [[TMP9]], i64* [[TMP38]], align 8
-// CHECK1-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
-// CHECK1-NEXT:    store i8* null, i8** [[TMP39]], align 8
-// CHECK1-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK1-NEXT:    [[TMP43:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l227.region_id, i32 5, i8** [[TMP40]], i8** [[TMP41]], i64* [[TMP42]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.23, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK1-NEXT:    [[TMP44:%.*]] = icmp ne i32 [[TMP43]], 0
-// CHECK1-NEXT:    br i1 [[TMP44]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK1-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
+// CHECK1-NEXT:    store i8* null, i8** [[TMP30]], align 8
+// CHECK1-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK1-NEXT:    [[TMP32:%.*]] = bitcast i8** [[TMP31]] to i16**
+// CHECK1-NEXT:    store i16* [[VLA]], i16** [[TMP32]], align 8
+// CHECK1-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK1-NEXT:    [[TMP34:%.*]] = bitcast i8** [[TMP33]] to i16**
+// CHECK1-NEXT:    store i16* [[VLA]], i16** [[TMP34]], align 8
+// CHECK1-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
+// CHECK1-NEXT:    store i64 [[TMP9]], i64* [[TMP35]], align 8
+// CHECK1-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
+// CHECK1-NEXT:    store i8* null, i8** [[TMP36]], align 8
+// CHECK1-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP40:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l227.region_id, i32 5, i8** [[TMP37]], i8** [[TMP38]], i64* [[TMP39]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.25, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK1-NEXT:    [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0
+// CHECK1-NEXT:    br i1 [[TMP41]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK1:       omp_offload.failed:
 // CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l227(%struct.S1* [[THIS1]], i64 [[TMP6]], i64 2, i64 [[TMP2]], i16* [[VLA]]) #[[ATTR3]]
 // CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT]]
@@ -1317,15 +1299,15 @@ int bar(int n){
 // CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l227(%struct.S1* [[THIS1]], i64 [[TMP6]], i64 2, i64 [[TMP2]], i16* [[VLA]]) #[[ATTR3]]
 // CHECK1-NEXT:    br label [[OMP_IF_END]]
 // CHECK1:       omp_if.end:
-// CHECK1-NEXT:    [[TMP45:%.*]] = mul nsw i64 1, [[TMP2]]
-// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i64 [[TMP45]]
+// CHECK1-NEXT:    [[TMP42:%.*]] = mul nsw i64 1, [[TMP2]]
+// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i64 [[TMP42]]
 // CHECK1-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1
-// CHECK1-NEXT:    [[TMP46:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
-// CHECK1-NEXT:    [[CONV3:%.*]] = sext i16 [[TMP46]] to i32
-// CHECK1-NEXT:    [[TMP47:%.*]] = load i32, i32* [[B]], align 4
-// CHECK1-NEXT:    [[ADD4:%.*]] = add nsw i32 [[CONV3]], [[TMP47]]
-// CHECK1-NEXT:    [[TMP48:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK1-NEXT:    call void @llvm.stackrestore(i8* [[TMP48]])
+// CHECK1-NEXT:    [[TMP43:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
+// CHECK1-NEXT:    [[CONV3:%.*]] = sext i16 [[TMP43]] to i32
+// CHECK1-NEXT:    [[TMP44:%.*]] = load i32, i32* [[B]], align 4
+// CHECK1-NEXT:    [[ADD4:%.*]] = add nsw i32 [[CONV3]], [[TMP44]]
+// CHECK1-NEXT:    [[TMP45:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK1-NEXT:    call void @llvm.stackrestore(i8* [[TMP45]])
 // CHECK1-NEXT:    ret i32 [[ADD4]]
 //
 //
@@ -1397,7 +1379,7 @@ int bar(int n){
 // CHECK1-NEXT:    store i8* null, i8** [[TMP26]], align 8
 // CHECK1-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[TMP29:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l209.region_id, i32 4, i8** [[TMP27]], i8** [[TMP28]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.25, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.26, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK1-NEXT:    [[TMP29:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l209.region_id, i32 4, i8** [[TMP27]], i8** [[TMP28]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.27, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.28, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK1-NEXT:    [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0
 // CHECK1-NEXT:    br i1 [[TMP30]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK1:       omp_offload.failed:
@@ -1466,7 +1448,7 @@ int bar(int n){
 // CHECK1-NEXT:    store i8* null, i8** [[TMP19]], align 8
 // CHECK1-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l192.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.28, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.29, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK1-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l192.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.30, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.31, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK1-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK1-NEXT:    br i1 [[TMP23]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK1:       omp_offload.failed:
@@ -1505,11 +1487,11 @@ int bar(int n){
 // CHECK1-NEXT:    [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32*
 // CHECK1-NEXT:    store i32 [[TMP4]], i32* [[CONV3]], align 4
 // CHECK1-NEXT:    [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8
-// CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i64, i64, i64, i16*)* @.omp_outlined..22 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i64 [[TMP5]], i64 [[TMP1]], i64 [[TMP2]], i16* [[TMP3]])
+// CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i64, i64, i64, i16*)* @.omp_outlined..23 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i64 [[TMP5]], i64 [[TMP1]], i64 [[TMP2]], i16* [[TMP3]])
 // CHECK1-NEXT:    ret void
 //
 //
-// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..22
+// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..23
 // CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR2]] {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -1578,11 +1560,11 @@ int bar(int n){
 // CHECK1-NEXT:    [[CONV5:%.*]] = bitcast i64* [[AAA_CASTED]] to i8*
 // CHECK1-NEXT:    store i8 [[TMP5]], i8* [[CONV5]], align 1
 // CHECK1-NEXT:    [[TMP6:%.*]] = load i64, i64* [[AAA_CASTED]], align 8
-// CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, [10 x i32]*)* @.omp_outlined..24 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], [10 x i32]* [[TMP0]])
+// CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, [10 x i32]*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], [10 x i32]* [[TMP0]])
 // CHECK1-NEXT:    ret void
 //
 //
-// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..24
+// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..26
 // CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -1643,11 +1625,11 @@ int bar(int n){
 // CHECK1-NEXT:    [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16*
 // CHECK1-NEXT:    store i16 [[TMP3]], i16* [[CONV3]], align 2
 // CHECK1-NEXT:    [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8
-// CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..27 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], [10 x i32]* [[TMP0]])
+// CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..29 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], [10 x i32]* [[TMP0]])
 // CHECK1-NEXT:    ret void
 //
 //
-// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..27
+// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..29
 // CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -1899,14 +1881,14 @@ int bar(int n){
 // CHECK2-NEXT:    [[TMP92:%.*]] = mul nuw i64 [[TMP2]], 4
 // CHECK2-NEXT:    [[TMP93:%.*]] = mul nuw i64 5, [[TMP5]]
 // CHECK2-NEXT:    [[TMP94:%.*]] = mul nuw i64 [[TMP93]], 8
-// CHECK2-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS25]], i32 0, i32 0
-// CHECK2-NEXT:    [[TMP96:%.*]] = bitcast i8** [[TMP95]] to i64*
-// CHECK2-NEXT:    store i64 [[TMP90]], i64* [[TMP96]], align 8
-// CHECK2-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS26]], i32 0, i32 0
-// CHECK2-NEXT:    [[TMP98:%.*]] = bitcast i8** [[TMP97]] to i64*
-// CHECK2-NEXT:    store i64 [[TMP90]], i64* [[TMP98]], align 8
-// CHECK2-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK2-NEXT:    store i64 4, i64* [[TMP99]], align 8
+// CHECK2-NEXT:    [[TMP95:%.*]] = bitcast [9 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK2-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP95]], i8* align 8 bitcast ([9 x i64]* @.offload_sizes.10 to i8*), i64 72, i1 false)
+// CHECK2-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS25]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP97:%.*]] = bitcast i8** [[TMP96]] to i64*
+// CHECK2-NEXT:    store i64 [[TMP90]], i64* [[TMP97]], align 8
+// CHECK2-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS26]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP99:%.*]] = bitcast i8** [[TMP98]] to i64*
+// CHECK2-NEXT:    store i64 [[TMP90]], i64* [[TMP99]], align 8
 // CHECK2-NEXT:    [[TMP100:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS27]], i64 0, i64 0
 // CHECK2-NEXT:    store i8* null, i8** [[TMP100]], align 8
 // CHECK2-NEXT:    [[TMP101:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS25]], i32 0, i32 1
@@ -1915,86 +1897,74 @@ int bar(int n){
 // CHECK2-NEXT:    [[TMP103:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS26]], i32 0, i32 1
 // CHECK2-NEXT:    [[TMP104:%.*]] = bitcast i8** [[TMP103]] to [10 x float]**
 // CHECK2-NEXT:    store [10 x float]* [[B]], [10 x float]** [[TMP104]], align 8
-// CHECK2-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK2-NEXT:    store i64 40, i64* [[TMP105]], align 8
-// CHECK2-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS27]], i64 0, i64 1
-// CHECK2-NEXT:    store i8* null, i8** [[TMP106]], align 8
-// CHECK2-NEXT:    [[TMP107:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS25]], i32 0, i32 2
-// CHECK2-NEXT:    [[TMP108:%.*]] = bitcast i8** [[TMP107]] to i64*
-// CHECK2-NEXT:    store i64 [[TMP2]], i64* [[TMP108]], align 8
-// CHECK2-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS26]], i32 0, i32 2
-// CHECK2-NEXT:    [[TMP110:%.*]] = bitcast i8** [[TMP109]] to i64*
-// CHECK2-NEXT:    store i64 [[TMP2]], i64* [[TMP110]], align 8
-// CHECK2-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK2-NEXT:    store i64 8, i64* [[TMP111]], align 8
-// CHECK2-NEXT:    [[TMP112:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS27]], i64 0, i64 2
-// CHECK2-NEXT:    store i8* null, i8** [[TMP112]], align 8
-// CHECK2-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS25]], i32 0, i32 3
+// CHECK2-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS27]], i64 0, i64 1
+// CHECK2-NEXT:    store i8* null, i8** [[TMP105]], align 8
+// CHECK2-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS25]], i32 0, i32 2
+// CHECK2-NEXT:    [[TMP107:%.*]] = bitcast i8** [[TMP106]] to i64*
+// CHECK2-NEXT:    store i64 [[TMP2]], i64* [[TMP107]], align 8
+// CHECK2-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS26]], i32 0, i32 2
+// CHECK2-NEXT:    [[TMP109:%.*]] = bitcast i8** [[TMP108]] to i64*
+// CHECK2-NEXT:    store i64 [[TMP2]], i64* [[TMP109]], align 8
+// CHECK2-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS27]], i64 0, i64 2
+// CHECK2-NEXT:    store i8* null, i8** [[TMP110]], align 8
+// CHECK2-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS25]], i32 0, i32 3
+// CHECK2-NEXT:    [[TMP112:%.*]] = bitcast i8** [[TMP111]] to float**
+// CHECK2-NEXT:    store float* [[VLA]], float** [[TMP112]], align 8
+// CHECK2-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS26]], i32 0, i32 3
 // CHECK2-NEXT:    [[TMP114:%.*]] = bitcast i8** [[TMP113]] to float**
 // CHECK2-NEXT:    store float* [[VLA]], float** [[TMP114]], align 8
-// CHECK2-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS26]], i32 0, i32 3
-// CHECK2-NEXT:    [[TMP116:%.*]] = bitcast i8** [[TMP115]] to float**
-// CHECK2-NEXT:    store float* [[VLA]], float** [[TMP116]], align 8
-// CHECK2-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK2-NEXT:    store i64 [[TMP92]], i64* [[TMP117]], align 8
-// CHECK2-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS27]], i64 0, i64 3
-// CHECK2-NEXT:    store i8* null, i8** [[TMP118]], align 8
-// CHECK2-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS25]], i32 0, i32 4
+// CHECK2-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
+// CHECK2-NEXT:    store i64 [[TMP92]], i64* [[TMP115]], align 8
+// CHECK2-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS27]], i64 0, i64 3
+// CHECK2-NEXT:    store i8* null, i8** [[TMP116]], align 8
+// CHECK2-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS25]], i32 0, i32 4
+// CHECK2-NEXT:    [[TMP118:%.*]] = bitcast i8** [[TMP117]] to [5 x [10 x double]]**
+// CHECK2-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP118]], align 8
+// CHECK2-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS26]], i32 0, i32 4
 // CHECK2-NEXT:    [[TMP120:%.*]] = bitcast i8** [[TMP119]] to [5 x [10 x double]]**
 // CHECK2-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP120]], align 8
-// CHECK2-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS26]], i32 0, i32 4
-// CHECK2-NEXT:    [[TMP122:%.*]] = bitcast i8** [[TMP121]] to [5 x [10 x double]]**
-// CHECK2-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP122]], align 8
-// CHECK2-NEXT:    [[TMP123:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK2-NEXT:    store i64 400, i64* [[TMP123]], align 8
-// CHECK2-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS27]], i64 0, i64 4
-// CHECK2-NEXT:    store i8* null, i8** [[TMP124]], align 8
-// CHECK2-NEXT:    [[TMP125:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS25]], i32 0, i32 5
-// CHECK2-NEXT:    [[TMP126:%.*]] = bitcast i8** [[TMP125]] to i64*
-// CHECK2-NEXT:    store i64 5, i64* [[TMP126]], align 8
-// CHECK2-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS26]], i32 0, i32 5
+// CHECK2-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS27]], i64 0, i64 4
+// CHECK2-NEXT:    store i8* null, i8** [[TMP121]], align 8
+// CHECK2-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS25]], i32 0, i32 5
+// CHECK2-NEXT:    [[TMP123:%.*]] = bitcast i8** [[TMP122]] to i64*
+// CHECK2-NEXT:    store i64 5, i64* [[TMP123]], align 8
+// CHECK2-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS26]], i32 0, i32 5
+// CHECK2-NEXT:    [[TMP125:%.*]] = bitcast i8** [[TMP124]] to i64*
+// CHECK2-NEXT:    store i64 5, i64* [[TMP125]], align 8
+// CHECK2-NEXT:    [[TMP126:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS27]], i64 0, i64 5
+// CHECK2-NEXT:    store i8* null, i8** [[TMP126]], align 8
+// CHECK2-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS25]], i32 0, i32 6
 // CHECK2-NEXT:    [[TMP128:%.*]] = bitcast i8** [[TMP127]] to i64*
-// CHECK2-NEXT:    store i64 5, i64* [[TMP128]], align 8
-// CHECK2-NEXT:    [[TMP129:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 5
-// CHECK2-NEXT:    store i64 8, i64* [[TMP129]], align 8
-// CHECK2-NEXT:    [[TMP130:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS27]], i64 0, i64 5
-// CHECK2-NEXT:    store i8* null, i8** [[TMP130]], align 8
-// CHECK2-NEXT:    [[TMP131:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS25]], i32 0, i32 6
-// CHECK2-NEXT:    [[TMP132:%.*]] = bitcast i8** [[TMP131]] to i64*
-// CHECK2-NEXT:    store i64 [[TMP5]], i64* [[TMP132]], align 8
-// CHECK2-NEXT:    [[TMP133:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS26]], i32 0, i32 6
-// CHECK2-NEXT:    [[TMP134:%.*]] = bitcast i8** [[TMP133]] to i64*
-// CHECK2-NEXT:    store i64 [[TMP5]], i64* [[TMP134]], align 8
-// CHECK2-NEXT:    [[TMP135:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 6
-// CHECK2-NEXT:    store i64 8, i64* [[TMP135]], align 8
-// CHECK2-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS27]], i64 0, i64 6
-// CHECK2-NEXT:    store i8* null, i8** [[TMP136]], align 8
-// CHECK2-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS25]], i32 0, i32 7
-// CHECK2-NEXT:    [[TMP138:%.*]] = bitcast i8** [[TMP137]] to double**
-// CHECK2-NEXT:    store double* [[VLA1]], double** [[TMP138]], align 8
-// CHECK2-NEXT:    [[TMP139:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS26]], i32 0, i32 7
-// CHECK2-NEXT:    [[TMP140:%.*]] = bitcast i8** [[TMP139]] to double**
-// CHECK2-NEXT:    store double* [[VLA1]], double** [[TMP140]], align 8
-// CHECK2-NEXT:    [[TMP141:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
-// CHECK2-NEXT:    store i64 [[TMP94]], i64* [[TMP141]], align 8
-// CHECK2-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS27]], i64 0, i64 7
+// CHECK2-NEXT:    store i64 [[TMP5]], i64* [[TMP128]], align 8
+// CHECK2-NEXT:    [[TMP129:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS26]], i32 0, i32 6
+// CHECK2-NEXT:    [[TMP130:%.*]] = bitcast i8** [[TMP129]] to i64*
+// CHECK2-NEXT:    store i64 [[TMP5]], i64* [[TMP130]], align 8
+// CHECK2-NEXT:    [[TMP131:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS27]], i64 0, i64 6
+// CHECK2-NEXT:    store i8* null, i8** [[TMP131]], align 8
+// CHECK2-NEXT:    [[TMP132:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS25]], i32 0, i32 7
+// CHECK2-NEXT:    [[TMP133:%.*]] = bitcast i8** [[TMP132]] to double**
+// CHECK2-NEXT:    store double* [[VLA1]], double** [[TMP133]], align 8
+// CHECK2-NEXT:    [[TMP134:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS26]], i32 0, i32 7
+// CHECK2-NEXT:    [[TMP135:%.*]] = bitcast i8** [[TMP134]] to double**
+// CHECK2-NEXT:    store double* [[VLA1]], double** [[TMP135]], align 8
+// CHECK2-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
+// CHECK2-NEXT:    store i64 [[TMP94]], i64* [[TMP136]], align 8
+// CHECK2-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS27]], i64 0, i64 7
+// CHECK2-NEXT:    store i8* null, i8** [[TMP137]], align 8
+// CHECK2-NEXT:    [[TMP138:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS25]], i32 0, i32 8
+// CHECK2-NEXT:    [[TMP139:%.*]] = bitcast i8** [[TMP138]] to %struct.TT**
+// CHECK2-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP139]], align 8
+// CHECK2-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS26]], i32 0, i32 8
+// CHECK2-NEXT:    [[TMP141:%.*]] = bitcast i8** [[TMP140]] to %struct.TT**
+// CHECK2-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP141]], align 8
+// CHECK2-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS27]], i64 0, i64 8
 // CHECK2-NEXT:    store i8* null, i8** [[TMP142]], align 8
-// CHECK2-NEXT:    [[TMP143:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS25]], i32 0, i32 8
-// CHECK2-NEXT:    [[TMP144:%.*]] = bitcast i8** [[TMP143]] to %struct.TT**
-// CHECK2-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP144]], align 8
-// CHECK2-NEXT:    [[TMP145:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS26]], i32 0, i32 8
-// CHECK2-NEXT:    [[TMP146:%.*]] = bitcast i8** [[TMP145]] to %struct.TT**
-// CHECK2-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP146]], align 8
-// CHECK2-NEXT:    [[TMP147:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 8
-// CHECK2-NEXT:    store i64 16, i64* [[TMP147]], align 8
-// CHECK2-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS27]], i64 0, i64 8
-// CHECK2-NEXT:    store i8* null, i8** [[TMP148]], align 8
-// CHECK2-NEXT:    [[TMP149:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS25]], i32 0, i32 0
-// CHECK2-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS26]], i32 0, i32 0
-// CHECK2-NEXT:    [[TMP151:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK2-NEXT:    [[TMP152:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l142.region_id, i32 9, i8** [[TMP149]], i8** [[TMP150]], i64* [[TMP151]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.10, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK2-NEXT:    [[TMP153:%.*]] = icmp ne i32 [[TMP152]], 0
-// CHECK2-NEXT:    br i1 [[TMP153]], label [[OMP_OFFLOAD_FAILED28:%.*]], label [[OMP_OFFLOAD_CONT29:%.*]]
+// CHECK2-NEXT:    [[TMP143:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS25]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP144:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS26]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP145:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP146:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l142.region_id, i32 9, i8** [[TMP143]], i8** [[TMP144]], i64* [[TMP145]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.11, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK2-NEXT:    [[TMP147:%.*]] = icmp ne i32 [[TMP146]], 0
+// CHECK2-NEXT:    br i1 [[TMP147]], label [[OMP_OFFLOAD_FAILED28:%.*]], label [[OMP_OFFLOAD_CONT29:%.*]]
 // CHECK2:       omp_offload.failed28:
 // CHECK2-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l142(i64 [[TMP90]], [10 x float]* [[B]], i64 [[TMP2]], float* [[VLA]], [5 x [10 x double]]* [[C]], i64 5, i64 [[TMP5]], double* [[VLA1]], %struct.TT* [[D]]) #[[ATTR3]]
 // CHECK2-NEXT:    br label [[OMP_OFFLOAD_CONT29]]
@@ -2005,52 +1975,52 @@ int bar(int n){
 // CHECK2-NEXT:    br label [[OMP_IF_END31]]
 // CHECK2:       omp_if.end31:
 // CHECK2-NEXT:    store i32 0, i32* [[NN]], align 4
-// CHECK2-NEXT:    [[TMP154:%.*]] = load i32, i32* [[NN]], align 4
+// CHECK2-NEXT:    [[TMP148:%.*]] = load i32, i32* [[NN]], align 4
 // CHECK2-NEXT:    [[CONV32:%.*]] = bitcast i64* [[NN_CASTED]] to i32*
-// CHECK2-NEXT:    store i32 [[TMP154]], i32* [[CONV32]], align 4
-// CHECK2-NEXT:    [[TMP155:%.*]] = load i64, i64* [[NN_CASTED]], align 8
-// CHECK2-NEXT:    [[TMP156:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS33]], i32 0, i32 0
-// CHECK2-NEXT:    [[TMP157:%.*]] = bitcast i8** [[TMP156]] to i64*
-// CHECK2-NEXT:    store i64 [[TMP155]], i64* [[TMP157]], align 8
-// CHECK2-NEXT:    [[TMP158:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS34]], i32 0, i32 0
-// CHECK2-NEXT:    [[TMP159:%.*]] = bitcast i8** [[TMP158]] to i64*
-// CHECK2-NEXT:    store i64 [[TMP155]], i64* [[TMP159]], align 8
-// CHECK2-NEXT:    [[TMP160:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_MAPPERS35]], i64 0, i64 0
-// CHECK2-NEXT:    store i8* null, i8** [[TMP160]], align 8
-// CHECK2-NEXT:    [[TMP161:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS33]], i32 0, i32 0
-// CHECK2-NEXT:    [[TMP162:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS34]], i32 0, i32 0
-// CHECK2-NEXT:    [[TMP163:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l154.region_id, i32 1, i8** [[TMP161]], i8** [[TMP162]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.13, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.14, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK2-NEXT:    [[TMP164:%.*]] = icmp ne i32 [[TMP163]], 0
-// CHECK2-NEXT:    br i1 [[TMP164]], label [[OMP_OFFLOAD_FAILED36:%.*]], label [[OMP_OFFLOAD_CONT37:%.*]]
+// CHECK2-NEXT:    store i32 [[TMP148]], i32* [[CONV32]], align 4
+// CHECK2-NEXT:    [[TMP149:%.*]] = load i64, i64* [[NN_CASTED]], align 8
+// CHECK2-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS33]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP151:%.*]] = bitcast i8** [[TMP150]] to i64*
+// CHECK2-NEXT:    store i64 [[TMP149]], i64* [[TMP151]], align 8
+// CHECK2-NEXT:    [[TMP152:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS34]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP153:%.*]] = bitcast i8** [[TMP152]] to i64*
+// CHECK2-NEXT:    store i64 [[TMP149]], i64* [[TMP153]], align 8
+// CHECK2-NEXT:    [[TMP154:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_MAPPERS35]], i64 0, i64 0
+// CHECK2-NEXT:    store i8* null, i8** [[TMP154]], align 8
+// CHECK2-NEXT:    [[TMP155:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS33]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP156:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS34]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP157:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l154.region_id, i32 1, i8** [[TMP155]], i8** [[TMP156]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.14, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.15, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK2-NEXT:    [[TMP158:%.*]] = icmp ne i32 [[TMP157]], 0
+// CHECK2-NEXT:    br i1 [[TMP158]], label [[OMP_OFFLOAD_FAILED36:%.*]], label [[OMP_OFFLOAD_CONT37:%.*]]
 // CHECK2:       omp_offload.failed36:
-// CHECK2-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l154(i64 [[TMP155]]) #[[ATTR3]]
+// CHECK2-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l154(i64 [[TMP149]]) #[[ATTR3]]
 // CHECK2-NEXT:    br label [[OMP_OFFLOAD_CONT37]]
 // CHECK2:       omp_offload.cont37:
-// CHECK2-NEXT:    [[TMP165:%.*]] = load i32, i32* [[NN]], align 4
+// CHECK2-NEXT:    [[TMP159:%.*]] = load i32, i32* [[NN]], align 4
 // CHECK2-NEXT:    [[CONV39:%.*]] = bitcast i64* [[NN_CASTED38]] to i32*
-// CHECK2-NEXT:    store i32 [[TMP165]], i32* [[CONV39]], align 4
-// CHECK2-NEXT:    [[TMP166:%.*]] = load i64, i64* [[NN_CASTED38]], align 8
-// CHECK2-NEXT:    [[TMP167:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS40]], i32 0, i32 0
-// CHECK2-NEXT:    [[TMP168:%.*]] = bitcast i8** [[TMP167]] to i64*
-// CHECK2-NEXT:    store i64 [[TMP166]], i64* [[TMP168]], align 8
-// CHECK2-NEXT:    [[TMP169:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS41]], i32 0, i32 0
-// CHECK2-NEXT:    [[TMP170:%.*]] = bitcast i8** [[TMP169]] to i64*
-// CHECK2-NEXT:    store i64 [[TMP166]], i64* [[TMP170]], align 8
-// CHECK2-NEXT:    [[TMP171:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_MAPPERS42]], i64 0, i64 0
-// CHECK2-NEXT:    store i8* null, i8** [[TMP171]], align 8
-// CHECK2-NEXT:    [[TMP172:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS40]], i32 0, i32 0
-// CHECK2-NEXT:    [[TMP173:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS41]], i32 0, i32 0
-// CHECK2-NEXT:    [[TMP174:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l157.region_id, i32 1, i8** [[TMP172]], i8** [[TMP173]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.17, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.18, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK2-NEXT:    [[TMP175:%.*]] = icmp ne i32 [[TMP174]], 0
-// CHECK2-NEXT:    br i1 [[TMP175]], label [[OMP_OFFLOAD_FAILED43:%.*]], label [[OMP_OFFLOAD_CONT44:%.*]]
+// CHECK2-NEXT:    store i32 [[TMP159]], i32* [[CONV39]], align 4
+// CHECK2-NEXT:    [[TMP160:%.*]] = load i64, i64* [[NN_CASTED38]], align 8
+// CHECK2-NEXT:    [[TMP161:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS40]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP162:%.*]] = bitcast i8** [[TMP161]] to i64*
+// CHECK2-NEXT:    store i64 [[TMP160]], i64* [[TMP162]], align 8
+// CHECK2-NEXT:    [[TMP163:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS41]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP164:%.*]] = bitcast i8** [[TMP163]] to i64*
+// CHECK2-NEXT:    store i64 [[TMP160]], i64* [[TMP164]], align 8
+// CHECK2-NEXT:    [[TMP165:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_MAPPERS42]], i64 0, i64 0
+// CHECK2-NEXT:    store i8* null, i8** [[TMP165]], align 8
+// CHECK2-NEXT:    [[TMP166:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS40]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP167:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS41]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP168:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l157.region_id, i32 1, i8** [[TMP166]], i8** [[TMP167]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.18, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.19, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK2-NEXT:    [[TMP169:%.*]] = icmp ne i32 [[TMP168]], 0
+// CHECK2-NEXT:    br i1 [[TMP169]], label [[OMP_OFFLOAD_FAILED43:%.*]], label [[OMP_OFFLOAD_CONT44:%.*]]
 // CHECK2:       omp_offload.failed43:
-// CHECK2-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l157(i64 [[TMP166]]) #[[ATTR3]]
+// CHECK2-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l157(i64 [[TMP160]]) #[[ATTR3]]
 // CHECK2-NEXT:    br label [[OMP_OFFLOAD_CONT44]]
 // CHECK2:       omp_offload.cont44:
-// CHECK2-NEXT:    [[TMP176:%.*]] = load i32, i32* [[A]], align 4
-// CHECK2-NEXT:    [[TMP177:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK2-NEXT:    call void @llvm.stackrestore(i8* [[TMP177]])
-// CHECK2-NEXT:    ret i32 [[TMP176]]
+// CHECK2-NEXT:    [[TMP170:%.*]] = load i32, i32* [[A]], align 4
+// CHECK2-NEXT:    [[TMP171:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK2-NEXT:    call void @llvm.stackrestore(i8* [[TMP171]])
+// CHECK2-NEXT:    ret i32 [[TMP170]]
 //
 //
 // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l101
@@ -2432,11 +2402,11 @@ int bar(int n){
 // CHECK2-NEXT:    [[CONV1:%.*]] = bitcast i64* [[NN_CASTED]] to i32*
 // CHECK2-NEXT:    store i32 [[TMP0]], i32* [[CONV1]], align 4
 // CHECK2-NEXT:    [[TMP1:%.*]] = load i64, i64* [[NN_CASTED]], align 8
-// CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP1]])
+// CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP1]])
 // CHECK2-NEXT:    ret void
 //
 //
-// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..11
+// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..12
 // CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[NN:%.*]]) #[[ATTR2]] {
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -2451,11 +2421,11 @@ int bar(int n){
 // CHECK2-NEXT:    [[CONV1:%.*]] = bitcast i64* [[NN_CASTED]] to i32*
 // CHECK2-NEXT:    store i32 [[TMP0]], i32* [[CONV1]], align 4
 // CHECK2-NEXT:    [[TMP1:%.*]] = load i64, i64* [[NN_CASTED]], align 8
-// CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP1]])
+// CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i64 [[TMP1]])
 // CHECK2-NEXT:    ret void
 //
 //
-// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..12
+// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..13
 // CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[NN:%.*]]) #[[ATTR2]] {
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -2479,11 +2449,11 @@ int bar(int n){
 // CHECK2-NEXT:    [[CONV1:%.*]] = bitcast i64* [[NN_CASTED]] to i32*
 // CHECK2-NEXT:    store i32 [[TMP0]], i32* [[CONV1]], align 4
 // CHECK2-NEXT:    [[TMP1:%.*]] = load i64, i64* [[NN_CASTED]], align 8
-// CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP1]])
+// CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP1]])
 // CHECK2-NEXT:    ret void
 //
 //
-// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..15
+// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..16
 // CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[NN:%.*]]) #[[ATTR2]] {
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -2493,11 +2463,11 @@ int bar(int n){
 // CHECK2-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
 // CHECK2-NEXT:    store i64 [[NN]], i64* [[NN_ADDR]], align 8
 // CHECK2-NEXT:    [[CONV:%.*]] = bitcast i64* [[NN_ADDR]] to i32*
-// CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i32* [[CONV]])
+// CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..17 to void (i32*, i32*, ...)*), i32* [[CONV]])
 // CHECK2-NEXT:    ret void
 //
 //
-// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..16
+// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..17
 // CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[NN:%.*]]) #[[ATTR2]] {
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -2532,7 +2502,7 @@ int bar(int n){
 // CHECK2-NEXT:    store i8* null, i8** [[TMP6]], align 8
 // CHECK2-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK2-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK2-NEXT:    [[TMP9:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6bazzzziPi_l182.region_id, i32 1, i8** [[TMP7]], i8** [[TMP8]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.20, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.21, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK2-NEXT:    [[TMP9:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6bazzzziPi_l182.region_id, i32 1, i8** [[TMP7]], i8** [[TMP8]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.21, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.22, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK2-NEXT:    [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0
 // CHECK2-NEXT:    br i1 [[TMP10]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK2:       omp_offload.failed:
@@ -2548,11 +2518,11 @@ int bar(int n){
 // CHECK2-NEXT:    [[VLA_ADDR:%.*]] = alloca i64, align 8
 // CHECK2-NEXT:    store i64 [[VLA]], i64* [[VLA_ADDR]], align 8
 // CHECK2-NEXT:    [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8
-// CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i64 [[TMP0]])
+// CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..20 to void (i32*, i32*, ...)*), i64 [[TMP0]])
 // CHECK2-NEXT:    ret void
 //
 //
-// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..19
+// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..20
 // CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]]) #[[ATTR2]] {
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -2635,14 +2605,14 @@ int bar(int n){
 // CHECK2-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0
 // CHECK2-NEXT:    [[TMP8:%.*]] = mul nuw i64 2, [[TMP2]]
 // CHECK2-NEXT:    [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 2
-// CHECK2-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK2-NEXT:    [[TMP11:%.*]] = bitcast i8** [[TMP10]] to %struct.S1**
-// CHECK2-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP11]], align 8
-// CHECK2-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK2-NEXT:    [[TMP13:%.*]] = bitcast i8** [[TMP12]] to double**
-// CHECK2-NEXT:    store double* [[A]], double** [[TMP13]], align 8
-// CHECK2-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK2-NEXT:    store i64 8, i64* [[TMP14]], align 8
+// CHECK2-NEXT:    [[TMP10:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK2-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP10]], i8* align 8 bitcast ([5 x i64]* @.offload_sizes.24 to i8*), i64 40, i1 false)
+// CHECK2-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP12:%.*]] = bitcast i8** [[TMP11]] to %struct.S1**
+// CHECK2-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP12]], align 8
+// CHECK2-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to double**
+// CHECK2-NEXT:    store double* [[A]], double** [[TMP14]], align 8
 // CHECK2-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
 // CHECK2-NEXT:    store i8* null, i8** [[TMP15]], align 8
 // CHECK2-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -2651,46 +2621,40 @@ int bar(int n){
 // CHECK2-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK2-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i64*
 // CHECK2-NEXT:    store i64 [[TMP6]], i64* [[TMP19]], align 8
-// CHECK2-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK2-NEXT:    store i64 4, i64* [[TMP20]], align 8
-// CHECK2-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK2-NEXT:    store i8* null, i8** [[TMP21]], align 8
-// CHECK2-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK2-NEXT:    [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i64*
-// CHECK2-NEXT:    store i64 2, i64* [[TMP23]], align 8
-// CHECK2-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK2-NEXT:    [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i64*
-// CHECK2-NEXT:    store i64 2, i64* [[TMP25]], align 8
-// CHECK2-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK2-NEXT:    store i64 8, i64* [[TMP26]], align 8
-// CHECK2-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK2-NEXT:    store i8* null, i8** [[TMP27]], align 8
-// CHECK2-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK2-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK2-NEXT:    store i8* null, i8** [[TMP20]], align 8
+// CHECK2-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK2-NEXT:    [[TMP22:%.*]] = bitcast i8** [[TMP21]] to i64*
+// CHECK2-NEXT:    store i64 2, i64* [[TMP22]], align 8
+// CHECK2-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK2-NEXT:    [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i64*
+// CHECK2-NEXT:    store i64 2, i64* [[TMP24]], align 8
+// CHECK2-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK2-NEXT:    store i8* null, i8** [[TMP25]], align 8
+// CHECK2-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK2-NEXT:    [[TMP27:%.*]] = bitcast i8** [[TMP26]] to i64*
+// CHECK2-NEXT:    store i64 [[TMP2]], i64* [[TMP27]], align 8
+// CHECK2-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
 // CHECK2-NEXT:    [[TMP29:%.*]] = bitcast i8** [[TMP28]] to i64*
 // CHECK2-NEXT:    store i64 [[TMP2]], i64* [[TMP29]], align 8
-// CHECK2-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK2-NEXT:    [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i64*
-// CHECK2-NEXT:    store i64 [[TMP2]], i64* [[TMP31]], align 8
-// CHECK2-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK2-NEXT:    store i64 8, i64* [[TMP32]], align 8
-// CHECK2-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
-// CHECK2-NEXT:    store i8* null, i8** [[TMP33]], align 8
-// CHECK2-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK2-NEXT:    [[TMP35:%.*]] = bitcast i8** [[TMP34]] to i16**
-// CHECK2-NEXT:    store i16* [[VLA]], i16** [[TMP35]], align 8
-// CHECK2-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
-// CHECK2-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i16**
-// CHECK2-NEXT:    store i16* [[VLA]], i16** [[TMP37]], align 8
-// CHECK2-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK2-NEXT:    store i64 [[TMP9]], i64* [[TMP38]], align 8
-// CHECK2-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
-// CHECK2-NEXT:    store i8* null, i8** [[TMP39]], align 8
-// CHECK2-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK2-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK2-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK2-NEXT:    [[TMP43:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l227.region_id, i32 5, i8** [[TMP40]], i8** [[TMP41]], i64* [[TMP42]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.23, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK2-NEXT:    [[TMP44:%.*]] = icmp ne i32 [[TMP43]], 0
-// CHECK2-NEXT:    br i1 [[TMP44]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK2-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
+// CHECK2-NEXT:    store i8* null, i8** [[TMP30]], align 8
+// CHECK2-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK2-NEXT:    [[TMP32:%.*]] = bitcast i8** [[TMP31]] to i16**
+// CHECK2-NEXT:    store i16* [[VLA]], i16** [[TMP32]], align 8
+// CHECK2-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK2-NEXT:    [[TMP34:%.*]] = bitcast i8** [[TMP33]] to i16**
+// CHECK2-NEXT:    store i16* [[VLA]], i16** [[TMP34]], align 8
+// CHECK2-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
+// CHECK2-NEXT:    store i64 [[TMP9]], i64* [[TMP35]], align 8
+// CHECK2-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
+// CHECK2-NEXT:    store i8* null, i8** [[TMP36]], align 8
+// CHECK2-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP40:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l227.region_id, i32 5, i8** [[TMP37]], i8** [[TMP38]], i64* [[TMP39]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.25, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK2-NEXT:    [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0
+// CHECK2-NEXT:    br i1 [[TMP41]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK2:       omp_offload.failed:
 // CHECK2-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l227(%struct.S1* [[THIS1]], i64 [[TMP6]], i64 2, i64 [[TMP2]], i16* [[VLA]]) #[[ATTR3]]
 // CHECK2-NEXT:    br label [[OMP_OFFLOAD_CONT]]
@@ -2700,15 +2664,15 @@ int bar(int n){
 // CHECK2-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l227(%struct.S1* [[THIS1]], i64 [[TMP6]], i64 2, i64 [[TMP2]], i16* [[VLA]]) #[[ATTR3]]
 // CHECK2-NEXT:    br label [[OMP_IF_END]]
 // CHECK2:       omp_if.end:
-// CHECK2-NEXT:    [[TMP45:%.*]] = mul nsw i64 1, [[TMP2]]
-// CHECK2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i64 [[TMP45]]
+// CHECK2-NEXT:    [[TMP42:%.*]] = mul nsw i64 1, [[TMP2]]
+// CHECK2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i64 [[TMP42]]
 // CHECK2-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1
-// CHECK2-NEXT:    [[TMP46:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
-// CHECK2-NEXT:    [[CONV3:%.*]] = sext i16 [[TMP46]] to i32
-// CHECK2-NEXT:    [[TMP47:%.*]] = load i32, i32* [[B]], align 4
-// CHECK2-NEXT:    [[ADD4:%.*]] = add nsw i32 [[CONV3]], [[TMP47]]
-// CHECK2-NEXT:    [[TMP48:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK2-NEXT:    call void @llvm.stackrestore(i8* [[TMP48]])
+// CHECK2-NEXT:    [[TMP43:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
+// CHECK2-NEXT:    [[CONV3:%.*]] = sext i16 [[TMP43]] to i32
+// CHECK2-NEXT:    [[TMP44:%.*]] = load i32, i32* [[B]], align 4
+// CHECK2-NEXT:    [[ADD4:%.*]] = add nsw i32 [[CONV3]], [[TMP44]]
+// CHECK2-NEXT:    [[TMP45:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK2-NEXT:    call void @llvm.stackrestore(i8* [[TMP45]])
 // CHECK2-NEXT:    ret i32 [[ADD4]]
 //
 //
@@ -2780,7 +2744,7 @@ int bar(int n){
 // CHECK2-NEXT:    store i8* null, i8** [[TMP26]], align 8
 // CHECK2-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK2-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK2-NEXT:    [[TMP29:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l209.region_id, i32 4, i8** [[TMP27]], i8** [[TMP28]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.25, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.26, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK2-NEXT:    [[TMP29:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l209.region_id, i32 4, i8** [[TMP27]], i8** [[TMP28]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.27, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.28, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK2-NEXT:    [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0
 // CHECK2-NEXT:    br i1 [[TMP30]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK2:       omp_offload.failed:
@@ -2849,7 +2813,7 @@ int bar(int n){
 // CHECK2-NEXT:    store i8* null, i8** [[TMP19]], align 8
 // CHECK2-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK2-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK2-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l192.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.28, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.29, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK2-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l192.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.30, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.31, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK2-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK2-NEXT:    br i1 [[TMP23]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK2:       omp_offload.failed:
@@ -2888,11 +2852,11 @@ int bar(int n){
 // CHECK2-NEXT:    [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32*
 // CHECK2-NEXT:    store i32 [[TMP4]], i32* [[CONV3]], align 4
 // CHECK2-NEXT:    [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8
-// CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i64, i64, i64, i16*)* @.omp_outlined..22 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i64 [[TMP5]], i64 [[TMP1]], i64 [[TMP2]], i16* [[TMP3]])
+// CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i64, i64, i64, i16*)* @.omp_outlined..23 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i64 [[TMP5]], i64 [[TMP1]], i64 [[TMP2]], i16* [[TMP3]])
 // CHECK2-NEXT:    ret void
 //
 //
-// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..22
+// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..23
 // CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR2]] {
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -2961,11 +2925,11 @@ int bar(int n){
 // CHECK2-NEXT:    [[CONV5:%.*]] = bitcast i64* [[AAA_CASTED]] to i8*
 // CHECK2-NEXT:    store i8 [[TMP5]], i8* [[CONV5]], align 1
 // CHECK2-NEXT:    [[TMP6:%.*]] = load i64, i64* [[AAA_CASTED]], align 8
-// CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, [10 x i32]*)* @.omp_outlined..24 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], [10 x i32]* [[TMP0]])
+// CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, [10 x i32]*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], [10 x i32]* [[TMP0]])
 // CHECK2-NEXT:    ret void
 //
 //
-// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..24
+// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..26
 // CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] {
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -3026,11 +2990,11 @@ int bar(int n){
 // CHECK2-NEXT:    [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16*
 // CHECK2-NEXT:    store i16 [[TMP3]], i16* [[CONV3]], align 2
 // CHECK2-NEXT:    [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8
-// CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..27 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], [10 x i32]* [[TMP0]])
+// CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..29 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], [10 x i32]* [[TMP0]])
 // CHECK2-NEXT:    ret void
 //
 //
-// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..27
+// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..29
 // CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] {
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -3277,14 +3241,14 @@ int bar(int n){
 // CHECK3-NEXT:    [[TMP92:%.*]] = mul nuw i32 5, [[TMP3]]
 // CHECK3-NEXT:    [[TMP93:%.*]] = mul nuw i32 [[TMP92]], 8
 // CHECK3-NEXT:    [[TMP94:%.*]] = sext i32 [[TMP93]] to i64
-// CHECK3-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0
-// CHECK3-NEXT:    [[TMP96:%.*]] = bitcast i8** [[TMP95]] to i32*
-// CHECK3-NEXT:    store i32 [[TMP88]], i32* [[TMP96]], align 4
-// CHECK3-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 0
-// CHECK3-NEXT:    [[TMP98:%.*]] = bitcast i8** [[TMP97]] to i32*
-// CHECK3-NEXT:    store i32 [[TMP88]], i32* [[TMP98]], align 4
-// CHECK3-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK3-NEXT:    store i64 4, i64* [[TMP99]], align 4
+// CHECK3-NEXT:    [[TMP95:%.*]] = bitcast [9 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK3-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP95]], i8* align 4 bitcast ([9 x i64]* @.offload_sizes.10 to i8*), i32 72, i1 false)
+// CHECK3-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0
+// CHECK3-NEXT:    [[TMP97:%.*]] = bitcast i8** [[TMP96]] to i32*
+// CHECK3-NEXT:    store i32 [[TMP88]], i32* [[TMP97]], align 4
+// CHECK3-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 0
+// CHECK3-NEXT:    [[TMP99:%.*]] = bitcast i8** [[TMP98]] to i32*
+// CHECK3-NEXT:    store i32 [[TMP88]], i32* [[TMP99]], align 4
 // CHECK3-NEXT:    [[TMP100:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i32 0, i32 0
 // CHECK3-NEXT:    store i8* null, i8** [[TMP100]], align 4
 // CHECK3-NEXT:    [[TMP101:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 1
@@ -3293,86 +3257,74 @@ int bar(int n){
 // CHECK3-NEXT:    [[TMP103:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 1
 // CHECK3-NEXT:    [[TMP104:%.*]] = bitcast i8** [[TMP103]] to [10 x float]**
 // CHECK3-NEXT:    store [10 x float]* [[B]], [10 x float]** [[TMP104]], align 4
-// CHECK3-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK3-NEXT:    store i64 40, i64* [[TMP105]], align 4
-// CHECK3-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i32 0, i32 1
-// CHECK3-NEXT:    store i8* null, i8** [[TMP106]], align 4
-// CHECK3-NEXT:    [[TMP107:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 2
-// CHECK3-NEXT:    [[TMP108:%.*]] = bitcast i8** [[TMP107]] to i32*
-// CHECK3-NEXT:    store i32 [[TMP1]], i32* [[TMP108]], align 4
-// CHECK3-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 2
-// CHECK3-NEXT:    [[TMP110:%.*]] = bitcast i8** [[TMP109]] to i32*
-// CHECK3-NEXT:    store i32 [[TMP1]], i32* [[TMP110]], align 4
-// CHECK3-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK3-NEXT:    store i64 4, i64* [[TMP111]], align 4
-// CHECK3-NEXT:    [[TMP112:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i32 0, i32 2
-// CHECK3-NEXT:    store i8* null, i8** [[TMP112]], align 4
-// CHECK3-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 3
+// CHECK3-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i32 0, i32 1
+// CHECK3-NEXT:    store i8* null, i8** [[TMP105]], align 4
+// CHECK3-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 2
+// CHECK3-NEXT:    [[TMP107:%.*]] = bitcast i8** [[TMP106]] to i32*
+// CHECK3-NEXT:    store i32 [[TMP1]], i32* [[TMP107]], align 4
+// CHECK3-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 2
+// CHECK3-NEXT:    [[TMP109:%.*]] = bitcast i8** [[TMP108]] to i32*
+// CHECK3-NEXT:    store i32 [[TMP1]], i32* [[TMP109]], align 4
+// CHECK3-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i32 0, i32 2
+// CHECK3-NEXT:    store i8* null, i8** [[TMP110]], align 4
+// CHECK3-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 3
+// CHECK3-NEXT:    [[TMP112:%.*]] = bitcast i8** [[TMP111]] to float**
+// CHECK3-NEXT:    store float* [[VLA]], float** [[TMP112]], align 4
+// CHECK3-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 3
 // CHECK3-NEXT:    [[TMP114:%.*]] = bitcast i8** [[TMP113]] to float**
 // CHECK3-NEXT:    store float* [[VLA]], float** [[TMP114]], align 4
-// CHECK3-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 3
-// CHECK3-NEXT:    [[TMP116:%.*]] = bitcast i8** [[TMP115]] to float**
-// CHECK3-NEXT:    store float* [[VLA]], float** [[TMP116]], align 4
-// CHECK3-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK3-NEXT:    store i64 [[TMP91]], i64* [[TMP117]], align 4
-// CHECK3-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i32 0, i32 3
-// CHECK3-NEXT:    store i8* null, i8** [[TMP118]], align 4
-// CHECK3-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 4
+// CHECK3-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
+// CHECK3-NEXT:    store i64 [[TMP91]], i64* [[TMP115]], align 4
+// CHECK3-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i32 0, i32 3
+// CHECK3-NEXT:    store i8* null, i8** [[TMP116]], align 4
+// CHECK3-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 4
+// CHECK3-NEXT:    [[TMP118:%.*]] = bitcast i8** [[TMP117]] to [5 x [10 x double]]**
+// CHECK3-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP118]], align 4
+// CHECK3-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 4
 // CHECK3-NEXT:    [[TMP120:%.*]] = bitcast i8** [[TMP119]] to [5 x [10 x double]]**
 // CHECK3-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP120]], align 4
-// CHECK3-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 4
-// CHECK3-NEXT:    [[TMP122:%.*]] = bitcast i8** [[TMP121]] to [5 x [10 x double]]**
-// CHECK3-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP122]], align 4
-// CHECK3-NEXT:    [[TMP123:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK3-NEXT:    store i64 400, i64* [[TMP123]], align 4
-// CHECK3-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i32 0, i32 4
-// CHECK3-NEXT:    store i8* null, i8** [[TMP124]], align 4
-// CHECK3-NEXT:    [[TMP125:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 5
-// CHECK3-NEXT:    [[TMP126:%.*]] = bitcast i8** [[TMP125]] to i32*
-// CHECK3-NEXT:    store i32 5, i32* [[TMP126]], align 4
-// CHECK3-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 5
+// CHECK3-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i32 0, i32 4
+// CHECK3-NEXT:    store i8* null, i8** [[TMP121]], align 4
+// CHECK3-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 5
+// CHECK3-NEXT:    [[TMP123:%.*]] = bitcast i8** [[TMP122]] to i32*
+// CHECK3-NEXT:    store i32 5, i32* [[TMP123]], align 4
+// CHECK3-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 5
+// CHECK3-NEXT:    [[TMP125:%.*]] = bitcast i8** [[TMP124]] to i32*
+// CHECK3-NEXT:    store i32 5, i32* [[TMP125]], align 4
+// CHECK3-NEXT:    [[TMP126:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i32 0, i32 5
+// CHECK3-NEXT:    store i8* null, i8** [[TMP126]], align 4
+// CHECK3-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 6
 // CHECK3-NEXT:    [[TMP128:%.*]] = bitcast i8** [[TMP127]] to i32*
-// CHECK3-NEXT:    store i32 5, i32* [[TMP128]], align 4
-// CHECK3-NEXT:    [[TMP129:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 5
-// CHECK3-NEXT:    store i64 4, i64* [[TMP129]], align 4
-// CHECK3-NEXT:    [[TMP130:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i32 0, i32 5
-// CHECK3-NEXT:    store i8* null, i8** [[TMP130]], align 4
-// CHECK3-NEXT:    [[TMP131:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 6
-// CHECK3-NEXT:    [[TMP132:%.*]] = bitcast i8** [[TMP131]] to i32*
-// CHECK3-NEXT:    store i32 [[TMP3]], i32* [[TMP132]], align 4
-// CHECK3-NEXT:    [[TMP133:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 6
-// CHECK3-NEXT:    [[TMP134:%.*]] = bitcast i8** [[TMP133]] to i32*
-// CHECK3-NEXT:    store i32 [[TMP3]], i32* [[TMP134]], align 4
-// CHECK3-NEXT:    [[TMP135:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 6
-// CHECK3-NEXT:    store i64 4, i64* [[TMP135]], align 4
-// CHECK3-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i32 0, i32 6
-// CHECK3-NEXT:    store i8* null, i8** [[TMP136]], align 4
-// CHECK3-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 7
-// CHECK3-NEXT:    [[TMP138:%.*]] = bitcast i8** [[TMP137]] to double**
-// CHECK3-NEXT:    store double* [[VLA1]], double** [[TMP138]], align 4
-// CHECK3-NEXT:    [[TMP139:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 7
-// CHECK3-NEXT:    [[TMP140:%.*]] = bitcast i8** [[TMP139]] to double**
-// CHECK3-NEXT:    store double* [[VLA1]], double** [[TMP140]], align 4
-// CHECK3-NEXT:    [[TMP141:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
-// CHECK3-NEXT:    store i64 [[TMP94]], i64* [[TMP141]], align 4
-// CHECK3-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i32 0, i32 7
+// CHECK3-NEXT:    store i32 [[TMP3]], i32* [[TMP128]], align 4
+// CHECK3-NEXT:    [[TMP129:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 6
+// CHECK3-NEXT:    [[TMP130:%.*]] = bitcast i8** [[TMP129]] to i32*
+// CHECK3-NEXT:    store i32 [[TMP3]], i32* [[TMP130]], align 4
+// CHECK3-NEXT:    [[TMP131:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i32 0, i32 6
+// CHECK3-NEXT:    store i8* null, i8** [[TMP131]], align 4
+// CHECK3-NEXT:    [[TMP132:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 7
+// CHECK3-NEXT:    [[TMP133:%.*]] = bitcast i8** [[TMP132]] to double**
+// CHECK3-NEXT:    store double* [[VLA1]], double** [[TMP133]], align 4
+// CHECK3-NEXT:    [[TMP134:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 7
+// CHECK3-NEXT:    [[TMP135:%.*]] = bitcast i8** [[TMP134]] to double**
+// CHECK3-NEXT:    store double* [[VLA1]], double** [[TMP135]], align 4
+// CHECK3-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
+// CHECK3-NEXT:    store i64 [[TMP94]], i64* [[TMP136]], align 4
+// CHECK3-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i32 0, i32 7
+// CHECK3-NEXT:    store i8* null, i8** [[TMP137]], align 4
+// CHECK3-NEXT:    [[TMP138:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 8
+// CHECK3-NEXT:    [[TMP139:%.*]] = bitcast i8** [[TMP138]] to %struct.TT**
+// CHECK3-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP139]], align 4
+// CHECK3-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 8
+// CHECK3-NEXT:    [[TMP141:%.*]] = bitcast i8** [[TMP140]] to %struct.TT**
+// CHECK3-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP141]], align 4
+// CHECK3-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i32 0, i32 8
 // CHECK3-NEXT:    store i8* null, i8** [[TMP142]], align 4
-// CHECK3-NEXT:    [[TMP143:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 8
-// CHECK3-NEXT:    [[TMP144:%.*]] = bitcast i8** [[TMP143]] to %struct.TT**
-// CHECK3-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP144]], align 4
-// CHECK3-NEXT:    [[TMP145:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 8
-// CHECK3-NEXT:    [[TMP146:%.*]] = bitcast i8** [[TMP145]] to %struct.TT**
-// CHECK3-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP146]], align 4
-// CHECK3-NEXT:    [[TMP147:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 8
-// CHECK3-NEXT:    store i64 12, i64* [[TMP147]], align 4
-// CHECK3-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i32 0, i32 8
-// CHECK3-NEXT:    store i8* null, i8** [[TMP148]], align 4
-// CHECK3-NEXT:    [[TMP149:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0
-// CHECK3-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 0
-// CHECK3-NEXT:    [[TMP151:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK3-NEXT:    [[TMP152:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l142.region_id, i32 9, i8** [[TMP149]], i8** [[TMP150]], i64* [[TMP151]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.10, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK3-NEXT:    [[TMP153:%.*]] = icmp ne i32 [[TMP152]], 0
-// CHECK3-NEXT:    br i1 [[TMP153]], label [[OMP_OFFLOAD_FAILED23:%.*]], label [[OMP_OFFLOAD_CONT24:%.*]]
+// CHECK3-NEXT:    [[TMP143:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0
+// CHECK3-NEXT:    [[TMP144:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 0
+// CHECK3-NEXT:    [[TMP145:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK3-NEXT:    [[TMP146:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l142.region_id, i32 9, i8** [[TMP143]], i8** [[TMP144]], i64* [[TMP145]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.11, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK3-NEXT:    [[TMP147:%.*]] = icmp ne i32 [[TMP146]], 0
+// CHECK3-NEXT:    br i1 [[TMP147]], label [[OMP_OFFLOAD_FAILED23:%.*]], label [[OMP_OFFLOAD_CONT24:%.*]]
 // CHECK3:       omp_offload.failed23:
 // CHECK3-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l142(i32 [[TMP88]], [10 x float]* [[B]], i32 [[TMP1]], float* [[VLA]], [5 x [10 x double]]* [[C]], i32 5, i32 [[TMP3]], double* [[VLA1]], %struct.TT* [[D]]) #[[ATTR3]]
 // CHECK3-NEXT:    br label [[OMP_OFFLOAD_CONT24]]
@@ -3383,50 +3335,50 @@ int bar(int n){
 // CHECK3-NEXT:    br label [[OMP_IF_END26]]
 // CHECK3:       omp_if.end26:
 // CHECK3-NEXT:    store i32 0, i32* [[NN]], align 4
-// CHECK3-NEXT:    [[TMP154:%.*]] = load i32, i32* [[NN]], align 4
-// CHECK3-NEXT:    store i32 [[TMP154]], i32* [[NN_CASTED]], align 4
-// CHECK3-NEXT:    [[TMP155:%.*]] = load i32, i32* [[NN_CASTED]], align 4
-// CHECK3-NEXT:    [[TMP156:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 0
-// CHECK3-NEXT:    [[TMP157:%.*]] = bitcast i8** [[TMP156]] to i32*
-// CHECK3-NEXT:    store i32 [[TMP155]], i32* [[TMP157]], align 4
-// CHECK3-NEXT:    [[TMP158:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS28]], i32 0, i32 0
-// CHECK3-NEXT:    [[TMP159:%.*]] = bitcast i8** [[TMP158]] to i32*
-// CHECK3-NEXT:    store i32 [[TMP155]], i32* [[TMP159]], align 4
-// CHECK3-NEXT:    [[TMP160:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_MAPPERS29]], i32 0, i32 0
-// CHECK3-NEXT:    store i8* null, i8** [[TMP160]], align 4
-// CHECK3-NEXT:    [[TMP161:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 0
-// CHECK3-NEXT:    [[TMP162:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS28]], i32 0, i32 0
-// CHECK3-NEXT:    [[TMP163:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l154.region_id, i32 1, i8** [[TMP161]], i8** [[TMP162]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.13, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.14, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK3-NEXT:    [[TMP164:%.*]] = icmp ne i32 [[TMP163]], 0
-// CHECK3-NEXT:    br i1 [[TMP164]], label [[OMP_OFFLOAD_FAILED30:%.*]], label [[OMP_OFFLOAD_CONT31:%.*]]
+// CHECK3-NEXT:    [[TMP148:%.*]] = load i32, i32* [[NN]], align 4
+// CHECK3-NEXT:    store i32 [[TMP148]], i32* [[NN_CASTED]], align 4
+// CHECK3-NEXT:    [[TMP149:%.*]] = load i32, i32* [[NN_CASTED]], align 4
+// CHECK3-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 0
+// CHECK3-NEXT:    [[TMP151:%.*]] = bitcast i8** [[TMP150]] to i32*
+// CHECK3-NEXT:    store i32 [[TMP149]], i32* [[TMP151]], align 4
+// CHECK3-NEXT:    [[TMP152:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS28]], i32 0, i32 0
+// CHECK3-NEXT:    [[TMP153:%.*]] = bitcast i8** [[TMP152]] to i32*
+// CHECK3-NEXT:    store i32 [[TMP149]], i32* [[TMP153]], align 4
+// CHECK3-NEXT:    [[TMP154:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_MAPPERS29]], i32 0, i32 0
+// CHECK3-NEXT:    store i8* null, i8** [[TMP154]], align 4
+// CHECK3-NEXT:    [[TMP155:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 0
+// CHECK3-NEXT:    [[TMP156:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS28]], i32 0, i32 0
+// CHECK3-NEXT:    [[TMP157:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l154.region_id, i32 1, i8** [[TMP155]], i8** [[TMP156]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.14, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.15, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK3-NEXT:    [[TMP158:%.*]] = icmp ne i32 [[TMP157]], 0
+// CHECK3-NEXT:    br i1 [[TMP158]], label [[OMP_OFFLOAD_FAILED30:%.*]], label [[OMP_OFFLOAD_CONT31:%.*]]
 // CHECK3:       omp_offload.failed30:
-// CHECK3-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l154(i32 [[TMP155]]) #[[ATTR3]]
+// CHECK3-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l154(i32 [[TMP149]]) #[[ATTR3]]
 // CHECK3-NEXT:    br label [[OMP_OFFLOAD_CONT31]]
 // CHECK3:       omp_offload.cont31:
-// CHECK3-NEXT:    [[TMP165:%.*]] = load i32, i32* [[NN]], align 4
-// CHECK3-NEXT:    store i32 [[TMP165]], i32* [[NN_CASTED32]], align 4
-// CHECK3-NEXT:    [[TMP166:%.*]] = load i32, i32* [[NN_CASTED32]], align 4
-// CHECK3-NEXT:    [[TMP167:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS33]], i32 0, i32 0
-// CHECK3-NEXT:    [[TMP168:%.*]] = bitcast i8** [[TMP167]] to i32*
-// CHECK3-NEXT:    store i32 [[TMP166]], i32* [[TMP168]], align 4
-// CHECK3-NEXT:    [[TMP169:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS34]], i32 0, i32 0
-// CHECK3-NEXT:    [[TMP170:%.*]] = bitcast i8** [[TMP169]] to i32*
-// CHECK3-NEXT:    store i32 [[TMP166]], i32* [[TMP170]], align 4
-// CHECK3-NEXT:    [[TMP171:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_MAPPERS35]], i32 0, i32 0
-// CHECK3-NEXT:    store i8* null, i8** [[TMP171]], align 4
-// CHECK3-NEXT:    [[TMP172:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS33]], i32 0, i32 0
-// CHECK3-NEXT:    [[TMP173:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS34]], i32 0, i32 0
-// CHECK3-NEXT:    [[TMP174:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l157.region_id, i32 1, i8** [[TMP172]], i8** [[TMP173]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.17, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.18, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK3-NEXT:    [[TMP175:%.*]] = icmp ne i32 [[TMP174]], 0
-// CHECK3-NEXT:    br i1 [[TMP175]], label [[OMP_OFFLOAD_FAILED36:%.*]], label [[OMP_OFFLOAD_CONT37:%.*]]
+// CHECK3-NEXT:    [[TMP159:%.*]] = load i32, i32* [[NN]], align 4
+// CHECK3-NEXT:    store i32 [[TMP159]], i32* [[NN_CASTED32]], align 4
+// CHECK3-NEXT:    [[TMP160:%.*]] = load i32, i32* [[NN_CASTED32]], align 4
+// CHECK3-NEXT:    [[TMP161:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS33]], i32 0, i32 0
+// CHECK3-NEXT:    [[TMP162:%.*]] = bitcast i8** [[TMP161]] to i32*
+// CHECK3-NEXT:    store i32 [[TMP160]], i32* [[TMP162]], align 4
+// CHECK3-NEXT:    [[TMP163:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS34]], i32 0, i32 0
+// CHECK3-NEXT:    [[TMP164:%.*]] = bitcast i8** [[TMP163]] to i32*
+// CHECK3-NEXT:    store i32 [[TMP160]], i32* [[TMP164]], align 4
+// CHECK3-NEXT:    [[TMP165:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_MAPPERS35]], i32 0, i32 0
+// CHECK3-NEXT:    store i8* null, i8** [[TMP165]], align 4
+// CHECK3-NEXT:    [[TMP166:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS33]], i32 0, i32 0
+// CHECK3-NEXT:    [[TMP167:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS34]], i32 0, i32 0
+// CHECK3-NEXT:    [[TMP168:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l157.region_id, i32 1, i8** [[TMP166]], i8** [[TMP167]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.18, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.19, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK3-NEXT:    [[TMP169:%.*]] = icmp ne i32 [[TMP168]], 0
+// CHECK3-NEXT:    br i1 [[TMP169]], label [[OMP_OFFLOAD_FAILED36:%.*]], label [[OMP_OFFLOAD_CONT37:%.*]]
 // CHECK3:       omp_offload.failed36:
-// CHECK3-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l157(i32 [[TMP166]]) #[[ATTR3]]
+// CHECK3-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l157(i32 [[TMP160]]) #[[ATTR3]]
 // CHECK3-NEXT:    br label [[OMP_OFFLOAD_CONT37]]
 // CHECK3:       omp_offload.cont37:
-// CHECK3-NEXT:    [[TMP176:%.*]] = load i32, i32* [[A]], align 4
-// CHECK3-NEXT:    [[TMP177:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK3-NEXT:    call void @llvm.stackrestore(i8* [[TMP177]])
-// CHECK3-NEXT:    ret i32 [[TMP176]]
+// CHECK3-NEXT:    [[TMP170:%.*]] = load i32, i32* [[A]], align 4
+// CHECK3-NEXT:    [[TMP171:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK3-NEXT:    call void @llvm.stackrestore(i8* [[TMP171]])
+// CHECK3-NEXT:    ret i32 [[TMP170]]
 //
 //
 // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l101
@@ -3793,11 +3745,11 @@ int bar(int n){
 // CHECK3-NEXT:    [[TMP0:%.*]] = load i32, i32* [[NN_ADDR]], align 4
 // CHECK3-NEXT:    store i32 [[TMP0]], i32* [[NN_CASTED]], align 4
 // CHECK3-NEXT:    [[TMP1:%.*]] = load i32, i32* [[NN_CASTED]], align 4
-// CHECK3-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32 [[TMP1]])
+// CHECK3-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i32 [[TMP1]])
 // CHECK3-NEXT:    ret void
 //
 //
-// CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..11
+// CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..12
 // CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[NN:%.*]]) #[[ATTR2]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -3810,11 +3762,11 @@ int bar(int n){
 // CHECK3-NEXT:    [[TMP0:%.*]] = load i32, i32* [[NN_ADDR]], align 4
 // CHECK3-NEXT:    store i32 [[TMP0]], i32* [[NN_CASTED]], align 4
 // CHECK3-NEXT:    [[TMP1:%.*]] = load i32, i32* [[NN_CASTED]], align 4
-// CHECK3-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i32 [[TMP1]])
+// CHECK3-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i32 [[TMP1]])
 // CHECK3-NEXT:    ret void
 //
 //
-// CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..12
+// CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..13
 // CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[NN:%.*]]) #[[ATTR2]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -3835,11 +3787,11 @@ int bar(int n){
 // CHECK3-NEXT:    [[TMP0:%.*]] = load i32, i32* [[NN_ADDR]], align 4
 // CHECK3-NEXT:    store i32 [[TMP0]], i32* [[NN_CASTED]], align 4
 // CHECK3-NEXT:    [[TMP1:%.*]] = load i32, i32* [[NN_CASTED]], align 4
-// CHECK3-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP1]])
+// CHECK3-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i32 [[TMP1]])
 // CHECK3-NEXT:    ret void
 //
 //
-// CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..15
+// CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..16
 // CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[NN:%.*]]) #[[ATTR2]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -3848,11 +3800,11 @@ int bar(int n){
 // CHECK3-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK3-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
 // CHECK3-NEXT:    store i32 [[NN]], i32* [[NN_ADDR]], align 4
-// CHECK3-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i32* [[NN_ADDR]])
+// CHECK3-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..17 to void (i32*, i32*, ...)*), i32* [[NN_ADDR]])
 // CHECK3-NEXT:    ret void
 //
 //
-// CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..16
+// CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..17
 // CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[NN:%.*]]) #[[ATTR2]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -3886,7 +3838,7 @@ int bar(int n){
 // CHECK3-NEXT:    store i8* null, i8** [[TMP5]], align 4
 // CHECK3-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[TMP8:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6bazzzziPi_l182.region_id, i32 1, i8** [[TMP6]], i8** [[TMP7]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.20, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.21, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK3-NEXT:    [[TMP8:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6bazzzziPi_l182.region_id, i32 1, i8** [[TMP6]], i8** [[TMP7]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.21, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.22, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK3-NEXT:    [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0
 // CHECK3-NEXT:    br i1 [[TMP9]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK3:       omp_offload.failed:
@@ -3902,11 +3854,11 @@ int bar(int n){
 // CHECK3-NEXT:    [[VLA_ADDR:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    store i32 [[VLA]], i32* [[VLA_ADDR]], align 4
 // CHECK3-NEXT:    [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4
-// CHECK3-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i32 [[TMP0]])
+// CHECK3-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32)* @.omp_outlined..20 to void (i32*, i32*, ...)*), i32 [[TMP0]])
 // CHECK3-NEXT:    ret void
 //
 //
-// CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..19
+// CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..20
 // CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[VLA:%.*]]) #[[ATTR2]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -3988,14 +3940,14 @@ int bar(int n){
 // CHECK3-NEXT:    [[TMP7:%.*]] = mul nuw i32 2, [[TMP1]]
 // CHECK3-NEXT:    [[TMP8:%.*]] = mul nuw i32 [[TMP7]], 2
 // CHECK3-NEXT:    [[TMP9:%.*]] = sext i32 [[TMP8]] to i64
-// CHECK3-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[TMP11:%.*]] = bitcast i8** [[TMP10]] to %struct.S1**
-// CHECK3-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP11]], align 4
-// CHECK3-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[TMP13:%.*]] = bitcast i8** [[TMP12]] to double**
-// CHECK3-NEXT:    store double* [[A]], double** [[TMP13]], align 4
-// CHECK3-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK3-NEXT:    store i64 8, i64* [[TMP14]], align 4
+// CHECK3-NEXT:    [[TMP10:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK3-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP10]], i8* align 4 bitcast ([5 x i64]* @.offload_sizes.24 to i8*), i32 40, i1 false)
+// CHECK3-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK3-NEXT:    [[TMP12:%.*]] = bitcast i8** [[TMP11]] to %struct.S1**
+// CHECK3-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP12]], align 4
+// CHECK3-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK3-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to double**
+// CHECK3-NEXT:    store double* [[A]], double** [[TMP14]], align 4
 // CHECK3-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
 // CHECK3-NEXT:    store i8* null, i8** [[TMP15]], align 4
 // CHECK3-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -4004,46 +3956,40 @@ int bar(int n){
 // CHECK3-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK3-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32*
 // CHECK3-NEXT:    store i32 [[TMP5]], i32* [[TMP19]], align 4
-// CHECK3-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK3-NEXT:    store i64 4, i64* [[TMP20]], align 4
-// CHECK3-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK3-NEXT:    store i8* null, i8** [[TMP21]], align 4
-// CHECK3-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK3-NEXT:    [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i32*
-// CHECK3-NEXT:    store i32 2, i32* [[TMP23]], align 4
-// CHECK3-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK3-NEXT:    [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i32*
-// CHECK3-NEXT:    store i32 2, i32* [[TMP25]], align 4
-// CHECK3-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK3-NEXT:    store i64 4, i64* [[TMP26]], align 4
-// CHECK3-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK3-NEXT:    store i8* null, i8** [[TMP27]], align 4
-// CHECK3-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK3-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK3-NEXT:    store i8* null, i8** [[TMP20]], align 4
+// CHECK3-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK3-NEXT:    [[TMP22:%.*]] = bitcast i8** [[TMP21]] to i32*
+// CHECK3-NEXT:    store i32 2, i32* [[TMP22]], align 4
+// CHECK3-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK3-NEXT:    [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i32*
+// CHECK3-NEXT:    store i32 2, i32* [[TMP24]], align 4
+// CHECK3-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK3-NEXT:    store i8* null, i8** [[TMP25]], align 4
+// CHECK3-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK3-NEXT:    [[TMP27:%.*]] = bitcast i8** [[TMP26]] to i32*
+// CHECK3-NEXT:    store i32 [[TMP1]], i32* [[TMP27]], align 4
+// CHECK3-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
 // CHECK3-NEXT:    [[TMP29:%.*]] = bitcast i8** [[TMP28]] to i32*
 // CHECK3-NEXT:    store i32 [[TMP1]], i32* [[TMP29]], align 4
-// CHECK3-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK3-NEXT:    [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i32*
-// CHECK3-NEXT:    store i32 [[TMP1]], i32* [[TMP31]], align 4
-// CHECK3-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK3-NEXT:    store i64 4, i64* [[TMP32]], align 4
-// CHECK3-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
-// CHECK3-NEXT:    store i8* null, i8** [[TMP33]], align 4
-// CHECK3-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK3-NEXT:    [[TMP35:%.*]] = bitcast i8** [[TMP34]] to i16**
-// CHECK3-NEXT:    store i16* [[VLA]], i16** [[TMP35]], align 4
-// CHECK3-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
-// CHECK3-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i16**
-// CHECK3-NEXT:    store i16* [[VLA]], i16** [[TMP37]], align 4
-// CHECK3-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK3-NEXT:    store i64 [[TMP9]], i64* [[TMP38]], align 4
-// CHECK3-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
-// CHECK3-NEXT:    store i8* null, i8** [[TMP39]], align 4
-// CHECK3-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK3-NEXT:    [[TMP43:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l227.region_id, i32 5, i8** [[TMP40]], i8** [[TMP41]], i64* [[TMP42]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.23, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK3-NEXT:    [[TMP44:%.*]] = icmp ne i32 [[TMP43]], 0
-// CHECK3-NEXT:    br i1 [[TMP44]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK3-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
+// CHECK3-NEXT:    store i8* null, i8** [[TMP30]], align 4
+// CHECK3-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK3-NEXT:    [[TMP32:%.*]] = bitcast i8** [[TMP31]] to i16**
+// CHECK3-NEXT:    store i16* [[VLA]], i16** [[TMP32]], align 4
+// CHECK3-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK3-NEXT:    [[TMP34:%.*]] = bitcast i8** [[TMP33]] to i16**
+// CHECK3-NEXT:    store i16* [[VLA]], i16** [[TMP34]], align 4
+// CHECK3-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
+// CHECK3-NEXT:    store i64 [[TMP9]], i64* [[TMP35]], align 4
+// CHECK3-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
+// CHECK3-NEXT:    store i8* null, i8** [[TMP36]], align 4
+// CHECK3-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK3-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK3-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK3-NEXT:    [[TMP40:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l227.region_id, i32 5, i8** [[TMP37]], i8** [[TMP38]], i64* [[TMP39]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.25, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK3-NEXT:    [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0
+// CHECK3-NEXT:    br i1 [[TMP41]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK3:       omp_offload.failed:
 // CHECK3-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l227(%struct.S1* [[THIS1]], i32 [[TMP5]], i32 2, i32 [[TMP1]], i16* [[VLA]]) #[[ATTR3]]
 // CHECK3-NEXT:    br label [[OMP_OFFLOAD_CONT]]
@@ -4053,15 +3999,15 @@ int bar(int n){
 // CHECK3-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l227(%struct.S1* [[THIS1]], i32 [[TMP5]], i32 2, i32 [[TMP1]], i16* [[VLA]]) #[[ATTR3]]
 // CHECK3-NEXT:    br label [[OMP_IF_END]]
 // CHECK3:       omp_if.end:
-// CHECK3-NEXT:    [[TMP45:%.*]] = mul nsw i32 1, [[TMP1]]
-// CHECK3-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i32 [[TMP45]]
+// CHECK3-NEXT:    [[TMP42:%.*]] = mul nsw i32 1, [[TMP1]]
+// CHECK3-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i32 [[TMP42]]
 // CHECK3-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1
-// CHECK3-NEXT:    [[TMP46:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
-// CHECK3-NEXT:    [[CONV:%.*]] = sext i16 [[TMP46]] to i32
-// CHECK3-NEXT:    [[TMP47:%.*]] = load i32, i32* [[B]], align 4
-// CHECK3-NEXT:    [[ADD3:%.*]] = add nsw i32 [[CONV]], [[TMP47]]
-// CHECK3-NEXT:    [[TMP48:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK3-NEXT:    call void @llvm.stackrestore(i8* [[TMP48]])
+// CHECK3-NEXT:    [[TMP43:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
+// CHECK3-NEXT:    [[CONV:%.*]] = sext i16 [[TMP43]] to i32
+// CHECK3-NEXT:    [[TMP44:%.*]] = load i32, i32* [[B]], align 4
+// CHECK3-NEXT:    [[ADD3:%.*]] = add nsw i32 [[CONV]], [[TMP44]]
+// CHECK3-NEXT:    [[TMP45:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK3-NEXT:    call void @llvm.stackrestore(i8* [[TMP45]])
 // CHECK3-NEXT:    ret i32 [[ADD3]]
 //
 //
@@ -4132,7 +4078,7 @@ int bar(int n){
 // CHECK3-NEXT:    store i8* null, i8** [[TMP26]], align 4
 // CHECK3-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[TMP29:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l209.region_id, i32 4, i8** [[TMP27]], i8** [[TMP28]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.25, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.26, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK3-NEXT:    [[TMP29:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l209.region_id, i32 4, i8** [[TMP27]], i8** [[TMP28]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.27, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.28, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK3-NEXT:    [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0
 // CHECK3-NEXT:    br i1 [[TMP30]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK3:       omp_offload.failed:
@@ -4200,7 +4146,7 @@ int bar(int n){
 // CHECK3-NEXT:    store i8* null, i8** [[TMP19]], align 4
 // CHECK3-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l192.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.28, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.29, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK3-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l192.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.30, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.31, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK3-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK3-NEXT:    br i1 [[TMP23]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK3:       omp_offload.failed:
@@ -4237,11 +4183,11 @@ int bar(int n){
 // CHECK3-NEXT:    [[TMP4:%.*]] = load i32, i32* [[B_ADDR]], align 4
 // CHECK3-NEXT:    store i32 [[TMP4]], i32* [[B_CASTED]], align 4
 // CHECK3-NEXT:    [[TMP5:%.*]] = load i32, i32* [[B_CASTED]], align 4
-// CHECK3-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i32, i32, i32, i16*)* @.omp_outlined..22 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i32 [[TMP5]], i32 [[TMP1]], i32 [[TMP2]], i16* [[TMP3]])
+// CHECK3-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i32, i32, i32, i16*)* @.omp_outlined..23 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i32 [[TMP5]], i32 [[TMP1]], i32 [[TMP2]], i16* [[TMP3]])
 // CHECK3-NEXT:    ret void
 //
 //
-// CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..22
+// CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..23
 // CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR2]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -4307,11 +4253,11 @@ int bar(int n){
 // CHECK3-NEXT:    [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8*
 // CHECK3-NEXT:    store i8 [[TMP5]], i8* [[CONV3]], align 1
 // CHECK3-NEXT:    [[TMP6:%.*]] = load i32, i32* [[AAA_CASTED]], align 4
-// CHECK3-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, [10 x i32]*)* @.omp_outlined..24 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], [10 x i32]* [[TMP0]])
+// CHECK3-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, [10 x i32]*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], [10 x i32]* [[TMP0]])
 // CHECK3-NEXT:    ret void
 //
 //
-// CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..24
+// CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..26
 // CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -4369,11 +4315,11 @@ int bar(int n){
 // CHECK3-NEXT:    [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16*
 // CHECK3-NEXT:    store i16 [[TMP3]], i16* [[CONV1]], align 2
 // CHECK3-NEXT:    [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4
-// CHECK3-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..27 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], [10 x i32]* [[TMP0]])
+// CHECK3-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..29 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], [10 x i32]* [[TMP0]])
 // CHECK3-NEXT:    ret void
 //
 //
-// CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..27
+// CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..29
 // CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -4619,14 +4565,14 @@ int bar(int n){
 // CHECK4-NEXT:    [[TMP92:%.*]] = mul nuw i32 5, [[TMP3]]
 // CHECK4-NEXT:    [[TMP93:%.*]] = mul nuw i32 [[TMP92]], 8
 // CHECK4-NEXT:    [[TMP94:%.*]] = sext i32 [[TMP93]] to i64
-// CHECK4-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0
-// CHECK4-NEXT:    [[TMP96:%.*]] = bitcast i8** [[TMP95]] to i32*
-// CHECK4-NEXT:    store i32 [[TMP88]], i32* [[TMP96]], align 4
-// CHECK4-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 0
-// CHECK4-NEXT:    [[TMP98:%.*]] = bitcast i8** [[TMP97]] to i32*
-// CHECK4-NEXT:    store i32 [[TMP88]], i32* [[TMP98]], align 4
-// CHECK4-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK4-NEXT:    store i64 4, i64* [[TMP99]], align 4
+// CHECK4-NEXT:    [[TMP95:%.*]] = bitcast [9 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK4-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP95]], i8* align 4 bitcast ([9 x i64]* @.offload_sizes.10 to i8*), i32 72, i1 false)
+// CHECK4-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0
+// CHECK4-NEXT:    [[TMP97:%.*]] = bitcast i8** [[TMP96]] to i32*
+// CHECK4-NEXT:    store i32 [[TMP88]], i32* [[TMP97]], align 4
+// CHECK4-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 0
+// CHECK4-NEXT:    [[TMP99:%.*]] = bitcast i8** [[TMP98]] to i32*
+// CHECK4-NEXT:    store i32 [[TMP88]], i32* [[TMP99]], align 4
 // CHECK4-NEXT:    [[TMP100:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i32 0, i32 0
 // CHECK4-NEXT:    store i8* null, i8** [[TMP100]], align 4
 // CHECK4-NEXT:    [[TMP101:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 1
@@ -4635,86 +4581,74 @@ int bar(int n){
 // CHECK4-NEXT:    [[TMP103:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 1
 // CHECK4-NEXT:    [[TMP104:%.*]] = bitcast i8** [[TMP103]] to [10 x float]**
 // CHECK4-NEXT:    store [10 x float]* [[B]], [10 x float]** [[TMP104]], align 4
-// CHECK4-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK4-NEXT:    store i64 40, i64* [[TMP105]], align 4
-// CHECK4-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i32 0, i32 1
-// CHECK4-NEXT:    store i8* null, i8** [[TMP106]], align 4
-// CHECK4-NEXT:    [[TMP107:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 2
-// CHECK4-NEXT:    [[TMP108:%.*]] = bitcast i8** [[TMP107]] to i32*
-// CHECK4-NEXT:    store i32 [[TMP1]], i32* [[TMP108]], align 4
-// CHECK4-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 2
-// CHECK4-NEXT:    [[TMP110:%.*]] = bitcast i8** [[TMP109]] to i32*
-// CHECK4-NEXT:    store i32 [[TMP1]], i32* [[TMP110]], align 4
-// CHECK4-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK4-NEXT:    store i64 4, i64* [[TMP111]], align 4
-// CHECK4-NEXT:    [[TMP112:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i32 0, i32 2
-// CHECK4-NEXT:    store i8* null, i8** [[TMP112]], align 4
-// CHECK4-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 3
+// CHECK4-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i32 0, i32 1
+// CHECK4-NEXT:    store i8* null, i8** [[TMP105]], align 4
+// CHECK4-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 2
+// CHECK4-NEXT:    [[TMP107:%.*]] = bitcast i8** [[TMP106]] to i32*
+// CHECK4-NEXT:    store i32 [[TMP1]], i32* [[TMP107]], align 4
+// CHECK4-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 2
+// CHECK4-NEXT:    [[TMP109:%.*]] = bitcast i8** [[TMP108]] to i32*
+// CHECK4-NEXT:    store i32 [[TMP1]], i32* [[TMP109]], align 4
+// CHECK4-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i32 0, i32 2
+// CHECK4-NEXT:    store i8* null, i8** [[TMP110]], align 4
+// CHECK4-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 3
+// CHECK4-NEXT:    [[TMP112:%.*]] = bitcast i8** [[TMP111]] to float**
+// CHECK4-NEXT:    store float* [[VLA]], float** [[TMP112]], align 4
+// CHECK4-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 3
 // CHECK4-NEXT:    [[TMP114:%.*]] = bitcast i8** [[TMP113]] to float**
 // CHECK4-NEXT:    store float* [[VLA]], float** [[TMP114]], align 4
-// CHECK4-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 3
-// CHECK4-NEXT:    [[TMP116:%.*]] = bitcast i8** [[TMP115]] to float**
-// CHECK4-NEXT:    store float* [[VLA]], float** [[TMP116]], align 4
-// CHECK4-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK4-NEXT:    store i64 [[TMP91]], i64* [[TMP117]], align 4
-// CHECK4-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i32 0, i32 3
-// CHECK4-NEXT:    store i8* null, i8** [[TMP118]], align 4
-// CHECK4-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 4
+// CHECK4-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
+// CHECK4-NEXT:    store i64 [[TMP91]], i64* [[TMP115]], align 4
+// CHECK4-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i32 0, i32 3
+// CHECK4-NEXT:    store i8* null, i8** [[TMP116]], align 4
+// CHECK4-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 4
+// CHECK4-NEXT:    [[TMP118:%.*]] = bitcast i8** [[TMP117]] to [5 x [10 x double]]**
+// CHECK4-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP118]], align 4
+// CHECK4-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 4
 // CHECK4-NEXT:    [[TMP120:%.*]] = bitcast i8** [[TMP119]] to [5 x [10 x double]]**
 // CHECK4-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP120]], align 4
-// CHECK4-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 4
-// CHECK4-NEXT:    [[TMP122:%.*]] = bitcast i8** [[TMP121]] to [5 x [10 x double]]**
-// CHECK4-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP122]], align 4
-// CHECK4-NEXT:    [[TMP123:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK4-NEXT:    store i64 400, i64* [[TMP123]], align 4
-// CHECK4-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i32 0, i32 4
-// CHECK4-NEXT:    store i8* null, i8** [[TMP124]], align 4
-// CHECK4-NEXT:    [[TMP125:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 5
-// CHECK4-NEXT:    [[TMP126:%.*]] = bitcast i8** [[TMP125]] to i32*
-// CHECK4-NEXT:    store i32 5, i32* [[TMP126]], align 4
-// CHECK4-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 5
+// CHECK4-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i32 0, i32 4
+// CHECK4-NEXT:    store i8* null, i8** [[TMP121]], align 4
+// CHECK4-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 5
+// CHECK4-NEXT:    [[TMP123:%.*]] = bitcast i8** [[TMP122]] to i32*
+// CHECK4-NEXT:    store i32 5, i32* [[TMP123]], align 4
+// CHECK4-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 5
+// CHECK4-NEXT:    [[TMP125:%.*]] = bitcast i8** [[TMP124]] to i32*
+// CHECK4-NEXT:    store i32 5, i32* [[TMP125]], align 4
+// CHECK4-NEXT:    [[TMP126:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i32 0, i32 5
+// CHECK4-NEXT:    store i8* null, i8** [[TMP126]], align 4
+// CHECK4-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 6
 // CHECK4-NEXT:    [[TMP128:%.*]] = bitcast i8** [[TMP127]] to i32*
-// CHECK4-NEXT:    store i32 5, i32* [[TMP128]], align 4
-// CHECK4-NEXT:    [[TMP129:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 5
-// CHECK4-NEXT:    store i64 4, i64* [[TMP129]], align 4
-// CHECK4-NEXT:    [[TMP130:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i32 0, i32 5
-// CHECK4-NEXT:    store i8* null, i8** [[TMP130]], align 4
-// CHECK4-NEXT:    [[TMP131:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 6
-// CHECK4-NEXT:    [[TMP132:%.*]] = bitcast i8** [[TMP131]] to i32*
-// CHECK4-NEXT:    store i32 [[TMP3]], i32* [[TMP132]], align 4
-// CHECK4-NEXT:    [[TMP133:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 6
-// CHECK4-NEXT:    [[TMP134:%.*]] = bitcast i8** [[TMP133]] to i32*
-// CHECK4-NEXT:    store i32 [[TMP3]], i32* [[TMP134]], align 4
-// CHECK4-NEXT:    [[TMP135:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 6
-// CHECK4-NEXT:    store i64 4, i64* [[TMP135]], align 4
-// CHECK4-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i32 0, i32 6
-// CHECK4-NEXT:    store i8* null, i8** [[TMP136]], align 4
-// CHECK4-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 7
-// CHECK4-NEXT:    [[TMP138:%.*]] = bitcast i8** [[TMP137]] to double**
-// CHECK4-NEXT:    store double* [[VLA1]], double** [[TMP138]], align 4
-// CHECK4-NEXT:    [[TMP139:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 7
-// CHECK4-NEXT:    [[TMP140:%.*]] = bitcast i8** [[TMP139]] to double**
-// CHECK4-NEXT:    store double* [[VLA1]], double** [[TMP140]], align 4
-// CHECK4-NEXT:    [[TMP141:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
-// CHECK4-NEXT:    store i64 [[TMP94]], i64* [[TMP141]], align 4
-// CHECK4-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i32 0, i32 7
+// CHECK4-NEXT:    store i32 [[TMP3]], i32* [[TMP128]], align 4
+// CHECK4-NEXT:    [[TMP129:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 6
+// CHECK4-NEXT:    [[TMP130:%.*]] = bitcast i8** [[TMP129]] to i32*
+// CHECK4-NEXT:    store i32 [[TMP3]], i32* [[TMP130]], align 4
+// CHECK4-NEXT:    [[TMP131:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i32 0, i32 6
+// CHECK4-NEXT:    store i8* null, i8** [[TMP131]], align 4
+// CHECK4-NEXT:    [[TMP132:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 7
+// CHECK4-NEXT:    [[TMP133:%.*]] = bitcast i8** [[TMP132]] to double**
+// CHECK4-NEXT:    store double* [[VLA1]], double** [[TMP133]], align 4
+// CHECK4-NEXT:    [[TMP134:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 7
+// CHECK4-NEXT:    [[TMP135:%.*]] = bitcast i8** [[TMP134]] to double**
+// CHECK4-NEXT:    store double* [[VLA1]], double** [[TMP135]], align 4
+// CHECK4-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
+// CHECK4-NEXT:    store i64 [[TMP94]], i64* [[TMP136]], align 4
+// CHECK4-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i32 0, i32 7
+// CHECK4-NEXT:    store i8* null, i8** [[TMP137]], align 4
+// CHECK4-NEXT:    [[TMP138:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 8
+// CHECK4-NEXT:    [[TMP139:%.*]] = bitcast i8** [[TMP138]] to %struct.TT**
+// CHECK4-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP139]], align 4
+// CHECK4-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 8
+// CHECK4-NEXT:    [[TMP141:%.*]] = bitcast i8** [[TMP140]] to %struct.TT**
+// CHECK4-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP141]], align 4
+// CHECK4-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i32 0, i32 8
 // CHECK4-NEXT:    store i8* null, i8** [[TMP142]], align 4
-// CHECK4-NEXT:    [[TMP143:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 8
-// CHECK4-NEXT:    [[TMP144:%.*]] = bitcast i8** [[TMP143]] to %struct.TT**
-// CHECK4-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP144]], align 4
-// CHECK4-NEXT:    [[TMP145:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 8
-// CHECK4-NEXT:    [[TMP146:%.*]] = bitcast i8** [[TMP145]] to %struct.TT**
-// CHECK4-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP146]], align 4
-// CHECK4-NEXT:    [[TMP147:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 8
-// CHECK4-NEXT:    store i64 12, i64* [[TMP147]], align 4
-// CHECK4-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i32 0, i32 8
-// CHECK4-NEXT:    store i8* null, i8** [[TMP148]], align 4
-// CHECK4-NEXT:    [[TMP149:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0
-// CHECK4-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 0
-// CHECK4-NEXT:    [[TMP151:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK4-NEXT:    [[TMP152:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l142.region_id, i32 9, i8** [[TMP149]], i8** [[TMP150]], i64* [[TMP151]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.10, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK4-NEXT:    [[TMP153:%.*]] = icmp ne i32 [[TMP152]], 0
-// CHECK4-NEXT:    br i1 [[TMP153]], label [[OMP_OFFLOAD_FAILED23:%.*]], label [[OMP_OFFLOAD_CONT24:%.*]]
+// CHECK4-NEXT:    [[TMP143:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0
+// CHECK4-NEXT:    [[TMP144:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 0
+// CHECK4-NEXT:    [[TMP145:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK4-NEXT:    [[TMP146:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l142.region_id, i32 9, i8** [[TMP143]], i8** [[TMP144]], i64* [[TMP145]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.11, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK4-NEXT:    [[TMP147:%.*]] = icmp ne i32 [[TMP146]], 0
+// CHECK4-NEXT:    br i1 [[TMP147]], label [[OMP_OFFLOAD_FAILED23:%.*]], label [[OMP_OFFLOAD_CONT24:%.*]]
 // CHECK4:       omp_offload.failed23:
 // CHECK4-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l142(i32 [[TMP88]], [10 x float]* [[B]], i32 [[TMP1]], float* [[VLA]], [5 x [10 x double]]* [[C]], i32 5, i32 [[TMP3]], double* [[VLA1]], %struct.TT* [[D]]) #[[ATTR3]]
 // CHECK4-NEXT:    br label [[OMP_OFFLOAD_CONT24]]
@@ -4725,50 +4659,50 @@ int bar(int n){
 // CHECK4-NEXT:    br label [[OMP_IF_END26]]
 // CHECK4:       omp_if.end26:
 // CHECK4-NEXT:    store i32 0, i32* [[NN]], align 4
-// CHECK4-NEXT:    [[TMP154:%.*]] = load i32, i32* [[NN]], align 4
-// CHECK4-NEXT:    store i32 [[TMP154]], i32* [[NN_CASTED]], align 4
-// CHECK4-NEXT:    [[TMP155:%.*]] = load i32, i32* [[NN_CASTED]], align 4
-// CHECK4-NEXT:    [[TMP156:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 0
-// CHECK4-NEXT:    [[TMP157:%.*]] = bitcast i8** [[TMP156]] to i32*
-// CHECK4-NEXT:    store i32 [[TMP155]], i32* [[TMP157]], align 4
-// CHECK4-NEXT:    [[TMP158:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS28]], i32 0, i32 0
-// CHECK4-NEXT:    [[TMP159:%.*]] = bitcast i8** [[TMP158]] to i32*
-// CHECK4-NEXT:    store i32 [[TMP155]], i32* [[TMP159]], align 4
-// CHECK4-NEXT:    [[TMP160:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_MAPPERS29]], i32 0, i32 0
-// CHECK4-NEXT:    store i8* null, i8** [[TMP160]], align 4
-// CHECK4-NEXT:    [[TMP161:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 0
-// CHECK4-NEXT:    [[TMP162:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS28]], i32 0, i32 0
-// CHECK4-NEXT:    [[TMP163:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l154.region_id, i32 1, i8** [[TMP161]], i8** [[TMP162]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.13, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.14, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK4-NEXT:    [[TMP164:%.*]] = icmp ne i32 [[TMP163]], 0
-// CHECK4-NEXT:    br i1 [[TMP164]], label [[OMP_OFFLOAD_FAILED30:%.*]], label [[OMP_OFFLOAD_CONT31:%.*]]
+// CHECK4-NEXT:    [[TMP148:%.*]] = load i32, i32* [[NN]], align 4
+// CHECK4-NEXT:    store i32 [[TMP148]], i32* [[NN_CASTED]], align 4
+// CHECK4-NEXT:    [[TMP149:%.*]] = load i32, i32* [[NN_CASTED]], align 4
+// CHECK4-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 0
+// CHECK4-NEXT:    [[TMP151:%.*]] = bitcast i8** [[TMP150]] to i32*
+// CHECK4-NEXT:    store i32 [[TMP149]], i32* [[TMP151]], align 4
+// CHECK4-NEXT:    [[TMP152:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS28]], i32 0, i32 0
+// CHECK4-NEXT:    [[TMP153:%.*]] = bitcast i8** [[TMP152]] to i32*
+// CHECK4-NEXT:    store i32 [[TMP149]], i32* [[TMP153]], align 4
+// CHECK4-NEXT:    [[TMP154:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_MAPPERS29]], i32 0, i32 0
+// CHECK4-NEXT:    store i8* null, i8** [[TMP154]], align 4
+// CHECK4-NEXT:    [[TMP155:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 0
+// CHECK4-NEXT:    [[TMP156:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS28]], i32 0, i32 0
+// CHECK4-NEXT:    [[TMP157:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l154.region_id, i32 1, i8** [[TMP155]], i8** [[TMP156]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.14, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.15, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK4-NEXT:    [[TMP158:%.*]] = icmp ne i32 [[TMP157]], 0
+// CHECK4-NEXT:    br i1 [[TMP158]], label [[OMP_OFFLOAD_FAILED30:%.*]], label [[OMP_OFFLOAD_CONT31:%.*]]
 // CHECK4:       omp_offload.failed30:
-// CHECK4-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l154(i32 [[TMP155]]) #[[ATTR3]]
+// CHECK4-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l154(i32 [[TMP149]]) #[[ATTR3]]
 // CHECK4-NEXT:    br label [[OMP_OFFLOAD_CONT31]]
 // CHECK4:       omp_offload.cont31:
-// CHECK4-NEXT:    [[TMP165:%.*]] = load i32, i32* [[NN]], align 4
-// CHECK4-NEXT:    store i32 [[TMP165]], i32* [[NN_CASTED32]], align 4
-// CHECK4-NEXT:    [[TMP166:%.*]] = load i32, i32* [[NN_CASTED32]], align 4
-// CHECK4-NEXT:    [[TMP167:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS33]], i32 0, i32 0
-// CHECK4-NEXT:    [[TMP168:%.*]] = bitcast i8** [[TMP167]] to i32*
-// CHECK4-NEXT:    store i32 [[TMP166]], i32* [[TMP168]], align 4
-// CHECK4-NEXT:    [[TMP169:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS34]], i32 0, i32 0
-// CHECK4-NEXT:    [[TMP170:%.*]] = bitcast i8** [[TMP169]] to i32*
-// CHECK4-NEXT:    store i32 [[TMP166]], i32* [[TMP170]], align 4
-// CHECK4-NEXT:    [[TMP171:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_MAPPERS35]], i32 0, i32 0
-// CHECK4-NEXT:    store i8* null, i8** [[TMP171]], align 4
-// CHECK4-NEXT:    [[TMP172:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS33]], i32 0, i32 0
-// CHECK4-NEXT:    [[TMP173:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS34]], i32 0, i32 0
-// CHECK4-NEXT:    [[TMP174:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l157.region_id, i32 1, i8** [[TMP172]], i8** [[TMP173]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.17, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.18, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK4-NEXT:    [[TMP175:%.*]] = icmp ne i32 [[TMP174]], 0
-// CHECK4-NEXT:    br i1 [[TMP175]], label [[OMP_OFFLOAD_FAILED36:%.*]], label [[OMP_OFFLOAD_CONT37:%.*]]
+// CHECK4-NEXT:    [[TMP159:%.*]] = load i32, i32* [[NN]], align 4
+// CHECK4-NEXT:    store i32 [[TMP159]], i32* [[NN_CASTED32]], align 4
+// CHECK4-NEXT:    [[TMP160:%.*]] = load i32, i32* [[NN_CASTED32]], align 4
+// CHECK4-NEXT:    [[TMP161:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS33]], i32 0, i32 0
+// CHECK4-NEXT:    [[TMP162:%.*]] = bitcast i8** [[TMP161]] to i32*
+// CHECK4-NEXT:    store i32 [[TMP160]], i32* [[TMP162]], align 4
+// CHECK4-NEXT:    [[TMP163:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS34]], i32 0, i32 0
+// CHECK4-NEXT:    [[TMP164:%.*]] = bitcast i8** [[TMP163]] to i32*
+// CHECK4-NEXT:    store i32 [[TMP160]], i32* [[TMP164]], align 4
+// CHECK4-NEXT:    [[TMP165:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_MAPPERS35]], i32 0, i32 0
+// CHECK4-NEXT:    store i8* null, i8** [[TMP165]], align 4
+// CHECK4-NEXT:    [[TMP166:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS33]], i32 0, i32 0
+// CHECK4-NEXT:    [[TMP167:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS34]], i32 0, i32 0
+// CHECK4-NEXT:    [[TMP168:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l157.region_id, i32 1, i8** [[TMP166]], i8** [[TMP167]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.18, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.19, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK4-NEXT:    [[TMP169:%.*]] = icmp ne i32 [[TMP168]], 0
+// CHECK4-NEXT:    br i1 [[TMP169]], label [[OMP_OFFLOAD_FAILED36:%.*]], label [[OMP_OFFLOAD_CONT37:%.*]]
 // CHECK4:       omp_offload.failed36:
-// CHECK4-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l157(i32 [[TMP166]]) #[[ATTR3]]
+// CHECK4-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l157(i32 [[TMP160]]) #[[ATTR3]]
 // CHECK4-NEXT:    br label [[OMP_OFFLOAD_CONT37]]
 // CHECK4:       omp_offload.cont37:
-// CHECK4-NEXT:    [[TMP176:%.*]] = load i32, i32* [[A]], align 4
-// CHECK4-NEXT:    [[TMP177:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK4-NEXT:    call void @llvm.stackrestore(i8* [[TMP177]])
-// CHECK4-NEXT:    ret i32 [[TMP176]]
+// CHECK4-NEXT:    [[TMP170:%.*]] = load i32, i32* [[A]], align 4
+// CHECK4-NEXT:    [[TMP171:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK4-NEXT:    call void @llvm.stackrestore(i8* [[TMP171]])
+// CHECK4-NEXT:    ret i32 [[TMP170]]
 //
 //
 // CHECK4-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l101
@@ -5135,11 +5069,11 @@ int bar(int n){
 // CHECK4-NEXT:    [[TMP0:%.*]] = load i32, i32* [[NN_ADDR]], align 4
 // CHECK4-NEXT:    store i32 [[TMP0]], i32* [[NN_CASTED]], align 4
 // CHECK4-NEXT:    [[TMP1:%.*]] = load i32, i32* [[NN_CASTED]], align 4
-// CHECK4-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32 [[TMP1]])
+// CHECK4-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i32 [[TMP1]])
 // CHECK4-NEXT:    ret void
 //
 //
-// CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..11
+// CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..12
 // CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[NN:%.*]]) #[[ATTR2]] {
 // CHECK4-NEXT:  entry:
 // CHECK4-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -5152,11 +5086,11 @@ int bar(int n){
 // CHECK4-NEXT:    [[TMP0:%.*]] = load i32, i32* [[NN_ADDR]], align 4
 // CHECK4-NEXT:    store i32 [[TMP0]], i32* [[NN_CASTED]], align 4
 // CHECK4-NEXT:    [[TMP1:%.*]] = load i32, i32* [[NN_CASTED]], align 4
-// CHECK4-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i32 [[TMP1]])
+// CHECK4-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i32 [[TMP1]])
 // CHECK4-NEXT:    ret void
 //
 //
-// CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..12
+// CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..13
 // CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[NN:%.*]]) #[[ATTR2]] {
 // CHECK4-NEXT:  entry:
 // CHECK4-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -5177,11 +5111,11 @@ int bar(int n){
 // CHECK4-NEXT:    [[TMP0:%.*]] = load i32, i32* [[NN_ADDR]], align 4
 // CHECK4-NEXT:    store i32 [[TMP0]], i32* [[NN_CASTED]], align 4
 // CHECK4-NEXT:    [[TMP1:%.*]] = load i32, i32* [[NN_CASTED]], align 4
-// CHECK4-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP1]])
+// CHECK4-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i32 [[TMP1]])
 // CHECK4-NEXT:    ret void
 //
 //
-// CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..15
+// CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..16
 // CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[NN:%.*]]) #[[ATTR2]] {
 // CHECK4-NEXT:  entry:
 // CHECK4-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -5190,11 +5124,11 @@ int bar(int n){
 // CHECK4-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK4-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
 // CHECK4-NEXT:    store i32 [[NN]], i32* [[NN_ADDR]], align 4
-// CHECK4-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i32* [[NN_ADDR]])
+// CHECK4-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..17 to void (i32*, i32*, ...)*), i32* [[NN_ADDR]])
 // CHECK4-NEXT:    ret void
 //
 //
-// CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..16
+// CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..17
 // CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[NN:%.*]]) #[[ATTR2]] {
 // CHECK4-NEXT:  entry:
 // CHECK4-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -5228,7 +5162,7 @@ int bar(int n){
 // CHECK4-NEXT:    store i8* null, i8** [[TMP5]], align 4
 // CHECK4-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK4-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK4-NEXT:    [[TMP8:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6bazzzziPi_l182.region_id, i32 1, i8** [[TMP6]], i8** [[TMP7]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.20, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.21, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK4-NEXT:    [[TMP8:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6bazzzziPi_l182.region_id, i32 1, i8** [[TMP6]], i8** [[TMP7]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.21, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.22, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK4-NEXT:    [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0
 // CHECK4-NEXT:    br i1 [[TMP9]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK4:       omp_offload.failed:
@@ -5244,11 +5178,11 @@ int bar(int n){
 // CHECK4-NEXT:    [[VLA_ADDR:%.*]] = alloca i32, align 4
 // CHECK4-NEXT:    store i32 [[VLA]], i32* [[VLA_ADDR]], align 4
 // CHECK4-NEXT:    [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4
-// CHECK4-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i32 [[TMP0]])
+// CHECK4-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32)* @.omp_outlined..20 to void (i32*, i32*, ...)*), i32 [[TMP0]])
 // CHECK4-NEXT:    ret void
 //
 //
-// CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..19
+// CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..20
 // CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[VLA:%.*]]) #[[ATTR2]] {
 // CHECK4-NEXT:  entry:
 // CHECK4-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -5330,14 +5264,14 @@ int bar(int n){
 // CHECK4-NEXT:    [[TMP7:%.*]] = mul nuw i32 2, [[TMP1]]
 // CHECK4-NEXT:    [[TMP8:%.*]] = mul nuw i32 [[TMP7]], 2
 // CHECK4-NEXT:    [[TMP9:%.*]] = sext i32 [[TMP8]] to i64
-// CHECK4-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK4-NEXT:    [[TMP11:%.*]] = bitcast i8** [[TMP10]] to %struct.S1**
-// CHECK4-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP11]], align 4
-// CHECK4-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK4-NEXT:    [[TMP13:%.*]] = bitcast i8** [[TMP12]] to double**
-// CHECK4-NEXT:    store double* [[A]], double** [[TMP13]], align 4
-// CHECK4-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK4-NEXT:    store i64 8, i64* [[TMP14]], align 4
+// CHECK4-NEXT:    [[TMP10:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK4-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP10]], i8* align 4 bitcast ([5 x i64]* @.offload_sizes.24 to i8*), i32 40, i1 false)
+// CHECK4-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK4-NEXT:    [[TMP12:%.*]] = bitcast i8** [[TMP11]] to %struct.S1**
+// CHECK4-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP12]], align 4
+// CHECK4-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK4-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to double**
+// CHECK4-NEXT:    store double* [[A]], double** [[TMP14]], align 4
 // CHECK4-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
 // CHECK4-NEXT:    store i8* null, i8** [[TMP15]], align 4
 // CHECK4-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -5346,46 +5280,40 @@ int bar(int n){
 // CHECK4-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK4-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32*
 // CHECK4-NEXT:    store i32 [[TMP5]], i32* [[TMP19]], align 4
-// CHECK4-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK4-NEXT:    store i64 4, i64* [[TMP20]], align 4
-// CHECK4-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK4-NEXT:    store i8* null, i8** [[TMP21]], align 4
-// CHECK4-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK4-NEXT:    [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i32*
-// CHECK4-NEXT:    store i32 2, i32* [[TMP23]], align 4
-// CHECK4-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK4-NEXT:    [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i32*
-// CHECK4-NEXT:    store i32 2, i32* [[TMP25]], align 4
-// CHECK4-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK4-NEXT:    store i64 4, i64* [[TMP26]], align 4
-// CHECK4-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK4-NEXT:    store i8* null, i8** [[TMP27]], align 4
-// CHECK4-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK4-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK4-NEXT:    store i8* null, i8** [[TMP20]], align 4
+// CHECK4-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK4-NEXT:    [[TMP22:%.*]] = bitcast i8** [[TMP21]] to i32*
+// CHECK4-NEXT:    store i32 2, i32* [[TMP22]], align 4
+// CHECK4-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK4-NEXT:    [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i32*
+// CHECK4-NEXT:    store i32 2, i32* [[TMP24]], align 4
+// CHECK4-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK4-NEXT:    store i8* null, i8** [[TMP25]], align 4
+// CHECK4-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK4-NEXT:    [[TMP27:%.*]] = bitcast i8** [[TMP26]] to i32*
+// CHECK4-NEXT:    store i32 [[TMP1]], i32* [[TMP27]], align 4
+// CHECK4-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
 // CHECK4-NEXT:    [[TMP29:%.*]] = bitcast i8** [[TMP28]] to i32*
 // CHECK4-NEXT:    store i32 [[TMP1]], i32* [[TMP29]], align 4
-// CHECK4-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK4-NEXT:    [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i32*
-// CHECK4-NEXT:    store i32 [[TMP1]], i32* [[TMP31]], align 4
-// CHECK4-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK4-NEXT:    store i64 4, i64* [[TMP32]], align 4
-// CHECK4-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
-// CHECK4-NEXT:    store i8* null, i8** [[TMP33]], align 4
-// CHECK4-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK4-NEXT:    [[TMP35:%.*]] = bitcast i8** [[TMP34]] to i16**
-// CHECK4-NEXT:    store i16* [[VLA]], i16** [[TMP35]], align 4
-// CHECK4-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
-// CHECK4-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i16**
-// CHECK4-NEXT:    store i16* [[VLA]], i16** [[TMP37]], align 4
-// CHECK4-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK4-NEXT:    store i64 [[TMP9]], i64* [[TMP38]], align 4
-// CHECK4-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
-// CHECK4-NEXT:    store i8* null, i8** [[TMP39]], align 4
-// CHECK4-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK4-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK4-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK4-NEXT:    [[TMP43:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l227.region_id, i32 5, i8** [[TMP40]], i8** [[TMP41]], i64* [[TMP42]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.23, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK4-NEXT:    [[TMP44:%.*]] = icmp ne i32 [[TMP43]], 0
-// CHECK4-NEXT:    br i1 [[TMP44]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK4-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
+// CHECK4-NEXT:    store i8* null, i8** [[TMP30]], align 4
+// CHECK4-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK4-NEXT:    [[TMP32:%.*]] = bitcast i8** [[TMP31]] to i16**
+// CHECK4-NEXT:    store i16* [[VLA]], i16** [[TMP32]], align 4
+// CHECK4-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK4-NEXT:    [[TMP34:%.*]] = bitcast i8** [[TMP33]] to i16**
+// CHECK4-NEXT:    store i16* [[VLA]], i16** [[TMP34]], align 4
+// CHECK4-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
+// CHECK4-NEXT:    store i64 [[TMP9]], i64* [[TMP35]], align 4
+// CHECK4-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
+// CHECK4-NEXT:    store i8* null, i8** [[TMP36]], align 4
+// CHECK4-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK4-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK4-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK4-NEXT:    [[TMP40:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l227.region_id, i32 5, i8** [[TMP37]], i8** [[TMP38]], i64* [[TMP39]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.25, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK4-NEXT:    [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0
+// CHECK4-NEXT:    br i1 [[TMP41]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK4:       omp_offload.failed:
 // CHECK4-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l227(%struct.S1* [[THIS1]], i32 [[TMP5]], i32 2, i32 [[TMP1]], i16* [[VLA]]) #[[ATTR3]]
 // CHECK4-NEXT:    br label [[OMP_OFFLOAD_CONT]]
@@ -5395,15 +5323,15 @@ int bar(int n){
 // CHECK4-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l227(%struct.S1* [[THIS1]], i32 [[TMP5]], i32 2, i32 [[TMP1]], i16* [[VLA]]) #[[ATTR3]]
 // CHECK4-NEXT:    br label [[OMP_IF_END]]
 // CHECK4:       omp_if.end:
-// CHECK4-NEXT:    [[TMP45:%.*]] = mul nsw i32 1, [[TMP1]]
-// CHECK4-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i32 [[TMP45]]
+// CHECK4-NEXT:    [[TMP42:%.*]] = mul nsw i32 1, [[TMP1]]
+// CHECK4-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i32 [[TMP42]]
 // CHECK4-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1
-// CHECK4-NEXT:    [[TMP46:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
-// CHECK4-NEXT:    [[CONV:%.*]] = sext i16 [[TMP46]] to i32
-// CHECK4-NEXT:    [[TMP47:%.*]] = load i32, i32* [[B]], align 4
-// CHECK4-NEXT:    [[ADD3:%.*]] = add nsw i32 [[CONV]], [[TMP47]]
-// CHECK4-NEXT:    [[TMP48:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK4-NEXT:    call void @llvm.stackrestore(i8* [[TMP48]])
+// CHECK4-NEXT:    [[TMP43:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
+// CHECK4-NEXT:    [[CONV:%.*]] = sext i16 [[TMP43]] to i32
+// CHECK4-NEXT:    [[TMP44:%.*]] = load i32, i32* [[B]], align 4
+// CHECK4-NEXT:    [[ADD3:%.*]] = add nsw i32 [[CONV]], [[TMP44]]
+// CHECK4-NEXT:    [[TMP45:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK4-NEXT:    call void @llvm.stackrestore(i8* [[TMP45]])
 // CHECK4-NEXT:    ret i32 [[ADD3]]
 //
 //
@@ -5474,7 +5402,7 @@ int bar(int n){
 // CHECK4-NEXT:    store i8* null, i8** [[TMP26]], align 4
 // CHECK4-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK4-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK4-NEXT:    [[TMP29:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l209.region_id, i32 4, i8** [[TMP27]], i8** [[TMP28]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.25, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.26, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK4-NEXT:    [[TMP29:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l209.region_id, i32 4, i8** [[TMP27]], i8** [[TMP28]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.27, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.28, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK4-NEXT:    [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0
 // CHECK4-NEXT:    br i1 [[TMP30]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK4:       omp_offload.failed:
@@ -5542,7 +5470,7 @@ int bar(int n){
 // CHECK4-NEXT:    store i8* null, i8** [[TMP19]], align 4
 // CHECK4-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK4-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK4-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l192.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.28, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.29, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK4-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l192.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.30, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.31, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK4-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK4-NEXT:    br i1 [[TMP23]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK4:       omp_offload.failed:
@@ -5579,11 +5507,11 @@ int bar(int n){
 // CHECK4-NEXT:    [[TMP4:%.*]] = load i32, i32* [[B_ADDR]], align 4
 // CHECK4-NEXT:    store i32 [[TMP4]], i32* [[B_CASTED]], align 4
 // CHECK4-NEXT:    [[TMP5:%.*]] = load i32, i32* [[B_CASTED]], align 4
-// CHECK4-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i32, i32, i32, i16*)* @.omp_outlined..22 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i32 [[TMP5]], i32 [[TMP1]], i32 [[TMP2]], i16* [[TMP3]])
+// CHECK4-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i32, i32, i32, i16*)* @.omp_outlined..23 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i32 [[TMP5]], i32 [[TMP1]], i32 [[TMP2]], i16* [[TMP3]])
 // CHECK4-NEXT:    ret void
 //
 //
-// CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..22
+// CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..23
 // CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR2]] {
 // CHECK4-NEXT:  entry:
 // CHECK4-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -5649,11 +5577,11 @@ int bar(int n){
 // CHECK4-NEXT:    [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8*
 // CHECK4-NEXT:    store i8 [[TMP5]], i8* [[CONV3]], align 1
 // CHECK4-NEXT:    [[TMP6:%.*]] = load i32, i32* [[AAA_CASTED]], align 4
-// CHECK4-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, [10 x i32]*)* @.omp_outlined..24 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], [10 x i32]* [[TMP0]])
+// CHECK4-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, [10 x i32]*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], [10 x i32]* [[TMP0]])
 // CHECK4-NEXT:    ret void
 //
 //
-// CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..24
+// CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..26
 // CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] {
 // CHECK4-NEXT:  entry:
 // CHECK4-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -5711,11 +5639,11 @@ int bar(int n){
 // CHECK4-NEXT:    [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16*
 // CHECK4-NEXT:    store i16 [[TMP3]], i16* [[CONV1]], align 2
 // CHECK4-NEXT:    [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4
-// CHECK4-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..27 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], [10 x i32]* [[TMP0]])
+// CHECK4-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..29 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], [10 x i32]* [[TMP0]])
 // CHECK4-NEXT:    ret void
 //
 //
-// CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..27
+// CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..29
 // CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] {
 // CHECK4-NEXT:  entry:
 // CHECK4-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -8072,14 +8000,14 @@ int bar(int n){
 // CHECK17-NEXT:    [[TMP92:%.*]] = mul nuw i64 [[TMP2]], 4
 // CHECK17-NEXT:    [[TMP93:%.*]] = mul nuw i64 5, [[TMP5]]
 // CHECK17-NEXT:    [[TMP94:%.*]] = mul nuw i64 [[TMP93]], 8
-// CHECK17-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS25]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP96:%.*]] = bitcast i8** [[TMP95]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP90]], i64* [[TMP96]], align 8
-// CHECK17-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS26]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP98:%.*]] = bitcast i8** [[TMP97]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP90]], i64* [[TMP98]], align 8
-// CHECK17-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK17-NEXT:    store i64 4, i64* [[TMP99]], align 8
+// CHECK17-NEXT:    [[TMP95:%.*]] = bitcast [9 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK17-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP95]], i8* align 8 bitcast ([9 x i64]* @.offload_sizes.10 to i8*), i64 72, i1 false)
+// CHECK17-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS25]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP97:%.*]] = bitcast i8** [[TMP96]] to i64*
+// CHECK17-NEXT:    store i64 [[TMP90]], i64* [[TMP97]], align 8
+// CHECK17-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS26]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP99:%.*]] = bitcast i8** [[TMP98]] to i64*
+// CHECK17-NEXT:    store i64 [[TMP90]], i64* [[TMP99]], align 8
 // CHECK17-NEXT:    [[TMP100:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS27]], i64 0, i64 0
 // CHECK17-NEXT:    store i8* null, i8** [[TMP100]], align 8
 // CHECK17-NEXT:    [[TMP101:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS25]], i32 0, i32 1
@@ -8088,86 +8016,74 @@ int bar(int n){
 // CHECK17-NEXT:    [[TMP103:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS26]], i32 0, i32 1
 // CHECK17-NEXT:    [[TMP104:%.*]] = bitcast i8** [[TMP103]] to [10 x float]**
 // CHECK17-NEXT:    store [10 x float]* [[B]], [10 x float]** [[TMP104]], align 8
-// CHECK17-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK17-NEXT:    store i64 40, i64* [[TMP105]], align 8
-// CHECK17-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS27]], i64 0, i64 1
-// CHECK17-NEXT:    store i8* null, i8** [[TMP106]], align 8
-// CHECK17-NEXT:    [[TMP107:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS25]], i32 0, i32 2
-// CHECK17-NEXT:    [[TMP108:%.*]] = bitcast i8** [[TMP107]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP2]], i64* [[TMP108]], align 8
-// CHECK17-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS26]], i32 0, i32 2
-// CHECK17-NEXT:    [[TMP110:%.*]] = bitcast i8** [[TMP109]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP2]], i64* [[TMP110]], align 8
-// CHECK17-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK17-NEXT:    store i64 8, i64* [[TMP111]], align 8
-// CHECK17-NEXT:    [[TMP112:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS27]], i64 0, i64 2
-// CHECK17-NEXT:    store i8* null, i8** [[TMP112]], align 8
-// CHECK17-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS25]], i32 0, i32 3
+// CHECK17-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS27]], i64 0, i64 1
+// CHECK17-NEXT:    store i8* null, i8** [[TMP105]], align 8
+// CHECK17-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS25]], i32 0, i32 2
+// CHECK17-NEXT:    [[TMP107:%.*]] = bitcast i8** [[TMP106]] to i64*
+// CHECK17-NEXT:    store i64 [[TMP2]], i64* [[TMP107]], align 8
+// CHECK17-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS26]], i32 0, i32 2
+// CHECK17-NEXT:    [[TMP109:%.*]] = bitcast i8** [[TMP108]] to i64*
+// CHECK17-NEXT:    store i64 [[TMP2]], i64* [[TMP109]], align 8
+// CHECK17-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS27]], i64 0, i64 2
+// CHECK17-NEXT:    store i8* null, i8** [[TMP110]], align 8
+// CHECK17-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS25]], i32 0, i32 3
+// CHECK17-NEXT:    [[TMP112:%.*]] = bitcast i8** [[TMP111]] to float**
+// CHECK17-NEXT:    store float* [[VLA]], float** [[TMP112]], align 8
+// CHECK17-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS26]], i32 0, i32 3
 // CHECK17-NEXT:    [[TMP114:%.*]] = bitcast i8** [[TMP113]] to float**
 // CHECK17-NEXT:    store float* [[VLA]], float** [[TMP114]], align 8
-// CHECK17-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS26]], i32 0, i32 3
-// CHECK17-NEXT:    [[TMP116:%.*]] = bitcast i8** [[TMP115]] to float**
-// CHECK17-NEXT:    store float* [[VLA]], float** [[TMP116]], align 8
-// CHECK17-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK17-NEXT:    store i64 [[TMP92]], i64* [[TMP117]], align 8
-// CHECK17-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS27]], i64 0, i64 3
-// CHECK17-NEXT:    store i8* null, i8** [[TMP118]], align 8
-// CHECK17-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS25]], i32 0, i32 4
+// CHECK17-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
+// CHECK17-NEXT:    store i64 [[TMP92]], i64* [[TMP115]], align 8
+// CHECK17-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS27]], i64 0, i64 3
+// CHECK17-NEXT:    store i8* null, i8** [[TMP116]], align 8
+// CHECK17-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS25]], i32 0, i32 4
+// CHECK17-NEXT:    [[TMP118:%.*]] = bitcast i8** [[TMP117]] to [5 x [10 x double]]**
+// CHECK17-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP118]], align 8
+// CHECK17-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS26]], i32 0, i32 4
 // CHECK17-NEXT:    [[TMP120:%.*]] = bitcast i8** [[TMP119]] to [5 x [10 x double]]**
 // CHECK17-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP120]], align 8
-// CHECK17-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS26]], i32 0, i32 4
-// CHECK17-NEXT:    [[TMP122:%.*]] = bitcast i8** [[TMP121]] to [5 x [10 x double]]**
-// CHECK17-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP122]], align 8
-// CHECK17-NEXT:    [[TMP123:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK17-NEXT:    store i64 400, i64* [[TMP123]], align 8
-// CHECK17-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS27]], i64 0, i64 4
-// CHECK17-NEXT:    store i8* null, i8** [[TMP124]], align 8
-// CHECK17-NEXT:    [[TMP125:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS25]], i32 0, i32 5
-// CHECK17-NEXT:    [[TMP126:%.*]] = bitcast i8** [[TMP125]] to i64*
-// CHECK17-NEXT:    store i64 5, i64* [[TMP126]], align 8
-// CHECK17-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS26]], i32 0, i32 5
+// CHECK17-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS27]], i64 0, i64 4
+// CHECK17-NEXT:    store i8* null, i8** [[TMP121]], align 8
+// CHECK17-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS25]], i32 0, i32 5
+// CHECK17-NEXT:    [[TMP123:%.*]] = bitcast i8** [[TMP122]] to i64*
+// CHECK17-NEXT:    store i64 5, i64* [[TMP123]], align 8
+// CHECK17-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS26]], i32 0, i32 5
+// CHECK17-NEXT:    [[TMP125:%.*]] = bitcast i8** [[TMP124]] to i64*
+// CHECK17-NEXT:    store i64 5, i64* [[TMP125]], align 8
+// CHECK17-NEXT:    [[TMP126:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS27]], i64 0, i64 5
+// CHECK17-NEXT:    store i8* null, i8** [[TMP126]], align 8
+// CHECK17-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS25]], i32 0, i32 6
 // CHECK17-NEXT:    [[TMP128:%.*]] = bitcast i8** [[TMP127]] to i64*
-// CHECK17-NEXT:    store i64 5, i64* [[TMP128]], align 8
-// CHECK17-NEXT:    [[TMP129:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 5
-// CHECK17-NEXT:    store i64 8, i64* [[TMP129]], align 8
-// CHECK17-NEXT:    [[TMP130:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS27]], i64 0, i64 5
-// CHECK17-NEXT:    store i8* null, i8** [[TMP130]], align 8
-// CHECK17-NEXT:    [[TMP131:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS25]], i32 0, i32 6
-// CHECK17-NEXT:    [[TMP132:%.*]] = bitcast i8** [[TMP131]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP5]], i64* [[TMP132]], align 8
-// CHECK17-NEXT:    [[TMP133:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS26]], i32 0, i32 6
-// CHECK17-NEXT:    [[TMP134:%.*]] = bitcast i8** [[TMP133]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP5]], i64* [[TMP134]], align 8
-// CHECK17-NEXT:    [[TMP135:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 6
-// CHECK17-NEXT:    store i64 8, i64* [[TMP135]], align 8
-// CHECK17-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS27]], i64 0, i64 6
-// CHECK17-NEXT:    store i8* null, i8** [[TMP136]], align 8
-// CHECK17-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS25]], i32 0, i32 7
-// CHECK17-NEXT:    [[TMP138:%.*]] = bitcast i8** [[TMP137]] to double**
-// CHECK17-NEXT:    store double* [[VLA1]], double** [[TMP138]], align 8
-// CHECK17-NEXT:    [[TMP139:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS26]], i32 0, i32 7
-// CHECK17-NEXT:    [[TMP140:%.*]] = bitcast i8** [[TMP139]] to double**
-// CHECK17-NEXT:    store double* [[VLA1]], double** [[TMP140]], align 8
-// CHECK17-NEXT:    [[TMP141:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
-// CHECK17-NEXT:    store i64 [[TMP94]], i64* [[TMP141]], align 8
-// CHECK17-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS27]], i64 0, i64 7
+// CHECK17-NEXT:    store i64 [[TMP5]], i64* [[TMP128]], align 8
+// CHECK17-NEXT:    [[TMP129:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS26]], i32 0, i32 6
+// CHECK17-NEXT:    [[TMP130:%.*]] = bitcast i8** [[TMP129]] to i64*
+// CHECK17-NEXT:    store i64 [[TMP5]], i64* [[TMP130]], align 8
+// CHECK17-NEXT:    [[TMP131:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS27]], i64 0, i64 6
+// CHECK17-NEXT:    store i8* null, i8** [[TMP131]], align 8
+// CHECK17-NEXT:    [[TMP132:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS25]], i32 0, i32 7
+// CHECK17-NEXT:    [[TMP133:%.*]] = bitcast i8** [[TMP132]] to double**
+// CHECK17-NEXT:    store double* [[VLA1]], double** [[TMP133]], align 8
+// CHECK17-NEXT:    [[TMP134:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS26]], i32 0, i32 7
+// CHECK17-NEXT:    [[TMP135:%.*]] = bitcast i8** [[TMP134]] to double**
+// CHECK17-NEXT:    store double* [[VLA1]], double** [[TMP135]], align 8
+// CHECK17-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
+// CHECK17-NEXT:    store i64 [[TMP94]], i64* [[TMP136]], align 8
+// CHECK17-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS27]], i64 0, i64 7
+// CHECK17-NEXT:    store i8* null, i8** [[TMP137]], align 8
+// CHECK17-NEXT:    [[TMP138:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS25]], i32 0, i32 8
+// CHECK17-NEXT:    [[TMP139:%.*]] = bitcast i8** [[TMP138]] to %struct.TT**
+// CHECK17-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP139]], align 8
+// CHECK17-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS26]], i32 0, i32 8
+// CHECK17-NEXT:    [[TMP141:%.*]] = bitcast i8** [[TMP140]] to %struct.TT**
+// CHECK17-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP141]], align 8
+// CHECK17-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS27]], i64 0, i64 8
 // CHECK17-NEXT:    store i8* null, i8** [[TMP142]], align 8
-// CHECK17-NEXT:    [[TMP143:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS25]], i32 0, i32 8
-// CHECK17-NEXT:    [[TMP144:%.*]] = bitcast i8** [[TMP143]] to %struct.TT**
-// CHECK17-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP144]], align 8
-// CHECK17-NEXT:    [[TMP145:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS26]], i32 0, i32 8
-// CHECK17-NEXT:    [[TMP146:%.*]] = bitcast i8** [[TMP145]] to %struct.TT**
-// CHECK17-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP146]], align 8
-// CHECK17-NEXT:    [[TMP147:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 8
-// CHECK17-NEXT:    store i64 16, i64* [[TMP147]], align 8
-// CHECK17-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS27]], i64 0, i64 8
-// CHECK17-NEXT:    store i8* null, i8** [[TMP148]], align 8
-// CHECK17-NEXT:    [[TMP149:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS25]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS26]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP151:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP152:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l142.region_id, i32 9, i8** [[TMP149]], i8** [[TMP150]], i64* [[TMP151]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.10, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK17-NEXT:    [[TMP153:%.*]] = icmp ne i32 [[TMP152]], 0
-// CHECK17-NEXT:    br i1 [[TMP153]], label [[OMP_OFFLOAD_FAILED28:%.*]], label [[OMP_OFFLOAD_CONT29:%.*]]
+// CHECK17-NEXT:    [[TMP143:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS25]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP144:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS26]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP145:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP146:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l142.region_id, i32 9, i8** [[TMP143]], i8** [[TMP144]], i64* [[TMP145]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.11, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK17-NEXT:    [[TMP147:%.*]] = icmp ne i32 [[TMP146]], 0
+// CHECK17-NEXT:    br i1 [[TMP147]], label [[OMP_OFFLOAD_FAILED28:%.*]], label [[OMP_OFFLOAD_CONT29:%.*]]
 // CHECK17:       omp_offload.failed28:
 // CHECK17-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l142(i64 [[TMP90]], [10 x float]* [[B]], i64 [[TMP2]], float* [[VLA]], [5 x [10 x double]]* [[C]], i64 5, i64 [[TMP5]], double* [[VLA1]], %struct.TT* [[D]]) #[[ATTR3]]
 // CHECK17-NEXT:    br label [[OMP_OFFLOAD_CONT29]]
@@ -8178,52 +8094,52 @@ int bar(int n){
 // CHECK17-NEXT:    br label [[OMP_IF_END31]]
 // CHECK17:       omp_if.end31:
 // CHECK17-NEXT:    store i32 0, i32* [[NN]], align 4
-// CHECK17-NEXT:    [[TMP154:%.*]] = load i32, i32* [[NN]], align 4
+// CHECK17-NEXT:    [[TMP148:%.*]] = load i32, i32* [[NN]], align 4
 // CHECK17-NEXT:    [[CONV32:%.*]] = bitcast i64* [[NN_CASTED]] to i32*
-// CHECK17-NEXT:    store i32 [[TMP154]], i32* [[CONV32]], align 4
-// CHECK17-NEXT:    [[TMP155:%.*]] = load i64, i64* [[NN_CASTED]], align 8
-// CHECK17-NEXT:    [[TMP156:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS33]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP157:%.*]] = bitcast i8** [[TMP156]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP155]], i64* [[TMP157]], align 8
-// CHECK17-NEXT:    [[TMP158:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS34]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP159:%.*]] = bitcast i8** [[TMP158]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP155]], i64* [[TMP159]], align 8
-// CHECK17-NEXT:    [[TMP160:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_MAPPERS35]], i64 0, i64 0
-// CHECK17-NEXT:    store i8* null, i8** [[TMP160]], align 8
-// CHECK17-NEXT:    [[TMP161:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS33]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP162:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS34]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP163:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l154.region_id, i32 1, i8** [[TMP161]], i8** [[TMP162]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.13, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.14, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK17-NEXT:    [[TMP164:%.*]] = icmp ne i32 [[TMP163]], 0
-// CHECK17-NEXT:    br i1 [[TMP164]], label [[OMP_OFFLOAD_FAILED36:%.*]], label [[OMP_OFFLOAD_CONT37:%.*]]
+// CHECK17-NEXT:    store i32 [[TMP148]], i32* [[CONV32]], align 4
+// CHECK17-NEXT:    [[TMP149:%.*]] = load i64, i64* [[NN_CASTED]], align 8
+// CHECK17-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS33]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP151:%.*]] = bitcast i8** [[TMP150]] to i64*
+// CHECK17-NEXT:    store i64 [[TMP149]], i64* [[TMP151]], align 8
+// CHECK17-NEXT:    [[TMP152:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS34]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP153:%.*]] = bitcast i8** [[TMP152]] to i64*
+// CHECK17-NEXT:    store i64 [[TMP149]], i64* [[TMP153]], align 8
+// CHECK17-NEXT:    [[TMP154:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_MAPPERS35]], i64 0, i64 0
+// CHECK17-NEXT:    store i8* null, i8** [[TMP154]], align 8
+// CHECK17-NEXT:    [[TMP155:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS33]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP156:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS34]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP157:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l154.region_id, i32 1, i8** [[TMP155]], i8** [[TMP156]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.14, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.15, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK17-NEXT:    [[TMP158:%.*]] = icmp ne i32 [[TMP157]], 0
+// CHECK17-NEXT:    br i1 [[TMP158]], label [[OMP_OFFLOAD_FAILED36:%.*]], label [[OMP_OFFLOAD_CONT37:%.*]]
 // CHECK17:       omp_offload.failed36:
-// CHECK17-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l154(i64 [[TMP155]]) #[[ATTR3]]
+// CHECK17-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l154(i64 [[TMP149]]) #[[ATTR3]]
 // CHECK17-NEXT:    br label [[OMP_OFFLOAD_CONT37]]
 // CHECK17:       omp_offload.cont37:
-// CHECK17-NEXT:    [[TMP165:%.*]] = load i32, i32* [[NN]], align 4
+// CHECK17-NEXT:    [[TMP159:%.*]] = load i32, i32* [[NN]], align 4
 // CHECK17-NEXT:    [[CONV39:%.*]] = bitcast i64* [[NN_CASTED38]] to i32*
-// CHECK17-NEXT:    store i32 [[TMP165]], i32* [[CONV39]], align 4
-// CHECK17-NEXT:    [[TMP166:%.*]] = load i64, i64* [[NN_CASTED38]], align 8
-// CHECK17-NEXT:    [[TMP167:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS40]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP168:%.*]] = bitcast i8** [[TMP167]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP166]], i64* [[TMP168]], align 8
-// CHECK17-NEXT:    [[TMP169:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS41]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP170:%.*]] = bitcast i8** [[TMP169]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP166]], i64* [[TMP170]], align 8
-// CHECK17-NEXT:    [[TMP171:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_MAPPERS42]], i64 0, i64 0
-// CHECK17-NEXT:    store i8* null, i8** [[TMP171]], align 8
-// CHECK17-NEXT:    [[TMP172:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS40]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP173:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS41]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP174:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l157.region_id, i32 1, i8** [[TMP172]], i8** [[TMP173]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.17, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.18, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK17-NEXT:    [[TMP175:%.*]] = icmp ne i32 [[TMP174]], 0
-// CHECK17-NEXT:    br i1 [[TMP175]], label [[OMP_OFFLOAD_FAILED43:%.*]], label [[OMP_OFFLOAD_CONT44:%.*]]
+// CHECK17-NEXT:    store i32 [[TMP159]], i32* [[CONV39]], align 4
+// CHECK17-NEXT:    [[TMP160:%.*]] = load i64, i64* [[NN_CASTED38]], align 8
+// CHECK17-NEXT:    [[TMP161:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS40]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP162:%.*]] = bitcast i8** [[TMP161]] to i64*
+// CHECK17-NEXT:    store i64 [[TMP160]], i64* [[TMP162]], align 8
+// CHECK17-NEXT:    [[TMP163:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS41]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP164:%.*]] = bitcast i8** [[TMP163]] to i64*
+// CHECK17-NEXT:    store i64 [[TMP160]], i64* [[TMP164]], align 8
+// CHECK17-NEXT:    [[TMP165:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_MAPPERS42]], i64 0, i64 0
+// CHECK17-NEXT:    store i8* null, i8** [[TMP165]], align 8
+// CHECK17-NEXT:    [[TMP166:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS40]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP167:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS41]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP168:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l157.region_id, i32 1, i8** [[TMP166]], i8** [[TMP167]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.18, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.19, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK17-NEXT:    [[TMP169:%.*]] = icmp ne i32 [[TMP168]], 0
+// CHECK17-NEXT:    br i1 [[TMP169]], label [[OMP_OFFLOAD_FAILED43:%.*]], label [[OMP_OFFLOAD_CONT44:%.*]]
 // CHECK17:       omp_offload.failed43:
-// CHECK17-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l157(i64 [[TMP166]]) #[[ATTR3]]
+// CHECK17-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l157(i64 [[TMP160]]) #[[ATTR3]]
 // CHECK17-NEXT:    br label [[OMP_OFFLOAD_CONT44]]
 // CHECK17:       omp_offload.cont44:
-// CHECK17-NEXT:    [[TMP176:%.*]] = load i32, i32* [[A]], align 4
-// CHECK17-NEXT:    [[TMP177:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK17-NEXT:    call void @llvm.stackrestore(i8* [[TMP177]])
-// CHECK17-NEXT:    ret i32 [[TMP176]]
+// CHECK17-NEXT:    [[TMP170:%.*]] = load i32, i32* [[A]], align 4
+// CHECK17-NEXT:    [[TMP171:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK17-NEXT:    call void @llvm.stackrestore(i8* [[TMP171]])
+// CHECK17-NEXT:    ret i32 [[TMP170]]
 //
 //
 // CHECK17-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l101
@@ -8605,11 +8521,11 @@ int bar(int n){
 // CHECK17-NEXT:    [[CONV1:%.*]] = bitcast i64* [[NN_CASTED]] to i32*
 // CHECK17-NEXT:    store i32 [[TMP0]], i32* [[CONV1]], align 4
 // CHECK17-NEXT:    [[TMP1:%.*]] = load i64, i64* [[NN_CASTED]], align 8
-// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP1]])
+// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP1]])
 // CHECK17-NEXT:    ret void
 //
 //
-// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..11
+// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..12
 // CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[NN:%.*]]) #[[ATTR2]] {
 // CHECK17-NEXT:  entry:
 // CHECK17-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -8624,11 +8540,11 @@ int bar(int n){
 // CHECK17-NEXT:    [[CONV1:%.*]] = bitcast i64* [[NN_CASTED]] to i32*
 // CHECK17-NEXT:    store i32 [[TMP0]], i32* [[CONV1]], align 4
 // CHECK17-NEXT:    [[TMP1:%.*]] = load i64, i64* [[NN_CASTED]], align 8
-// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP1]])
+// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i64 [[TMP1]])
 // CHECK17-NEXT:    ret void
 //
 //
-// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..12
+// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..13
 // CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[NN:%.*]]) #[[ATTR2]] {
 // CHECK17-NEXT:  entry:
 // CHECK17-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -8652,11 +8568,11 @@ int bar(int n){
 // CHECK17-NEXT:    [[CONV1:%.*]] = bitcast i64* [[NN_CASTED]] to i32*
 // CHECK17-NEXT:    store i32 [[TMP0]], i32* [[CONV1]], align 4
 // CHECK17-NEXT:    [[TMP1:%.*]] = load i64, i64* [[NN_CASTED]], align 8
-// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP1]])
+// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP1]])
 // CHECK17-NEXT:    ret void
 //
 //
-// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..15
+// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..16
 // CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[NN:%.*]]) #[[ATTR2]] {
 // CHECK17-NEXT:  entry:
 // CHECK17-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -8666,11 +8582,11 @@ int bar(int n){
 // CHECK17-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
 // CHECK17-NEXT:    store i64 [[NN]], i64* [[NN_ADDR]], align 8
 // CHECK17-NEXT:    [[CONV:%.*]] = bitcast i64* [[NN_ADDR]] to i32*
-// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i32* [[CONV]])
+// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..17 to void (i32*, i32*, ...)*), i32* [[CONV]])
 // CHECK17-NEXT:    ret void
 //
 //
-// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..16
+// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..17
 // CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[NN:%.*]]) #[[ATTR2]] {
 // CHECK17-NEXT:  entry:
 // CHECK17-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -8705,7 +8621,7 @@ int bar(int n){
 // CHECK17-NEXT:    store i8* null, i8** [[TMP6]], align 8
 // CHECK17-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK17-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP9:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6bazzzziPi_l182.region_id, i32 1, i8** [[TMP7]], i8** [[TMP8]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.20, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.21, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK17-NEXT:    [[TMP9:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6bazzzziPi_l182.region_id, i32 1, i8** [[TMP7]], i8** [[TMP8]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.21, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.22, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK17-NEXT:    [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0
 // CHECK17-NEXT:    br i1 [[TMP10]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK17:       omp_offload.failed:
@@ -8721,11 +8637,11 @@ int bar(int n){
 // CHECK17-NEXT:    [[VLA_ADDR:%.*]] = alloca i64, align 8
 // CHECK17-NEXT:    store i64 [[VLA]], i64* [[VLA_ADDR]], align 8
 // CHECK17-NEXT:    [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8
-// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i64 [[TMP0]])
+// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..20 to void (i32*, i32*, ...)*), i64 [[TMP0]])
 // CHECK17-NEXT:    ret void
 //
 //
-// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..19
+// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..20
 // CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]]) #[[ATTR2]] {
 // CHECK17-NEXT:  entry:
 // CHECK17-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -8808,14 +8724,14 @@ int bar(int n){
 // CHECK17-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0
 // CHECK17-NEXT:    [[TMP8:%.*]] = mul nuw i64 2, [[TMP2]]
 // CHECK17-NEXT:    [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 2
-// CHECK17-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP11:%.*]] = bitcast i8** [[TMP10]] to %struct.S1**
-// CHECK17-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP11]], align 8
-// CHECK17-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP13:%.*]] = bitcast i8** [[TMP12]] to double**
-// CHECK17-NEXT:    store double* [[A]], double** [[TMP13]], align 8
-// CHECK17-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK17-NEXT:    store i64 8, i64* [[TMP14]], align 8
+// CHECK17-NEXT:    [[TMP10:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK17-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP10]], i8* align 8 bitcast ([5 x i64]* @.offload_sizes.24 to i8*), i64 40, i1 false)
+// CHECK17-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP12:%.*]] = bitcast i8** [[TMP11]] to %struct.S1**
+// CHECK17-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP12]], align 8
+// CHECK17-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to double**
+// CHECK17-NEXT:    store double* [[A]], double** [[TMP14]], align 8
 // CHECK17-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
 // CHECK17-NEXT:    store i8* null, i8** [[TMP15]], align 8
 // CHECK17-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -8824,46 +8740,40 @@ int bar(int n){
 // CHECK17-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK17-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i64*
 // CHECK17-NEXT:    store i64 [[TMP6]], i64* [[TMP19]], align 8
-// CHECK17-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK17-NEXT:    store i64 4, i64* [[TMP20]], align 8
-// CHECK17-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK17-NEXT:    store i8* null, i8** [[TMP21]], align 8
-// CHECK17-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK17-NEXT:    [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i64*
-// CHECK17-NEXT:    store i64 2, i64* [[TMP23]], align 8
-// CHECK17-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK17-NEXT:    [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i64*
-// CHECK17-NEXT:    store i64 2, i64* [[TMP25]], align 8
-// CHECK17-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK17-NEXT:    store i64 8, i64* [[TMP26]], align 8
-// CHECK17-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK17-NEXT:    store i8* null, i8** [[TMP27]], align 8
-// CHECK17-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK17-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK17-NEXT:    store i8* null, i8** [[TMP20]], align 8
+// CHECK17-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK17-NEXT:    [[TMP22:%.*]] = bitcast i8** [[TMP21]] to i64*
+// CHECK17-NEXT:    store i64 2, i64* [[TMP22]], align 8
+// CHECK17-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK17-NEXT:    [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i64*
+// CHECK17-NEXT:    store i64 2, i64* [[TMP24]], align 8
+// CHECK17-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK17-NEXT:    store i8* null, i8** [[TMP25]], align 8
+// CHECK17-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK17-NEXT:    [[TMP27:%.*]] = bitcast i8** [[TMP26]] to i64*
+// CHECK17-NEXT:    store i64 [[TMP2]], i64* [[TMP27]], align 8
+// CHECK17-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
 // CHECK17-NEXT:    [[TMP29:%.*]] = bitcast i8** [[TMP28]] to i64*
 // CHECK17-NEXT:    store i64 [[TMP2]], i64* [[TMP29]], align 8
-// CHECK17-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK17-NEXT:    [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP2]], i64* [[TMP31]], align 8
-// CHECK17-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK17-NEXT:    store i64 8, i64* [[TMP32]], align 8
-// CHECK17-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
-// CHECK17-NEXT:    store i8* null, i8** [[TMP33]], align 8
-// CHECK17-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK17-NEXT:    [[TMP35:%.*]] = bitcast i8** [[TMP34]] to i16**
-// CHECK17-NEXT:    store i16* [[VLA]], i16** [[TMP35]], align 8
-// CHECK17-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
-// CHECK17-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i16**
-// CHECK17-NEXT:    store i16* [[VLA]], i16** [[TMP37]], align 8
-// CHECK17-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK17-NEXT:    store i64 [[TMP9]], i64* [[TMP38]], align 8
-// CHECK17-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
-// CHECK17-NEXT:    store i8* null, i8** [[TMP39]], align 8
-// CHECK17-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP43:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l227.region_id, i32 5, i8** [[TMP40]], i8** [[TMP41]], i64* [[TMP42]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.23, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK17-NEXT:    [[TMP44:%.*]] = icmp ne i32 [[TMP43]], 0
-// CHECK17-NEXT:    br i1 [[TMP44]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK17-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
+// CHECK17-NEXT:    store i8* null, i8** [[TMP30]], align 8
+// CHECK17-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK17-NEXT:    [[TMP32:%.*]] = bitcast i8** [[TMP31]] to i16**
+// CHECK17-NEXT:    store i16* [[VLA]], i16** [[TMP32]], align 8
+// CHECK17-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK17-NEXT:    [[TMP34:%.*]] = bitcast i8** [[TMP33]] to i16**
+// CHECK17-NEXT:    store i16* [[VLA]], i16** [[TMP34]], align 8
+// CHECK17-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
+// CHECK17-NEXT:    store i64 [[TMP9]], i64* [[TMP35]], align 8
+// CHECK17-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
+// CHECK17-NEXT:    store i8* null, i8** [[TMP36]], align 8
+// CHECK17-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP40:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l227.region_id, i32 5, i8** [[TMP37]], i8** [[TMP38]], i64* [[TMP39]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.25, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK17-NEXT:    [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0
+// CHECK17-NEXT:    br i1 [[TMP41]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK17:       omp_offload.failed:
 // CHECK17-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l227(%struct.S1* [[THIS1]], i64 [[TMP6]], i64 2, i64 [[TMP2]], i16* [[VLA]]) #[[ATTR3]]
 // CHECK17-NEXT:    br label [[OMP_OFFLOAD_CONT]]
@@ -8873,15 +8783,15 @@ int bar(int n){
 // CHECK17-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l227(%struct.S1* [[THIS1]], i64 [[TMP6]], i64 2, i64 [[TMP2]], i16* [[VLA]]) #[[ATTR3]]
 // CHECK17-NEXT:    br label [[OMP_IF_END]]
 // CHECK17:       omp_if.end:
-// CHECK17-NEXT:    [[TMP45:%.*]] = mul nsw i64 1, [[TMP2]]
-// CHECK17-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i64 [[TMP45]]
+// CHECK17-NEXT:    [[TMP42:%.*]] = mul nsw i64 1, [[TMP2]]
+// CHECK17-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i64 [[TMP42]]
 // CHECK17-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1
-// CHECK17-NEXT:    [[TMP46:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
-// CHECK17-NEXT:    [[CONV3:%.*]] = sext i16 [[TMP46]] to i32
-// CHECK17-NEXT:    [[TMP47:%.*]] = load i32, i32* [[B]], align 4
-// CHECK17-NEXT:    [[ADD4:%.*]] = add nsw i32 [[CONV3]], [[TMP47]]
-// CHECK17-NEXT:    [[TMP48:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK17-NEXT:    call void @llvm.stackrestore(i8* [[TMP48]])
+// CHECK17-NEXT:    [[TMP43:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
+// CHECK17-NEXT:    [[CONV3:%.*]] = sext i16 [[TMP43]] to i32
+// CHECK17-NEXT:    [[TMP44:%.*]] = load i32, i32* [[B]], align 4
+// CHECK17-NEXT:    [[ADD4:%.*]] = add nsw i32 [[CONV3]], [[TMP44]]
+// CHECK17-NEXT:    [[TMP45:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK17-NEXT:    call void @llvm.stackrestore(i8* [[TMP45]])
 // CHECK17-NEXT:    ret i32 [[ADD4]]
 //
 //
@@ -8953,7 +8863,7 @@ int bar(int n){
 // CHECK17-NEXT:    store i8* null, i8** [[TMP26]], align 8
 // CHECK17-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK17-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP29:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l209.region_id, i32 4, i8** [[TMP27]], i8** [[TMP28]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.25, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.26, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK17-NEXT:    [[TMP29:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l209.region_id, i32 4, i8** [[TMP27]], i8** [[TMP28]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.27, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.28, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK17-NEXT:    [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0
 // CHECK17-NEXT:    br i1 [[TMP30]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK17:       omp_offload.failed:
@@ -9022,7 +8932,7 @@ int bar(int n){
 // CHECK17-NEXT:    store i8* null, i8** [[TMP19]], align 8
 // CHECK17-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK17-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l192.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.28, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.29, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK17-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l192.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.30, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.31, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK17-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK17-NEXT:    br i1 [[TMP23]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK17:       omp_offload.failed:
@@ -9061,11 +8971,11 @@ int bar(int n){
 // CHECK17-NEXT:    [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32*
 // CHECK17-NEXT:    store i32 [[TMP4]], i32* [[CONV3]], align 4
 // CHECK17-NEXT:    [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8
-// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i64, i64, i64, i16*)* @.omp_outlined..22 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i64 [[TMP5]], i64 [[TMP1]], i64 [[TMP2]], i16* [[TMP3]])
+// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i64, i64, i64, i16*)* @.omp_outlined..23 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i64 [[TMP5]], i64 [[TMP1]], i64 [[TMP2]], i16* [[TMP3]])
 // CHECK17-NEXT:    ret void
 //
 //
-// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..22
+// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..23
 // CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR2]] {
 // CHECK17-NEXT:  entry:
 // CHECK17-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -9134,11 +9044,11 @@ int bar(int n){
 // CHECK17-NEXT:    [[CONV5:%.*]] = bitcast i64* [[AAA_CASTED]] to i8*
 // CHECK17-NEXT:    store i8 [[TMP5]], i8* [[CONV5]], align 1
 // CHECK17-NEXT:    [[TMP6:%.*]] = load i64, i64* [[AAA_CASTED]], align 8
-// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, [10 x i32]*)* @.omp_outlined..24 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], [10 x i32]* [[TMP0]])
+// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, [10 x i32]*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], [10 x i32]* [[TMP0]])
 // CHECK17-NEXT:    ret void
 //
 //
-// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..24
+// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..26
 // CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] {
 // CHECK17-NEXT:  entry:
 // CHECK17-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -9199,11 +9109,11 @@ int bar(int n){
 // CHECK17-NEXT:    [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16*
 // CHECK17-NEXT:    store i16 [[TMP3]], i16* [[CONV3]], align 2
 // CHECK17-NEXT:    [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8
-// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..27 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], [10 x i32]* [[TMP0]])
+// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..29 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], [10 x i32]* [[TMP0]])
 // CHECK17-NEXT:    ret void
 //
 //
-// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..27
+// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..29
 // CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] {
 // CHECK17-NEXT:  entry:
 // CHECK17-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -9455,14 +9365,14 @@ int bar(int n){
 // CHECK18-NEXT:    [[TMP92:%.*]] = mul nuw i64 [[TMP2]], 4
 // CHECK18-NEXT:    [[TMP93:%.*]] = mul nuw i64 5, [[TMP5]]
 // CHECK18-NEXT:    [[TMP94:%.*]] = mul nuw i64 [[TMP93]], 8
-// CHECK18-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS25]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP96:%.*]] = bitcast i8** [[TMP95]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP90]], i64* [[TMP96]], align 8
-// CHECK18-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS26]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP98:%.*]] = bitcast i8** [[TMP97]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP90]], i64* [[TMP98]], align 8
-// CHECK18-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK18-NEXT:    store i64 4, i64* [[TMP99]], align 8
+// CHECK18-NEXT:    [[TMP95:%.*]] = bitcast [9 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK18-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP95]], i8* align 8 bitcast ([9 x i64]* @.offload_sizes.10 to i8*), i64 72, i1 false)
+// CHECK18-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS25]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP97:%.*]] = bitcast i8** [[TMP96]] to i64*
+// CHECK18-NEXT:    store i64 [[TMP90]], i64* [[TMP97]], align 8
+// CHECK18-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS26]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP99:%.*]] = bitcast i8** [[TMP98]] to i64*
+// CHECK18-NEXT:    store i64 [[TMP90]], i64* [[TMP99]], align 8
 // CHECK18-NEXT:    [[TMP100:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS27]], i64 0, i64 0
 // CHECK18-NEXT:    store i8* null, i8** [[TMP100]], align 8
 // CHECK18-NEXT:    [[TMP101:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS25]], i32 0, i32 1
@@ -9471,86 +9381,74 @@ int bar(int n){
 // CHECK18-NEXT:    [[TMP103:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS26]], i32 0, i32 1
 // CHECK18-NEXT:    [[TMP104:%.*]] = bitcast i8** [[TMP103]] to [10 x float]**
 // CHECK18-NEXT:    store [10 x float]* [[B]], [10 x float]** [[TMP104]], align 8
-// CHECK18-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK18-NEXT:    store i64 40, i64* [[TMP105]], align 8
-// CHECK18-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS27]], i64 0, i64 1
-// CHECK18-NEXT:    store i8* null, i8** [[TMP106]], align 8
-// CHECK18-NEXT:    [[TMP107:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS25]], i32 0, i32 2
-// CHECK18-NEXT:    [[TMP108:%.*]] = bitcast i8** [[TMP107]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP2]], i64* [[TMP108]], align 8
-// CHECK18-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS26]], i32 0, i32 2
-// CHECK18-NEXT:    [[TMP110:%.*]] = bitcast i8** [[TMP109]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP2]], i64* [[TMP110]], align 8
-// CHECK18-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK18-NEXT:    store i64 8, i64* [[TMP111]], align 8
-// CHECK18-NEXT:    [[TMP112:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS27]], i64 0, i64 2
-// CHECK18-NEXT:    store i8* null, i8** [[TMP112]], align 8
-// CHECK18-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS25]], i32 0, i32 3
+// CHECK18-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS27]], i64 0, i64 1
+// CHECK18-NEXT:    store i8* null, i8** [[TMP105]], align 8
+// CHECK18-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS25]], i32 0, i32 2
+// CHECK18-NEXT:    [[TMP107:%.*]] = bitcast i8** [[TMP106]] to i64*
+// CHECK18-NEXT:    store i64 [[TMP2]], i64* [[TMP107]], align 8
+// CHECK18-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS26]], i32 0, i32 2
+// CHECK18-NEXT:    [[TMP109:%.*]] = bitcast i8** [[TMP108]] to i64*
+// CHECK18-NEXT:    store i64 [[TMP2]], i64* [[TMP109]], align 8
+// CHECK18-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS27]], i64 0, i64 2
+// CHECK18-NEXT:    store i8* null, i8** [[TMP110]], align 8
+// CHECK18-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS25]], i32 0, i32 3
+// CHECK18-NEXT:    [[TMP112:%.*]] = bitcast i8** [[TMP111]] to float**
+// CHECK18-NEXT:    store float* [[VLA]], float** [[TMP112]], align 8
+// CHECK18-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS26]], i32 0, i32 3
 // CHECK18-NEXT:    [[TMP114:%.*]] = bitcast i8** [[TMP113]] to float**
 // CHECK18-NEXT:    store float* [[VLA]], float** [[TMP114]], align 8
-// CHECK18-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS26]], i32 0, i32 3
-// CHECK18-NEXT:    [[TMP116:%.*]] = bitcast i8** [[TMP115]] to float**
-// CHECK18-NEXT:    store float* [[VLA]], float** [[TMP116]], align 8
-// CHECK18-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK18-NEXT:    store i64 [[TMP92]], i64* [[TMP117]], align 8
-// CHECK18-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS27]], i64 0, i64 3
-// CHECK18-NEXT:    store i8* null, i8** [[TMP118]], align 8
-// CHECK18-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS25]], i32 0, i32 4
+// CHECK18-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
+// CHECK18-NEXT:    store i64 [[TMP92]], i64* [[TMP115]], align 8
+// CHECK18-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS27]], i64 0, i64 3
+// CHECK18-NEXT:    store i8* null, i8** [[TMP116]], align 8
+// CHECK18-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS25]], i32 0, i32 4
+// CHECK18-NEXT:    [[TMP118:%.*]] = bitcast i8** [[TMP117]] to [5 x [10 x double]]**
+// CHECK18-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP118]], align 8
+// CHECK18-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS26]], i32 0, i32 4
 // CHECK18-NEXT:    [[TMP120:%.*]] = bitcast i8** [[TMP119]] to [5 x [10 x double]]**
 // CHECK18-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP120]], align 8
-// CHECK18-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS26]], i32 0, i32 4
-// CHECK18-NEXT:    [[TMP122:%.*]] = bitcast i8** [[TMP121]] to [5 x [10 x double]]**
-// CHECK18-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP122]], align 8
-// CHECK18-NEXT:    [[TMP123:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK18-NEXT:    store i64 400, i64* [[TMP123]], align 8
-// CHECK18-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS27]], i64 0, i64 4
-// CHECK18-NEXT:    store i8* null, i8** [[TMP124]], align 8
-// CHECK18-NEXT:    [[TMP125:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS25]], i32 0, i32 5
-// CHECK18-NEXT:    [[TMP126:%.*]] = bitcast i8** [[TMP125]] to i64*
-// CHECK18-NEXT:    store i64 5, i64* [[TMP126]], align 8
-// CHECK18-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS26]], i32 0, i32 5
+// CHECK18-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS27]], i64 0, i64 4
+// CHECK18-NEXT:    store i8* null, i8** [[TMP121]], align 8
+// CHECK18-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS25]], i32 0, i32 5
+// CHECK18-NEXT:    [[TMP123:%.*]] = bitcast i8** [[TMP122]] to i64*
+// CHECK18-NEXT:    store i64 5, i64* [[TMP123]], align 8
+// CHECK18-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS26]], i32 0, i32 5
+// CHECK18-NEXT:    [[TMP125:%.*]] = bitcast i8** [[TMP124]] to i64*
+// CHECK18-NEXT:    store i64 5, i64* [[TMP125]], align 8
+// CHECK18-NEXT:    [[TMP126:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS27]], i64 0, i64 5
+// CHECK18-NEXT:    store i8* null, i8** [[TMP126]], align 8
+// CHECK18-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS25]], i32 0, i32 6
 // CHECK18-NEXT:    [[TMP128:%.*]] = bitcast i8** [[TMP127]] to i64*
-// CHECK18-NEXT:    store i64 5, i64* [[TMP128]], align 8
-// CHECK18-NEXT:    [[TMP129:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 5
-// CHECK18-NEXT:    store i64 8, i64* [[TMP129]], align 8
-// CHECK18-NEXT:    [[TMP130:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS27]], i64 0, i64 5
-// CHECK18-NEXT:    store i8* null, i8** [[TMP130]], align 8
-// CHECK18-NEXT:    [[TMP131:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS25]], i32 0, i32 6
-// CHECK18-NEXT:    [[TMP132:%.*]] = bitcast i8** [[TMP131]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP5]], i64* [[TMP132]], align 8
-// CHECK18-NEXT:    [[TMP133:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS26]], i32 0, i32 6
-// CHECK18-NEXT:    [[TMP134:%.*]] = bitcast i8** [[TMP133]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP5]], i64* [[TMP134]], align 8
-// CHECK18-NEXT:    [[TMP135:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 6
-// CHECK18-NEXT:    store i64 8, i64* [[TMP135]], align 8
-// CHECK18-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS27]], i64 0, i64 6
-// CHECK18-NEXT:    store i8* null, i8** [[TMP136]], align 8
-// CHECK18-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS25]], i32 0, i32 7
-// CHECK18-NEXT:    [[TMP138:%.*]] = bitcast i8** [[TMP137]] to double**
-// CHECK18-NEXT:    store double* [[VLA1]], double** [[TMP138]], align 8
-// CHECK18-NEXT:    [[TMP139:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS26]], i32 0, i32 7
-// CHECK18-NEXT:    [[TMP140:%.*]] = bitcast i8** [[TMP139]] to double**
-// CHECK18-NEXT:    store double* [[VLA1]], double** [[TMP140]], align 8
-// CHECK18-NEXT:    [[TMP141:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
-// CHECK18-NEXT:    store i64 [[TMP94]], i64* [[TMP141]], align 8
-// CHECK18-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS27]], i64 0, i64 7
+// CHECK18-NEXT:    store i64 [[TMP5]], i64* [[TMP128]], align 8
+// CHECK18-NEXT:    [[TMP129:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS26]], i32 0, i32 6
+// CHECK18-NEXT:    [[TMP130:%.*]] = bitcast i8** [[TMP129]] to i64*
+// CHECK18-NEXT:    store i64 [[TMP5]], i64* [[TMP130]], align 8
+// CHECK18-NEXT:    [[TMP131:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS27]], i64 0, i64 6
+// CHECK18-NEXT:    store i8* null, i8** [[TMP131]], align 8
+// CHECK18-NEXT:    [[TMP132:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS25]], i32 0, i32 7
+// CHECK18-NEXT:    [[TMP133:%.*]] = bitcast i8** [[TMP132]] to double**
+// CHECK18-NEXT:    store double* [[VLA1]], double** [[TMP133]], align 8
+// CHECK18-NEXT:    [[TMP134:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS26]], i32 0, i32 7
+// CHECK18-NEXT:    [[TMP135:%.*]] = bitcast i8** [[TMP134]] to double**
+// CHECK18-NEXT:    store double* [[VLA1]], double** [[TMP135]], align 8
+// CHECK18-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
+// CHECK18-NEXT:    store i64 [[TMP94]], i64* [[TMP136]], align 8
+// CHECK18-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS27]], i64 0, i64 7
+// CHECK18-NEXT:    store i8* null, i8** [[TMP137]], align 8
+// CHECK18-NEXT:    [[TMP138:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS25]], i32 0, i32 8
+// CHECK18-NEXT:    [[TMP139:%.*]] = bitcast i8** [[TMP138]] to %struct.TT**
+// CHECK18-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP139]], align 8
+// CHECK18-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS26]], i32 0, i32 8
+// CHECK18-NEXT:    [[TMP141:%.*]] = bitcast i8** [[TMP140]] to %struct.TT**
+// CHECK18-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP141]], align 8
+// CHECK18-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS27]], i64 0, i64 8
 // CHECK18-NEXT:    store i8* null, i8** [[TMP142]], align 8
-// CHECK18-NEXT:    [[TMP143:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS25]], i32 0, i32 8
-// CHECK18-NEXT:    [[TMP144:%.*]] = bitcast i8** [[TMP143]] to %struct.TT**
-// CHECK18-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP144]], align 8
-// CHECK18-NEXT:    [[TMP145:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS26]], i32 0, i32 8
-// CHECK18-NEXT:    [[TMP146:%.*]] = bitcast i8** [[TMP145]] to %struct.TT**
-// CHECK18-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP146]], align 8
-// CHECK18-NEXT:    [[TMP147:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 8
-// CHECK18-NEXT:    store i64 16, i64* [[TMP147]], align 8
-// CHECK18-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS27]], i64 0, i64 8
-// CHECK18-NEXT:    store i8* null, i8** [[TMP148]], align 8
-// CHECK18-NEXT:    [[TMP149:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS25]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS26]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP151:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP152:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l142.region_id, i32 9, i8** [[TMP149]], i8** [[TMP150]], i64* [[TMP151]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.10, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK18-NEXT:    [[TMP153:%.*]] = icmp ne i32 [[TMP152]], 0
-// CHECK18-NEXT:    br i1 [[TMP153]], label [[OMP_OFFLOAD_FAILED28:%.*]], label [[OMP_OFFLOAD_CONT29:%.*]]
+// CHECK18-NEXT:    [[TMP143:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS25]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP144:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS26]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP145:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP146:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l142.region_id, i32 9, i8** [[TMP143]], i8** [[TMP144]], i64* [[TMP145]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.11, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK18-NEXT:    [[TMP147:%.*]] = icmp ne i32 [[TMP146]], 0
+// CHECK18-NEXT:    br i1 [[TMP147]], label [[OMP_OFFLOAD_FAILED28:%.*]], label [[OMP_OFFLOAD_CONT29:%.*]]
 // CHECK18:       omp_offload.failed28:
 // CHECK18-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l142(i64 [[TMP90]], [10 x float]* [[B]], i64 [[TMP2]], float* [[VLA]], [5 x [10 x double]]* [[C]], i64 5, i64 [[TMP5]], double* [[VLA1]], %struct.TT* [[D]]) #[[ATTR3]]
 // CHECK18-NEXT:    br label [[OMP_OFFLOAD_CONT29]]
@@ -9561,52 +9459,52 @@ int bar(int n){
 // CHECK18-NEXT:    br label [[OMP_IF_END31]]
 // CHECK18:       omp_if.end31:
 // CHECK18-NEXT:    store i32 0, i32* [[NN]], align 4
-// CHECK18-NEXT:    [[TMP154:%.*]] = load i32, i32* [[NN]], align 4
+// CHECK18-NEXT:    [[TMP148:%.*]] = load i32, i32* [[NN]], align 4
 // CHECK18-NEXT:    [[CONV32:%.*]] = bitcast i64* [[NN_CASTED]] to i32*
-// CHECK18-NEXT:    store i32 [[TMP154]], i32* [[CONV32]], align 4
-// CHECK18-NEXT:    [[TMP155:%.*]] = load i64, i64* [[NN_CASTED]], align 8
-// CHECK18-NEXT:    [[TMP156:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS33]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP157:%.*]] = bitcast i8** [[TMP156]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP155]], i64* [[TMP157]], align 8
-// CHECK18-NEXT:    [[TMP158:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS34]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP159:%.*]] = bitcast i8** [[TMP158]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP155]], i64* [[TMP159]], align 8
-// CHECK18-NEXT:    [[TMP160:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_MAPPERS35]], i64 0, i64 0
-// CHECK18-NEXT:    store i8* null, i8** [[TMP160]], align 8
-// CHECK18-NEXT:    [[TMP161:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS33]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP162:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS34]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP163:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l154.region_id, i32 1, i8** [[TMP161]], i8** [[TMP162]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.13, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.14, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK18-NEXT:    [[TMP164:%.*]] = icmp ne i32 [[TMP163]], 0
-// CHECK18-NEXT:    br i1 [[TMP164]], label [[OMP_OFFLOAD_FAILED36:%.*]], label [[OMP_OFFLOAD_CONT37:%.*]]
+// CHECK18-NEXT:    store i32 [[TMP148]], i32* [[CONV32]], align 4
+// CHECK18-NEXT:    [[TMP149:%.*]] = load i64, i64* [[NN_CASTED]], align 8
+// CHECK18-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS33]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP151:%.*]] = bitcast i8** [[TMP150]] to i64*
+// CHECK18-NEXT:    store i64 [[TMP149]], i64* [[TMP151]], align 8
+// CHECK18-NEXT:    [[TMP152:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS34]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP153:%.*]] = bitcast i8** [[TMP152]] to i64*
+// CHECK18-NEXT:    store i64 [[TMP149]], i64* [[TMP153]], align 8
+// CHECK18-NEXT:    [[TMP154:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_MAPPERS35]], i64 0, i64 0
+// CHECK18-NEXT:    store i8* null, i8** [[TMP154]], align 8
+// CHECK18-NEXT:    [[TMP155:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS33]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP156:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS34]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP157:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l154.region_id, i32 1, i8** [[TMP155]], i8** [[TMP156]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.14, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.15, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK18-NEXT:    [[TMP158:%.*]] = icmp ne i32 [[TMP157]], 0
+// CHECK18-NEXT:    br i1 [[TMP158]], label [[OMP_OFFLOAD_FAILED36:%.*]], label [[OMP_OFFLOAD_CONT37:%.*]]
 // CHECK18:       omp_offload.failed36:
-// CHECK18-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l154(i64 [[TMP155]]) #[[ATTR3]]
+// CHECK18-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l154(i64 [[TMP149]]) #[[ATTR3]]
 // CHECK18-NEXT:    br label [[OMP_OFFLOAD_CONT37]]
 // CHECK18:       omp_offload.cont37:
-// CHECK18-NEXT:    [[TMP165:%.*]] = load i32, i32* [[NN]], align 4
+// CHECK18-NEXT:    [[TMP159:%.*]] = load i32, i32* [[NN]], align 4
 // CHECK18-NEXT:    [[CONV39:%.*]] = bitcast i64* [[NN_CASTED38]] to i32*
-// CHECK18-NEXT:    store i32 [[TMP165]], i32* [[CONV39]], align 4
-// CHECK18-NEXT:    [[TMP166:%.*]] = load i64, i64* [[NN_CASTED38]], align 8
-// CHECK18-NEXT:    [[TMP167:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS40]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP168:%.*]] = bitcast i8** [[TMP167]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP166]], i64* [[TMP168]], align 8
-// CHECK18-NEXT:    [[TMP169:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS41]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP170:%.*]] = bitcast i8** [[TMP169]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP166]], i64* [[TMP170]], align 8
-// CHECK18-NEXT:    [[TMP171:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_MAPPERS42]], i64 0, i64 0
-// CHECK18-NEXT:    store i8* null, i8** [[TMP171]], align 8
-// CHECK18-NEXT:    [[TMP172:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS40]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP173:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS41]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP174:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l157.region_id, i32 1, i8** [[TMP172]], i8** [[TMP173]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.17, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.18, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK18-NEXT:    [[TMP175:%.*]] = icmp ne i32 [[TMP174]], 0
-// CHECK18-NEXT:    br i1 [[TMP175]], label [[OMP_OFFLOAD_FAILED43:%.*]], label [[OMP_OFFLOAD_CONT44:%.*]]
+// CHECK18-NEXT:    store i32 [[TMP159]], i32* [[CONV39]], align 4
+// CHECK18-NEXT:    [[TMP160:%.*]] = load i64, i64* [[NN_CASTED38]], align 8
+// CHECK18-NEXT:    [[TMP161:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS40]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP162:%.*]] = bitcast i8** [[TMP161]] to i64*
+// CHECK18-NEXT:    store i64 [[TMP160]], i64* [[TMP162]], align 8
+// CHECK18-NEXT:    [[TMP163:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS41]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP164:%.*]] = bitcast i8** [[TMP163]] to i64*
+// CHECK18-NEXT:    store i64 [[TMP160]], i64* [[TMP164]], align 8
+// CHECK18-NEXT:    [[TMP165:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_MAPPERS42]], i64 0, i64 0
+// CHECK18-NEXT:    store i8* null, i8** [[TMP165]], align 8
+// CHECK18-NEXT:    [[TMP166:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS40]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP167:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS41]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP168:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l157.region_id, i32 1, i8** [[TMP166]], i8** [[TMP167]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.18, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.19, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK18-NEXT:    [[TMP169:%.*]] = icmp ne i32 [[TMP168]], 0
+// CHECK18-NEXT:    br i1 [[TMP169]], label [[OMP_OFFLOAD_FAILED43:%.*]], label [[OMP_OFFLOAD_CONT44:%.*]]
 // CHECK18:       omp_offload.failed43:
-// CHECK18-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l157(i64 [[TMP166]]) #[[ATTR3]]
+// CHECK18-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l157(i64 [[TMP160]]) #[[ATTR3]]
 // CHECK18-NEXT:    br label [[OMP_OFFLOAD_CONT44]]
 // CHECK18:       omp_offload.cont44:
-// CHECK18-NEXT:    [[TMP176:%.*]] = load i32, i32* [[A]], align 4
-// CHECK18-NEXT:    [[TMP177:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK18-NEXT:    call void @llvm.stackrestore(i8* [[TMP177]])
-// CHECK18-NEXT:    ret i32 [[TMP176]]
+// CHECK18-NEXT:    [[TMP170:%.*]] = load i32, i32* [[A]], align 4
+// CHECK18-NEXT:    [[TMP171:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK18-NEXT:    call void @llvm.stackrestore(i8* [[TMP171]])
+// CHECK18-NEXT:    ret i32 [[TMP170]]
 //
 //
 // CHECK18-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l101
@@ -9988,11 +9886,11 @@ int bar(int n){
 // CHECK18-NEXT:    [[CONV1:%.*]] = bitcast i64* [[NN_CASTED]] to i32*
 // CHECK18-NEXT:    store i32 [[TMP0]], i32* [[CONV1]], align 4
 // CHECK18-NEXT:    [[TMP1:%.*]] = load i64, i64* [[NN_CASTED]], align 8
-// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP1]])
+// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP1]])
 // CHECK18-NEXT:    ret void
 //
 //
-// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..11
+// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..12
 // CHECK18-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[NN:%.*]]) #[[ATTR2]] {
 // CHECK18-NEXT:  entry:
 // CHECK18-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -10007,11 +9905,11 @@ int bar(int n){
 // CHECK18-NEXT:    [[CONV1:%.*]] = bitcast i64* [[NN_CASTED]] to i32*
 // CHECK18-NEXT:    store i32 [[TMP0]], i32* [[CONV1]], align 4
 // CHECK18-NEXT:    [[TMP1:%.*]] = load i64, i64* [[NN_CASTED]], align 8
-// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP1]])
+// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i64 [[TMP1]])
 // CHECK18-NEXT:    ret void
 //
 //
-// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..12
+// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..13
 // CHECK18-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[NN:%.*]]) #[[ATTR2]] {
 // CHECK18-NEXT:  entry:
 // CHECK18-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -10035,11 +9933,11 @@ int bar(int n){
 // CHECK18-NEXT:    [[CONV1:%.*]] = bitcast i64* [[NN_CASTED]] to i32*
 // CHECK18-NEXT:    store i32 [[TMP0]], i32* [[CONV1]], align 4
 // CHECK18-NEXT:    [[TMP1:%.*]] = load i64, i64* [[NN_CASTED]], align 8
-// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP1]])
+// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP1]])
 // CHECK18-NEXT:    ret void
 //
 //
-// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..15
+// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..16
 // CHECK18-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[NN:%.*]]) #[[ATTR2]] {
 // CHECK18-NEXT:  entry:
 // CHECK18-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -10049,11 +9947,11 @@ int bar(int n){
 // CHECK18-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
 // CHECK18-NEXT:    store i64 [[NN]], i64* [[NN_ADDR]], align 8
 // CHECK18-NEXT:    [[CONV:%.*]] = bitcast i64* [[NN_ADDR]] to i32*
-// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i32* [[CONV]])
+// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..17 to void (i32*, i32*, ...)*), i32* [[CONV]])
 // CHECK18-NEXT:    ret void
 //
 //
-// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..16
+// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..17
 // CHECK18-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[NN:%.*]]) #[[ATTR2]] {
 // CHECK18-NEXT:  entry:
 // CHECK18-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -10088,7 +9986,7 @@ int bar(int n){
 // CHECK18-NEXT:    store i8* null, i8** [[TMP6]], align 8
 // CHECK18-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK18-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP9:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6bazzzziPi_l182.region_id, i32 1, i8** [[TMP7]], i8** [[TMP8]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.20, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.21, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK18-NEXT:    [[TMP9:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6bazzzziPi_l182.region_id, i32 1, i8** [[TMP7]], i8** [[TMP8]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.21, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.22, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK18-NEXT:    [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0
 // CHECK18-NEXT:    br i1 [[TMP10]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK18:       omp_offload.failed:
@@ -10104,11 +10002,11 @@ int bar(int n){
 // CHECK18-NEXT:    [[VLA_ADDR:%.*]] = alloca i64, align 8
 // CHECK18-NEXT:    store i64 [[VLA]], i64* [[VLA_ADDR]], align 8
 // CHECK18-NEXT:    [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8
-// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i64 [[TMP0]])
+// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..20 to void (i32*, i32*, ...)*), i64 [[TMP0]])
 // CHECK18-NEXT:    ret void
 //
 //
-// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..19
+// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..20
 // CHECK18-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]]) #[[ATTR2]] {
 // CHECK18-NEXT:  entry:
 // CHECK18-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -10191,14 +10089,14 @@ int bar(int n){
 // CHECK18-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0
 // CHECK18-NEXT:    [[TMP8:%.*]] = mul nuw i64 2, [[TMP2]]
 // CHECK18-NEXT:    [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 2
-// CHECK18-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP11:%.*]] = bitcast i8** [[TMP10]] to %struct.S1**
-// CHECK18-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP11]], align 8
-// CHECK18-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP13:%.*]] = bitcast i8** [[TMP12]] to double**
-// CHECK18-NEXT:    store double* [[A]], double** [[TMP13]], align 8
-// CHECK18-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK18-NEXT:    store i64 8, i64* [[TMP14]], align 8
+// CHECK18-NEXT:    [[TMP10:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK18-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP10]], i8* align 8 bitcast ([5 x i64]* @.offload_sizes.24 to i8*), i64 40, i1 false)
+// CHECK18-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP12:%.*]] = bitcast i8** [[TMP11]] to %struct.S1**
+// CHECK18-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP12]], align 8
+// CHECK18-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to double**
+// CHECK18-NEXT:    store double* [[A]], double** [[TMP14]], align 8
 // CHECK18-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
 // CHECK18-NEXT:    store i8* null, i8** [[TMP15]], align 8
 // CHECK18-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -10207,46 +10105,40 @@ int bar(int n){
 // CHECK18-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK18-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i64*
 // CHECK18-NEXT:    store i64 [[TMP6]], i64* [[TMP19]], align 8
-// CHECK18-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK18-NEXT:    store i64 4, i64* [[TMP20]], align 8
-// CHECK18-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK18-NEXT:    store i8* null, i8** [[TMP21]], align 8
-// CHECK18-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK18-NEXT:    [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i64*
-// CHECK18-NEXT:    store i64 2, i64* [[TMP23]], align 8
-// CHECK18-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK18-NEXT:    [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i64*
-// CHECK18-NEXT:    store i64 2, i64* [[TMP25]], align 8
-// CHECK18-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK18-NEXT:    store i64 8, i64* [[TMP26]], align 8
-// CHECK18-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK18-NEXT:    store i8* null, i8** [[TMP27]], align 8
-// CHECK18-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK18-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK18-NEXT:    store i8* null, i8** [[TMP20]], align 8
+// CHECK18-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK18-NEXT:    [[TMP22:%.*]] = bitcast i8** [[TMP21]] to i64*
+// CHECK18-NEXT:    store i64 2, i64* [[TMP22]], align 8
+// CHECK18-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK18-NEXT:    [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i64*
+// CHECK18-NEXT:    store i64 2, i64* [[TMP24]], align 8
+// CHECK18-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK18-NEXT:    store i8* null, i8** [[TMP25]], align 8
+// CHECK18-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK18-NEXT:    [[TMP27:%.*]] = bitcast i8** [[TMP26]] to i64*
+// CHECK18-NEXT:    store i64 [[TMP2]], i64* [[TMP27]], align 8
+// CHECK18-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
 // CHECK18-NEXT:    [[TMP29:%.*]] = bitcast i8** [[TMP28]] to i64*
 // CHECK18-NEXT:    store i64 [[TMP2]], i64* [[TMP29]], align 8
-// CHECK18-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK18-NEXT:    [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP2]], i64* [[TMP31]], align 8
-// CHECK18-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK18-NEXT:    store i64 8, i64* [[TMP32]], align 8
-// CHECK18-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
-// CHECK18-NEXT:    store i8* null, i8** [[TMP33]], align 8
-// CHECK18-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK18-NEXT:    [[TMP35:%.*]] = bitcast i8** [[TMP34]] to i16**
-// CHECK18-NEXT:    store i16* [[VLA]], i16** [[TMP35]], align 8
-// CHECK18-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
-// CHECK18-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i16**
-// CHECK18-NEXT:    store i16* [[VLA]], i16** [[TMP37]], align 8
-// CHECK18-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK18-NEXT:    store i64 [[TMP9]], i64* [[TMP38]], align 8
-// CHECK18-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
-// CHECK18-NEXT:    store i8* null, i8** [[TMP39]], align 8
-// CHECK18-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP43:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l227.region_id, i32 5, i8** [[TMP40]], i8** [[TMP41]], i64* [[TMP42]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.23, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK18-NEXT:    [[TMP44:%.*]] = icmp ne i32 [[TMP43]], 0
-// CHECK18-NEXT:    br i1 [[TMP44]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK18-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
+// CHECK18-NEXT:    store i8* null, i8** [[TMP30]], align 8
+// CHECK18-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK18-NEXT:    [[TMP32:%.*]] = bitcast i8** [[TMP31]] to i16**
+// CHECK18-NEXT:    store i16* [[VLA]], i16** [[TMP32]], align 8
+// CHECK18-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK18-NEXT:    [[TMP34:%.*]] = bitcast i8** [[TMP33]] to i16**
+// CHECK18-NEXT:    store i16* [[VLA]], i16** [[TMP34]], align 8
+// CHECK18-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
+// CHECK18-NEXT:    store i64 [[TMP9]], i64* [[TMP35]], align 8
+// CHECK18-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
+// CHECK18-NEXT:    store i8* null, i8** [[TMP36]], align 8
+// CHECK18-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP40:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l227.region_id, i32 5, i8** [[TMP37]], i8** [[TMP38]], i64* [[TMP39]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.25, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK18-NEXT:    [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0
+// CHECK18-NEXT:    br i1 [[TMP41]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK18:       omp_offload.failed:
 // CHECK18-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l227(%struct.S1* [[THIS1]], i64 [[TMP6]], i64 2, i64 [[TMP2]], i16* [[VLA]]) #[[ATTR3]]
 // CHECK18-NEXT:    br label [[OMP_OFFLOAD_CONT]]
@@ -10256,15 +10148,15 @@ int bar(int n){
 // CHECK18-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l227(%struct.S1* [[THIS1]], i64 [[TMP6]], i64 2, i64 [[TMP2]], i16* [[VLA]]) #[[ATTR3]]
 // CHECK18-NEXT:    br label [[OMP_IF_END]]
 // CHECK18:       omp_if.end:
-// CHECK18-NEXT:    [[TMP45:%.*]] = mul nsw i64 1, [[TMP2]]
-// CHECK18-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i64 [[TMP45]]
+// CHECK18-NEXT:    [[TMP42:%.*]] = mul nsw i64 1, [[TMP2]]
+// CHECK18-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i64 [[TMP42]]
 // CHECK18-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1
-// CHECK18-NEXT:    [[TMP46:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
-// CHECK18-NEXT:    [[CONV3:%.*]] = sext i16 [[TMP46]] to i32
-// CHECK18-NEXT:    [[TMP47:%.*]] = load i32, i32* [[B]], align 4
-// CHECK18-NEXT:    [[ADD4:%.*]] = add nsw i32 [[CONV3]], [[TMP47]]
-// CHECK18-NEXT:    [[TMP48:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK18-NEXT:    call void @llvm.stackrestore(i8* [[TMP48]])
+// CHECK18-NEXT:    [[TMP43:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
+// CHECK18-NEXT:    [[CONV3:%.*]] = sext i16 [[TMP43]] to i32
+// CHECK18-NEXT:    [[TMP44:%.*]] = load i32, i32* [[B]], align 4
+// CHECK18-NEXT:    [[ADD4:%.*]] = add nsw i32 [[CONV3]], [[TMP44]]
+// CHECK18-NEXT:    [[TMP45:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK18-NEXT:    call void @llvm.stackrestore(i8* [[TMP45]])
 // CHECK18-NEXT:    ret i32 [[ADD4]]
 //
 //
@@ -10336,7 +10228,7 @@ int bar(int n){
 // CHECK18-NEXT:    store i8* null, i8** [[TMP26]], align 8
 // CHECK18-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK18-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP29:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l209.region_id, i32 4, i8** [[TMP27]], i8** [[TMP28]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.25, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.26, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK18-NEXT:    [[TMP29:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l209.region_id, i32 4, i8** [[TMP27]], i8** [[TMP28]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.27, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.28, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK18-NEXT:    [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0
 // CHECK18-NEXT:    br i1 [[TMP30]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK18:       omp_offload.failed:
@@ -10405,7 +10297,7 @@ int bar(int n){
 // CHECK18-NEXT:    store i8* null, i8** [[TMP19]], align 8
 // CHECK18-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK18-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l192.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.28, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.29, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK18-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l192.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.30, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.31, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK18-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK18-NEXT:    br i1 [[TMP23]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK18:       omp_offload.failed:
@@ -10444,11 +10336,11 @@ int bar(int n){
 // CHECK18-NEXT:    [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32*
 // CHECK18-NEXT:    store i32 [[TMP4]], i32* [[CONV3]], align 4
 // CHECK18-NEXT:    [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8
-// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i64, i64, i64, i16*)* @.omp_outlined..22 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i64 [[TMP5]], i64 [[TMP1]], i64 [[TMP2]], i16* [[TMP3]])
+// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i64, i64, i64, i16*)* @.omp_outlined..23 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i64 [[TMP5]], i64 [[TMP1]], i64 [[TMP2]], i16* [[TMP3]])
 // CHECK18-NEXT:    ret void
 //
 //
-// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..22
+// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..23
 // CHECK18-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR2]] {
 // CHECK18-NEXT:  entry:
 // CHECK18-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -10517,11 +10409,11 @@ int bar(int n){
 // CHECK18-NEXT:    [[CONV5:%.*]] = bitcast i64* [[AAA_CASTED]] to i8*
 // CHECK18-NEXT:    store i8 [[TMP5]], i8* [[CONV5]], align 1
 // CHECK18-NEXT:    [[TMP6:%.*]] = load i64, i64* [[AAA_CASTED]], align 8
-// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, [10 x i32]*)* @.omp_outlined..24 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], [10 x i32]* [[TMP0]])
+// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, [10 x i32]*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], [10 x i32]* [[TMP0]])
 // CHECK18-NEXT:    ret void
 //
 //
-// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..24
+// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..26
 // CHECK18-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] {
 // CHECK18-NEXT:  entry:
 // CHECK18-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -10582,11 +10474,11 @@ int bar(int n){
 // CHECK18-NEXT:    [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16*
 // CHECK18-NEXT:    store i16 [[TMP3]], i16* [[CONV3]], align 2
 // CHECK18-NEXT:    [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8
-// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..27 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], [10 x i32]* [[TMP0]])
+// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..29 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], [10 x i32]* [[TMP0]])
 // CHECK18-NEXT:    ret void
 //
 //
-// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..27
+// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..29
 // CHECK18-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] {
 // CHECK18-NEXT:  entry:
 // CHECK18-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -10833,14 +10725,14 @@ int bar(int n){
 // CHECK19-NEXT:    [[TMP92:%.*]] = mul nuw i32 5, [[TMP3]]
 // CHECK19-NEXT:    [[TMP93:%.*]] = mul nuw i32 [[TMP92]], 8
 // CHECK19-NEXT:    [[TMP94:%.*]] = sext i32 [[TMP93]] to i64
-// CHECK19-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP96:%.*]] = bitcast i8** [[TMP95]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP88]], i32* [[TMP96]], align 4
-// CHECK19-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP98:%.*]] = bitcast i8** [[TMP97]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP88]], i32* [[TMP98]], align 4
-// CHECK19-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK19-NEXT:    store i64 4, i64* [[TMP99]], align 4
+// CHECK19-NEXT:    [[TMP95:%.*]] = bitcast [9 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK19-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP95]], i8* align 4 bitcast ([9 x i64]* @.offload_sizes.10 to i8*), i32 72, i1 false)
+// CHECK19-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP97:%.*]] = bitcast i8** [[TMP96]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP88]], i32* [[TMP97]], align 4
+// CHECK19-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP99:%.*]] = bitcast i8** [[TMP98]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP88]], i32* [[TMP99]], align 4
 // CHECK19-NEXT:    [[TMP100:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i32 0, i32 0
 // CHECK19-NEXT:    store i8* null, i8** [[TMP100]], align 4
 // CHECK19-NEXT:    [[TMP101:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 1
@@ -10849,86 +10741,74 @@ int bar(int n){
 // CHECK19-NEXT:    [[TMP103:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 1
 // CHECK19-NEXT:    [[TMP104:%.*]] = bitcast i8** [[TMP103]] to [10 x float]**
 // CHECK19-NEXT:    store [10 x float]* [[B]], [10 x float]** [[TMP104]], align 4
-// CHECK19-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK19-NEXT:    store i64 40, i64* [[TMP105]], align 4
-// CHECK19-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i32 0, i32 1
-// CHECK19-NEXT:    store i8* null, i8** [[TMP106]], align 4
-// CHECK19-NEXT:    [[TMP107:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 2
-// CHECK19-NEXT:    [[TMP108:%.*]] = bitcast i8** [[TMP107]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP1]], i32* [[TMP108]], align 4
-// CHECK19-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 2
-// CHECK19-NEXT:    [[TMP110:%.*]] = bitcast i8** [[TMP109]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP1]], i32* [[TMP110]], align 4
-// CHECK19-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK19-NEXT:    store i64 4, i64* [[TMP111]], align 4
-// CHECK19-NEXT:    [[TMP112:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i32 0, i32 2
-// CHECK19-NEXT:    store i8* null, i8** [[TMP112]], align 4
-// CHECK19-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 3
+// CHECK19-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i32 0, i32 1
+// CHECK19-NEXT:    store i8* null, i8** [[TMP105]], align 4
+// CHECK19-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 2
+// CHECK19-NEXT:    [[TMP107:%.*]] = bitcast i8** [[TMP106]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP1]], i32* [[TMP107]], align 4
+// CHECK19-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 2
+// CHECK19-NEXT:    [[TMP109:%.*]] = bitcast i8** [[TMP108]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP1]], i32* [[TMP109]], align 4
+// CHECK19-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i32 0, i32 2
+// CHECK19-NEXT:    store i8* null, i8** [[TMP110]], align 4
+// CHECK19-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 3
+// CHECK19-NEXT:    [[TMP112:%.*]] = bitcast i8** [[TMP111]] to float**
+// CHECK19-NEXT:    store float* [[VLA]], float** [[TMP112]], align 4
+// CHECK19-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 3
 // CHECK19-NEXT:    [[TMP114:%.*]] = bitcast i8** [[TMP113]] to float**
 // CHECK19-NEXT:    store float* [[VLA]], float** [[TMP114]], align 4
-// CHECK19-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 3
-// CHECK19-NEXT:    [[TMP116:%.*]] = bitcast i8** [[TMP115]] to float**
-// CHECK19-NEXT:    store float* [[VLA]], float** [[TMP116]], align 4
-// CHECK19-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK19-NEXT:    store i64 [[TMP91]], i64* [[TMP117]], align 4
-// CHECK19-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i32 0, i32 3
-// CHECK19-NEXT:    store i8* null, i8** [[TMP118]], align 4
-// CHECK19-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 4
+// CHECK19-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
+// CHECK19-NEXT:    store i64 [[TMP91]], i64* [[TMP115]], align 4
+// CHECK19-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i32 0, i32 3
+// CHECK19-NEXT:    store i8* null, i8** [[TMP116]], align 4
+// CHECK19-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 4
+// CHECK19-NEXT:    [[TMP118:%.*]] = bitcast i8** [[TMP117]] to [5 x [10 x double]]**
+// CHECK19-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP118]], align 4
+// CHECK19-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 4
 // CHECK19-NEXT:    [[TMP120:%.*]] = bitcast i8** [[TMP119]] to [5 x [10 x double]]**
 // CHECK19-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP120]], align 4
-// CHECK19-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 4
-// CHECK19-NEXT:    [[TMP122:%.*]] = bitcast i8** [[TMP121]] to [5 x [10 x double]]**
-// CHECK19-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP122]], align 4
-// CHECK19-NEXT:    [[TMP123:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK19-NEXT:    store i64 400, i64* [[TMP123]], align 4
-// CHECK19-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i32 0, i32 4
-// CHECK19-NEXT:    store i8* null, i8** [[TMP124]], align 4
-// CHECK19-NEXT:    [[TMP125:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 5
-// CHECK19-NEXT:    [[TMP126:%.*]] = bitcast i8** [[TMP125]] to i32*
-// CHECK19-NEXT:    store i32 5, i32* [[TMP126]], align 4
-// CHECK19-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 5
+// CHECK19-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i32 0, i32 4
+// CHECK19-NEXT:    store i8* null, i8** [[TMP121]], align 4
+// CHECK19-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 5
+// CHECK19-NEXT:    [[TMP123:%.*]] = bitcast i8** [[TMP122]] to i32*
+// CHECK19-NEXT:    store i32 5, i32* [[TMP123]], align 4
+// CHECK19-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 5
+// CHECK19-NEXT:    [[TMP125:%.*]] = bitcast i8** [[TMP124]] to i32*
+// CHECK19-NEXT:    store i32 5, i32* [[TMP125]], align 4
+// CHECK19-NEXT:    [[TMP126:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i32 0, i32 5
+// CHECK19-NEXT:    store i8* null, i8** [[TMP126]], align 4
+// CHECK19-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 6
 // CHECK19-NEXT:    [[TMP128:%.*]] = bitcast i8** [[TMP127]] to i32*
-// CHECK19-NEXT:    store i32 5, i32* [[TMP128]], align 4
-// CHECK19-NEXT:    [[TMP129:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 5
-// CHECK19-NEXT:    store i64 4, i64* [[TMP129]], align 4
-// CHECK19-NEXT:    [[TMP130:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i32 0, i32 5
-// CHECK19-NEXT:    store i8* null, i8** [[TMP130]], align 4
-// CHECK19-NEXT:    [[TMP131:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 6
-// CHECK19-NEXT:    [[TMP132:%.*]] = bitcast i8** [[TMP131]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP3]], i32* [[TMP132]], align 4
-// CHECK19-NEXT:    [[TMP133:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 6
-// CHECK19-NEXT:    [[TMP134:%.*]] = bitcast i8** [[TMP133]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP3]], i32* [[TMP134]], align 4
-// CHECK19-NEXT:    [[TMP135:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 6
-// CHECK19-NEXT:    store i64 4, i64* [[TMP135]], align 4
-// CHECK19-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i32 0, i32 6
-// CHECK19-NEXT:    store i8* null, i8** [[TMP136]], align 4
-// CHECK19-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 7
-// CHECK19-NEXT:    [[TMP138:%.*]] = bitcast i8** [[TMP137]] to double**
-// CHECK19-NEXT:    store double* [[VLA1]], double** [[TMP138]], align 4
-// CHECK19-NEXT:    [[TMP139:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 7
-// CHECK19-NEXT:    [[TMP140:%.*]] = bitcast i8** [[TMP139]] to double**
-// CHECK19-NEXT:    store double* [[VLA1]], double** [[TMP140]], align 4
-// CHECK19-NEXT:    [[TMP141:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
-// CHECK19-NEXT:    store i64 [[TMP94]], i64* [[TMP141]], align 4
-// CHECK19-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i32 0, i32 7
+// CHECK19-NEXT:    store i32 [[TMP3]], i32* [[TMP128]], align 4
+// CHECK19-NEXT:    [[TMP129:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 6
+// CHECK19-NEXT:    [[TMP130:%.*]] = bitcast i8** [[TMP129]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP3]], i32* [[TMP130]], align 4
+// CHECK19-NEXT:    [[TMP131:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i32 0, i32 6
+// CHECK19-NEXT:    store i8* null, i8** [[TMP131]], align 4
+// CHECK19-NEXT:    [[TMP132:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 7
+// CHECK19-NEXT:    [[TMP133:%.*]] = bitcast i8** [[TMP132]] to double**
+// CHECK19-NEXT:    store double* [[VLA1]], double** [[TMP133]], align 4
+// CHECK19-NEXT:    [[TMP134:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 7
+// CHECK19-NEXT:    [[TMP135:%.*]] = bitcast i8** [[TMP134]] to double**
+// CHECK19-NEXT:    store double* [[VLA1]], double** [[TMP135]], align 4
+// CHECK19-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
+// CHECK19-NEXT:    store i64 [[TMP94]], i64* [[TMP136]], align 4
+// CHECK19-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i32 0, i32 7
+// CHECK19-NEXT:    store i8* null, i8** [[TMP137]], align 4
+// CHECK19-NEXT:    [[TMP138:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 8
+// CHECK19-NEXT:    [[TMP139:%.*]] = bitcast i8** [[TMP138]] to %struct.TT**
+// CHECK19-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP139]], align 4
+// CHECK19-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 8
+// CHECK19-NEXT:    [[TMP141:%.*]] = bitcast i8** [[TMP140]] to %struct.TT**
+// CHECK19-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP141]], align 4
+// CHECK19-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i32 0, i32 8
 // CHECK19-NEXT:    store i8* null, i8** [[TMP142]], align 4
-// CHECK19-NEXT:    [[TMP143:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 8
-// CHECK19-NEXT:    [[TMP144:%.*]] = bitcast i8** [[TMP143]] to %struct.TT**
-// CHECK19-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP144]], align 4
-// CHECK19-NEXT:    [[TMP145:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 8
-// CHECK19-NEXT:    [[TMP146:%.*]] = bitcast i8** [[TMP145]] to %struct.TT**
-// CHECK19-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP146]], align 4
-// CHECK19-NEXT:    [[TMP147:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 8
-// CHECK19-NEXT:    store i64 12, i64* [[TMP147]], align 4
-// CHECK19-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i32 0, i32 8
-// CHECK19-NEXT:    store i8* null, i8** [[TMP148]], align 4
-// CHECK19-NEXT:    [[TMP149:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP151:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP152:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l142.region_id, i32 9, i8** [[TMP149]], i8** [[TMP150]], i64* [[TMP151]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.10, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK19-NEXT:    [[TMP153:%.*]] = icmp ne i32 [[TMP152]], 0
-// CHECK19-NEXT:    br i1 [[TMP153]], label [[OMP_OFFLOAD_FAILED23:%.*]], label [[OMP_OFFLOAD_CONT24:%.*]]
+// CHECK19-NEXT:    [[TMP143:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP144:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP145:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP146:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l142.region_id, i32 9, i8** [[TMP143]], i8** [[TMP144]], i64* [[TMP145]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.11, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK19-NEXT:    [[TMP147:%.*]] = icmp ne i32 [[TMP146]], 0
+// CHECK19-NEXT:    br i1 [[TMP147]], label [[OMP_OFFLOAD_FAILED23:%.*]], label [[OMP_OFFLOAD_CONT24:%.*]]
 // CHECK19:       omp_offload.failed23:
 // CHECK19-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l142(i32 [[TMP88]], [10 x float]* [[B]], i32 [[TMP1]], float* [[VLA]], [5 x [10 x double]]* [[C]], i32 5, i32 [[TMP3]], double* [[VLA1]], %struct.TT* [[D]]) #[[ATTR3]]
 // CHECK19-NEXT:    br label [[OMP_OFFLOAD_CONT24]]
@@ -10939,50 +10819,50 @@ int bar(int n){
 // CHECK19-NEXT:    br label [[OMP_IF_END26]]
 // CHECK19:       omp_if.end26:
 // CHECK19-NEXT:    store i32 0, i32* [[NN]], align 4
-// CHECK19-NEXT:    [[TMP154:%.*]] = load i32, i32* [[NN]], align 4
-// CHECK19-NEXT:    store i32 [[TMP154]], i32* [[NN_CASTED]], align 4
-// CHECK19-NEXT:    [[TMP155:%.*]] = load i32, i32* [[NN_CASTED]], align 4
-// CHECK19-NEXT:    [[TMP156:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP157:%.*]] = bitcast i8** [[TMP156]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP155]], i32* [[TMP157]], align 4
-// CHECK19-NEXT:    [[TMP158:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS28]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP159:%.*]] = bitcast i8** [[TMP158]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP155]], i32* [[TMP159]], align 4
-// CHECK19-NEXT:    [[TMP160:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_MAPPERS29]], i32 0, i32 0
-// CHECK19-NEXT:    store i8* null, i8** [[TMP160]], align 4
-// CHECK19-NEXT:    [[TMP161:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP162:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS28]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP163:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l154.region_id, i32 1, i8** [[TMP161]], i8** [[TMP162]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.13, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.14, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK19-NEXT:    [[TMP164:%.*]] = icmp ne i32 [[TMP163]], 0
-// CHECK19-NEXT:    br i1 [[TMP164]], label [[OMP_OFFLOAD_FAILED30:%.*]], label [[OMP_OFFLOAD_CONT31:%.*]]
+// CHECK19-NEXT:    [[TMP148:%.*]] = load i32, i32* [[NN]], align 4
+// CHECK19-NEXT:    store i32 [[TMP148]], i32* [[NN_CASTED]], align 4
+// CHECK19-NEXT:    [[TMP149:%.*]] = load i32, i32* [[NN_CASTED]], align 4
+// CHECK19-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP151:%.*]] = bitcast i8** [[TMP150]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP149]], i32* [[TMP151]], align 4
+// CHECK19-NEXT:    [[TMP152:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS28]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP153:%.*]] = bitcast i8** [[TMP152]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP149]], i32* [[TMP153]], align 4
+// CHECK19-NEXT:    [[TMP154:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_MAPPERS29]], i32 0, i32 0
+// CHECK19-NEXT:    store i8* null, i8** [[TMP154]], align 4
+// CHECK19-NEXT:    [[TMP155:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP156:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS28]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP157:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l154.region_id, i32 1, i8** [[TMP155]], i8** [[TMP156]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.14, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.15, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK19-NEXT:    [[TMP158:%.*]] = icmp ne i32 [[TMP157]], 0
+// CHECK19-NEXT:    br i1 [[TMP158]], label [[OMP_OFFLOAD_FAILED30:%.*]], label [[OMP_OFFLOAD_CONT31:%.*]]
 // CHECK19:       omp_offload.failed30:
-// CHECK19-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l154(i32 [[TMP155]]) #[[ATTR3]]
+// CHECK19-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l154(i32 [[TMP149]]) #[[ATTR3]]
 // CHECK19-NEXT:    br label [[OMP_OFFLOAD_CONT31]]
 // CHECK19:       omp_offload.cont31:
-// CHECK19-NEXT:    [[TMP165:%.*]] = load i32, i32* [[NN]], align 4
-// CHECK19-NEXT:    store i32 [[TMP165]], i32* [[NN_CASTED32]], align 4
-// CHECK19-NEXT:    [[TMP166:%.*]] = load i32, i32* [[NN_CASTED32]], align 4
-// CHECK19-NEXT:    [[TMP167:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS33]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP168:%.*]] = bitcast i8** [[TMP167]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP166]], i32* [[TMP168]], align 4
-// CHECK19-NEXT:    [[TMP169:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS34]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP170:%.*]] = bitcast i8** [[TMP169]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP166]], i32* [[TMP170]], align 4
-// CHECK19-NEXT:    [[TMP171:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_MAPPERS35]], i32 0, i32 0
-// CHECK19-NEXT:    store i8* null, i8** [[TMP171]], align 4
-// CHECK19-NEXT:    [[TMP172:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS33]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP173:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS34]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP174:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l157.region_id, i32 1, i8** [[TMP172]], i8** [[TMP173]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.17, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.18, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK19-NEXT:    [[TMP175:%.*]] = icmp ne i32 [[TMP174]], 0
-// CHECK19-NEXT:    br i1 [[TMP175]], label [[OMP_OFFLOAD_FAILED36:%.*]], label [[OMP_OFFLOAD_CONT37:%.*]]
+// CHECK19-NEXT:    [[TMP159:%.*]] = load i32, i32* [[NN]], align 4
+// CHECK19-NEXT:    store i32 [[TMP159]], i32* [[NN_CASTED32]], align 4
+// CHECK19-NEXT:    [[TMP160:%.*]] = load i32, i32* [[NN_CASTED32]], align 4
+// CHECK19-NEXT:    [[TMP161:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS33]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP162:%.*]] = bitcast i8** [[TMP161]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP160]], i32* [[TMP162]], align 4
+// CHECK19-NEXT:    [[TMP163:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS34]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP164:%.*]] = bitcast i8** [[TMP163]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP160]], i32* [[TMP164]], align 4
+// CHECK19-NEXT:    [[TMP165:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_MAPPERS35]], i32 0, i32 0
+// CHECK19-NEXT:    store i8* null, i8** [[TMP165]], align 4
+// CHECK19-NEXT:    [[TMP166:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS33]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP167:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS34]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP168:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l157.region_id, i32 1, i8** [[TMP166]], i8** [[TMP167]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.18, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.19, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK19-NEXT:    [[TMP169:%.*]] = icmp ne i32 [[TMP168]], 0
+// CHECK19-NEXT:    br i1 [[TMP169]], label [[OMP_OFFLOAD_FAILED36:%.*]], label [[OMP_OFFLOAD_CONT37:%.*]]
 // CHECK19:       omp_offload.failed36:
-// CHECK19-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l157(i32 [[TMP166]]) #[[ATTR3]]
+// CHECK19-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l157(i32 [[TMP160]]) #[[ATTR3]]
 // CHECK19-NEXT:    br label [[OMP_OFFLOAD_CONT37]]
 // CHECK19:       omp_offload.cont37:
-// CHECK19-NEXT:    [[TMP176:%.*]] = load i32, i32* [[A]], align 4
-// CHECK19-NEXT:    [[TMP177:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK19-NEXT:    call void @llvm.stackrestore(i8* [[TMP177]])
-// CHECK19-NEXT:    ret i32 [[TMP176]]
+// CHECK19-NEXT:    [[TMP170:%.*]] = load i32, i32* [[A]], align 4
+// CHECK19-NEXT:    [[TMP171:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK19-NEXT:    call void @llvm.stackrestore(i8* [[TMP171]])
+// CHECK19-NEXT:    ret i32 [[TMP170]]
 //
 //
 // CHECK19-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l101
@@ -11349,11 +11229,11 @@ int bar(int n){
 // CHECK19-NEXT:    [[TMP0:%.*]] = load i32, i32* [[NN_ADDR]], align 4
 // CHECK19-NEXT:    store i32 [[TMP0]], i32* [[NN_CASTED]], align 4
 // CHECK19-NEXT:    [[TMP1:%.*]] = load i32, i32* [[NN_CASTED]], align 4
-// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32 [[TMP1]])
+// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i32 [[TMP1]])
 // CHECK19-NEXT:    ret void
 //
 //
-// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..11
+// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..12
 // CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[NN:%.*]]) #[[ATTR2]] {
 // CHECK19-NEXT:  entry:
 // CHECK19-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -11366,11 +11246,11 @@ int bar(int n){
 // CHECK19-NEXT:    [[TMP0:%.*]] = load i32, i32* [[NN_ADDR]], align 4
 // CHECK19-NEXT:    store i32 [[TMP0]], i32* [[NN_CASTED]], align 4
 // CHECK19-NEXT:    [[TMP1:%.*]] = load i32, i32* [[NN_CASTED]], align 4
-// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i32 [[TMP1]])
+// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i32 [[TMP1]])
 // CHECK19-NEXT:    ret void
 //
 //
-// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..12
+// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..13
 // CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[NN:%.*]]) #[[ATTR2]] {
 // CHECK19-NEXT:  entry:
 // CHECK19-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -11391,11 +11271,11 @@ int bar(int n){
 // CHECK19-NEXT:    [[TMP0:%.*]] = load i32, i32* [[NN_ADDR]], align 4
 // CHECK19-NEXT:    store i32 [[TMP0]], i32* [[NN_CASTED]], align 4
 // CHECK19-NEXT:    [[TMP1:%.*]] = load i32, i32* [[NN_CASTED]], align 4
-// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP1]])
+// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i32 [[TMP1]])
 // CHECK19-NEXT:    ret void
 //
 //
-// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..15
+// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..16
 // CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[NN:%.*]]) #[[ATTR2]] {
 // CHECK19-NEXT:  entry:
 // CHECK19-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -11404,11 +11284,11 @@ int bar(int n){
 // CHECK19-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK19-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
 // CHECK19-NEXT:    store i32 [[NN]], i32* [[NN_ADDR]], align 4
-// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i32* [[NN_ADDR]])
+// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..17 to void (i32*, i32*, ...)*), i32* [[NN_ADDR]])
 // CHECK19-NEXT:    ret void
 //
 //
-// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..16
+// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..17
 // CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[NN:%.*]]) #[[ATTR2]] {
 // CHECK19-NEXT:  entry:
 // CHECK19-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -11442,7 +11322,7 @@ int bar(int n){
 // CHECK19-NEXT:    store i8* null, i8** [[TMP5]], align 4
 // CHECK19-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK19-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP8:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6bazzzziPi_l182.region_id, i32 1, i8** [[TMP6]], i8** [[TMP7]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.20, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.21, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK19-NEXT:    [[TMP8:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6bazzzziPi_l182.region_id, i32 1, i8** [[TMP6]], i8** [[TMP7]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.21, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.22, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK19-NEXT:    [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0
 // CHECK19-NEXT:    br i1 [[TMP9]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK19:       omp_offload.failed:
@@ -11458,11 +11338,11 @@ int bar(int n){
 // CHECK19-NEXT:    [[VLA_ADDR:%.*]] = alloca i32, align 4
 // CHECK19-NEXT:    store i32 [[VLA]], i32* [[VLA_ADDR]], align 4
 // CHECK19-NEXT:    [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4
-// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i32 [[TMP0]])
+// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32)* @.omp_outlined..20 to void (i32*, i32*, ...)*), i32 [[TMP0]])
 // CHECK19-NEXT:    ret void
 //
 //
-// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..19
+// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..20
 // CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[VLA:%.*]]) #[[ATTR2]] {
 // CHECK19-NEXT:  entry:
 // CHECK19-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -11544,14 +11424,14 @@ int bar(int n){
 // CHECK19-NEXT:    [[TMP7:%.*]] = mul nuw i32 2, [[TMP1]]
 // CHECK19-NEXT:    [[TMP8:%.*]] = mul nuw i32 [[TMP7]], 2
 // CHECK19-NEXT:    [[TMP9:%.*]] = sext i32 [[TMP8]] to i64
-// CHECK19-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP11:%.*]] = bitcast i8** [[TMP10]] to %struct.S1**
-// CHECK19-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP11]], align 4
-// CHECK19-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP13:%.*]] = bitcast i8** [[TMP12]] to double**
-// CHECK19-NEXT:    store double* [[A]], double** [[TMP13]], align 4
-// CHECK19-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK19-NEXT:    store i64 8, i64* [[TMP14]], align 4
+// CHECK19-NEXT:    [[TMP10:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK19-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP10]], i8* align 4 bitcast ([5 x i64]* @.offload_sizes.24 to i8*), i32 40, i1 false)
+// CHECK19-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP12:%.*]] = bitcast i8** [[TMP11]] to %struct.S1**
+// CHECK19-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP12]], align 4
+// CHECK19-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to double**
+// CHECK19-NEXT:    store double* [[A]], double** [[TMP14]], align 4
 // CHECK19-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
 // CHECK19-NEXT:    store i8* null, i8** [[TMP15]], align 4
 // CHECK19-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -11560,46 +11440,40 @@ int bar(int n){
 // CHECK19-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK19-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32*
 // CHECK19-NEXT:    store i32 [[TMP5]], i32* [[TMP19]], align 4
-// CHECK19-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK19-NEXT:    store i64 4, i64* [[TMP20]], align 4
-// CHECK19-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK19-NEXT:    store i8* null, i8** [[TMP21]], align 4
-// CHECK19-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK19-NEXT:    [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i32*
-// CHECK19-NEXT:    store i32 2, i32* [[TMP23]], align 4
-// CHECK19-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK19-NEXT:    [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i32*
-// CHECK19-NEXT:    store i32 2, i32* [[TMP25]], align 4
-// CHECK19-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK19-NEXT:    store i64 4, i64* [[TMP26]], align 4
-// CHECK19-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK19-NEXT:    store i8* null, i8** [[TMP27]], align 4
-// CHECK19-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK19-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK19-NEXT:    store i8* null, i8** [[TMP20]], align 4
+// CHECK19-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK19-NEXT:    [[TMP22:%.*]] = bitcast i8** [[TMP21]] to i32*
+// CHECK19-NEXT:    store i32 2, i32* [[TMP22]], align 4
+// CHECK19-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK19-NEXT:    [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i32*
+// CHECK19-NEXT:    store i32 2, i32* [[TMP24]], align 4
+// CHECK19-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK19-NEXT:    store i8* null, i8** [[TMP25]], align 4
+// CHECK19-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK19-NEXT:    [[TMP27:%.*]] = bitcast i8** [[TMP26]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP1]], i32* [[TMP27]], align 4
+// CHECK19-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
 // CHECK19-NEXT:    [[TMP29:%.*]] = bitcast i8** [[TMP28]] to i32*
 // CHECK19-NEXT:    store i32 [[TMP1]], i32* [[TMP29]], align 4
-// CHECK19-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK19-NEXT:    [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP1]], i32* [[TMP31]], align 4
-// CHECK19-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK19-NEXT:    store i64 4, i64* [[TMP32]], align 4
-// CHECK19-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
-// CHECK19-NEXT:    store i8* null, i8** [[TMP33]], align 4
-// CHECK19-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK19-NEXT:    [[TMP35:%.*]] = bitcast i8** [[TMP34]] to i16**
-// CHECK19-NEXT:    store i16* [[VLA]], i16** [[TMP35]], align 4
-// CHECK19-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
-// CHECK19-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i16**
-// CHECK19-NEXT:    store i16* [[VLA]], i16** [[TMP37]], align 4
-// CHECK19-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK19-NEXT:    store i64 [[TMP9]], i64* [[TMP38]], align 4
-// CHECK19-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
-// CHECK19-NEXT:    store i8* null, i8** [[TMP39]], align 4
-// CHECK19-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP43:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l227.region_id, i32 5, i8** [[TMP40]], i8** [[TMP41]], i64* [[TMP42]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.23, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK19-NEXT:    [[TMP44:%.*]] = icmp ne i32 [[TMP43]], 0
-// CHECK19-NEXT:    br i1 [[TMP44]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK19-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
+// CHECK19-NEXT:    store i8* null, i8** [[TMP30]], align 4
+// CHECK19-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK19-NEXT:    [[TMP32:%.*]] = bitcast i8** [[TMP31]] to i16**
+// CHECK19-NEXT:    store i16* [[VLA]], i16** [[TMP32]], align 4
+// CHECK19-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK19-NEXT:    [[TMP34:%.*]] = bitcast i8** [[TMP33]] to i16**
+// CHECK19-NEXT:    store i16* [[VLA]], i16** [[TMP34]], align 4
+// CHECK19-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
+// CHECK19-NEXT:    store i64 [[TMP9]], i64* [[TMP35]], align 4
+// CHECK19-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
+// CHECK19-NEXT:    store i8* null, i8** [[TMP36]], align 4
+// CHECK19-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP40:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l227.region_id, i32 5, i8** [[TMP37]], i8** [[TMP38]], i64* [[TMP39]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.25, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK19-NEXT:    [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0
+// CHECK19-NEXT:    br i1 [[TMP41]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK19:       omp_offload.failed:
 // CHECK19-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l227(%struct.S1* [[THIS1]], i32 [[TMP5]], i32 2, i32 [[TMP1]], i16* [[VLA]]) #[[ATTR3]]
 // CHECK19-NEXT:    br label [[OMP_OFFLOAD_CONT]]
@@ -11609,15 +11483,15 @@ int bar(int n){
 // CHECK19-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l227(%struct.S1* [[THIS1]], i32 [[TMP5]], i32 2, i32 [[TMP1]], i16* [[VLA]]) #[[ATTR3]]
 // CHECK19-NEXT:    br label [[OMP_IF_END]]
 // CHECK19:       omp_if.end:
-// CHECK19-NEXT:    [[TMP45:%.*]] = mul nsw i32 1, [[TMP1]]
-// CHECK19-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i32 [[TMP45]]
+// CHECK19-NEXT:    [[TMP42:%.*]] = mul nsw i32 1, [[TMP1]]
+// CHECK19-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i32 [[TMP42]]
 // CHECK19-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1
-// CHECK19-NEXT:    [[TMP46:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
-// CHECK19-NEXT:    [[CONV:%.*]] = sext i16 [[TMP46]] to i32
-// CHECK19-NEXT:    [[TMP47:%.*]] = load i32, i32* [[B]], align 4
-// CHECK19-NEXT:    [[ADD3:%.*]] = add nsw i32 [[CONV]], [[TMP47]]
-// CHECK19-NEXT:    [[TMP48:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK19-NEXT:    call void @llvm.stackrestore(i8* [[TMP48]])
+// CHECK19-NEXT:    [[TMP43:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
+// CHECK19-NEXT:    [[CONV:%.*]] = sext i16 [[TMP43]] to i32
+// CHECK19-NEXT:    [[TMP44:%.*]] = load i32, i32* [[B]], align 4
+// CHECK19-NEXT:    [[ADD3:%.*]] = add nsw i32 [[CONV]], [[TMP44]]
+// CHECK19-NEXT:    [[TMP45:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK19-NEXT:    call void @llvm.stackrestore(i8* [[TMP45]])
 // CHECK19-NEXT:    ret i32 [[ADD3]]
 //
 //
@@ -11688,7 +11562,7 @@ int bar(int n){
 // CHECK19-NEXT:    store i8* null, i8** [[TMP26]], align 4
 // CHECK19-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK19-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP29:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l209.region_id, i32 4, i8** [[TMP27]], i8** [[TMP28]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.25, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.26, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK19-NEXT:    [[TMP29:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l209.region_id, i32 4, i8** [[TMP27]], i8** [[TMP28]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.27, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.28, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK19-NEXT:    [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0
 // CHECK19-NEXT:    br i1 [[TMP30]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK19:       omp_offload.failed:
@@ -11756,7 +11630,7 @@ int bar(int n){
 // CHECK19-NEXT:    store i8* null, i8** [[TMP19]], align 4
 // CHECK19-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK19-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l192.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.28, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.29, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK19-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l192.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.30, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.31, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK19-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK19-NEXT:    br i1 [[TMP23]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK19:       omp_offload.failed:
@@ -11793,11 +11667,11 @@ int bar(int n){
 // CHECK19-NEXT:    [[TMP4:%.*]] = load i32, i32* [[B_ADDR]], align 4
 // CHECK19-NEXT:    store i32 [[TMP4]], i32* [[B_CASTED]], align 4
 // CHECK19-NEXT:    [[TMP5:%.*]] = load i32, i32* [[B_CASTED]], align 4
-// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i32, i32, i32, i16*)* @.omp_outlined..22 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i32 [[TMP5]], i32 [[TMP1]], i32 [[TMP2]], i16* [[TMP3]])
+// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i32, i32, i32, i16*)* @.omp_outlined..23 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i32 [[TMP5]], i32 [[TMP1]], i32 [[TMP2]], i16* [[TMP3]])
 // CHECK19-NEXT:    ret void
 //
 //
-// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..22
+// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..23
 // CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR2]] {
 // CHECK19-NEXT:  entry:
 // CHECK19-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -11863,11 +11737,11 @@ int bar(int n){
 // CHECK19-NEXT:    [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8*
 // CHECK19-NEXT:    store i8 [[TMP5]], i8* [[CONV3]], align 1
 // CHECK19-NEXT:    [[TMP6:%.*]] = load i32, i32* [[AAA_CASTED]], align 4
-// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, [10 x i32]*)* @.omp_outlined..24 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], [10 x i32]* [[TMP0]])
+// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, [10 x i32]*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], [10 x i32]* [[TMP0]])
 // CHECK19-NEXT:    ret void
 //
 //
-// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..24
+// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..26
 // CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] {
 // CHECK19-NEXT:  entry:
 // CHECK19-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -11925,11 +11799,11 @@ int bar(int n){
 // CHECK19-NEXT:    [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16*
 // CHECK19-NEXT:    store i16 [[TMP3]], i16* [[CONV1]], align 2
 // CHECK19-NEXT:    [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4
-// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..27 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], [10 x i32]* [[TMP0]])
+// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..29 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], [10 x i32]* [[TMP0]])
 // CHECK19-NEXT:    ret void
 //
 //
-// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..27
+// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..29
 // CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] {
 // CHECK19-NEXT:  entry:
 // CHECK19-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -12175,14 +12049,14 @@ int bar(int n){
 // CHECK20-NEXT:    [[TMP92:%.*]] = mul nuw i32 5, [[TMP3]]
 // CHECK20-NEXT:    [[TMP93:%.*]] = mul nuw i32 [[TMP92]], 8
 // CHECK20-NEXT:    [[TMP94:%.*]] = sext i32 [[TMP93]] to i64
-// CHECK20-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP96:%.*]] = bitcast i8** [[TMP95]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP88]], i32* [[TMP96]], align 4
-// CHECK20-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP98:%.*]] = bitcast i8** [[TMP97]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP88]], i32* [[TMP98]], align 4
-// CHECK20-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK20-NEXT:    store i64 4, i64* [[TMP99]], align 4
+// CHECK20-NEXT:    [[TMP95:%.*]] = bitcast [9 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK20-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP95]], i8* align 4 bitcast ([9 x i64]* @.offload_sizes.10 to i8*), i32 72, i1 false)
+// CHECK20-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP97:%.*]] = bitcast i8** [[TMP96]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP88]], i32* [[TMP97]], align 4
+// CHECK20-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP99:%.*]] = bitcast i8** [[TMP98]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP88]], i32* [[TMP99]], align 4
 // CHECK20-NEXT:    [[TMP100:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i32 0, i32 0
 // CHECK20-NEXT:    store i8* null, i8** [[TMP100]], align 4
 // CHECK20-NEXT:    [[TMP101:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 1
@@ -12191,86 +12065,74 @@ int bar(int n){
 // CHECK20-NEXT:    [[TMP103:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 1
 // CHECK20-NEXT:    [[TMP104:%.*]] = bitcast i8** [[TMP103]] to [10 x float]**
 // CHECK20-NEXT:    store [10 x float]* [[B]], [10 x float]** [[TMP104]], align 4
-// CHECK20-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK20-NEXT:    store i64 40, i64* [[TMP105]], align 4
-// CHECK20-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i32 0, i32 1
-// CHECK20-NEXT:    store i8* null, i8** [[TMP106]], align 4
-// CHECK20-NEXT:    [[TMP107:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 2
-// CHECK20-NEXT:    [[TMP108:%.*]] = bitcast i8** [[TMP107]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP1]], i32* [[TMP108]], align 4
-// CHECK20-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 2
-// CHECK20-NEXT:    [[TMP110:%.*]] = bitcast i8** [[TMP109]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP1]], i32* [[TMP110]], align 4
-// CHECK20-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK20-NEXT:    store i64 4, i64* [[TMP111]], align 4
-// CHECK20-NEXT:    [[TMP112:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i32 0, i32 2
-// CHECK20-NEXT:    store i8* null, i8** [[TMP112]], align 4
-// CHECK20-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 3
+// CHECK20-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i32 0, i32 1
+// CHECK20-NEXT:    store i8* null, i8** [[TMP105]], align 4
+// CHECK20-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 2
+// CHECK20-NEXT:    [[TMP107:%.*]] = bitcast i8** [[TMP106]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP1]], i32* [[TMP107]], align 4
+// CHECK20-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 2
+// CHECK20-NEXT:    [[TMP109:%.*]] = bitcast i8** [[TMP108]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP1]], i32* [[TMP109]], align 4
+// CHECK20-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i32 0, i32 2
+// CHECK20-NEXT:    store i8* null, i8** [[TMP110]], align 4
+// CHECK20-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 3
+// CHECK20-NEXT:    [[TMP112:%.*]] = bitcast i8** [[TMP111]] to float**
+// CHECK20-NEXT:    store float* [[VLA]], float** [[TMP112]], align 4
+// CHECK20-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 3
 // CHECK20-NEXT:    [[TMP114:%.*]] = bitcast i8** [[TMP113]] to float**
 // CHECK20-NEXT:    store float* [[VLA]], float** [[TMP114]], align 4
-// CHECK20-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 3
-// CHECK20-NEXT:    [[TMP116:%.*]] = bitcast i8** [[TMP115]] to float**
-// CHECK20-NEXT:    store float* [[VLA]], float** [[TMP116]], align 4
-// CHECK20-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK20-NEXT:    store i64 [[TMP91]], i64* [[TMP117]], align 4
-// CHECK20-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i32 0, i32 3
-// CHECK20-NEXT:    store i8* null, i8** [[TMP118]], align 4
-// CHECK20-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 4
+// CHECK20-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
+// CHECK20-NEXT:    store i64 [[TMP91]], i64* [[TMP115]], align 4
+// CHECK20-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i32 0, i32 3
+// CHECK20-NEXT:    store i8* null, i8** [[TMP116]], align 4
+// CHECK20-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 4
+// CHECK20-NEXT:    [[TMP118:%.*]] = bitcast i8** [[TMP117]] to [5 x [10 x double]]**
+// CHECK20-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP118]], align 4
+// CHECK20-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 4
 // CHECK20-NEXT:    [[TMP120:%.*]] = bitcast i8** [[TMP119]] to [5 x [10 x double]]**
 // CHECK20-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP120]], align 4
-// CHECK20-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 4
-// CHECK20-NEXT:    [[TMP122:%.*]] = bitcast i8** [[TMP121]] to [5 x [10 x double]]**
-// CHECK20-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP122]], align 4
-// CHECK20-NEXT:    [[TMP123:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK20-NEXT:    store i64 400, i64* [[TMP123]], align 4
-// CHECK20-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i32 0, i32 4
-// CHECK20-NEXT:    store i8* null, i8** [[TMP124]], align 4
-// CHECK20-NEXT:    [[TMP125:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 5
-// CHECK20-NEXT:    [[TMP126:%.*]] = bitcast i8** [[TMP125]] to i32*
-// CHECK20-NEXT:    store i32 5, i32* [[TMP126]], align 4
-// CHECK20-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 5
+// CHECK20-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i32 0, i32 4
+// CHECK20-NEXT:    store i8* null, i8** [[TMP121]], align 4
+// CHECK20-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 5
+// CHECK20-NEXT:    [[TMP123:%.*]] = bitcast i8** [[TMP122]] to i32*
+// CHECK20-NEXT:    store i32 5, i32* [[TMP123]], align 4
+// CHECK20-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 5
+// CHECK20-NEXT:    [[TMP125:%.*]] = bitcast i8** [[TMP124]] to i32*
+// CHECK20-NEXT:    store i32 5, i32* [[TMP125]], align 4
+// CHECK20-NEXT:    [[TMP126:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i32 0, i32 5
+// CHECK20-NEXT:    store i8* null, i8** [[TMP126]], align 4
+// CHECK20-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 6
 // CHECK20-NEXT:    [[TMP128:%.*]] = bitcast i8** [[TMP127]] to i32*
-// CHECK20-NEXT:    store i32 5, i32* [[TMP128]], align 4
-// CHECK20-NEXT:    [[TMP129:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 5
-// CHECK20-NEXT:    store i64 4, i64* [[TMP129]], align 4
-// CHECK20-NEXT:    [[TMP130:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i32 0, i32 5
-// CHECK20-NEXT:    store i8* null, i8** [[TMP130]], align 4
-// CHECK20-NEXT:    [[TMP131:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 6
-// CHECK20-NEXT:    [[TMP132:%.*]] = bitcast i8** [[TMP131]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP3]], i32* [[TMP132]], align 4
-// CHECK20-NEXT:    [[TMP133:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 6
-// CHECK20-NEXT:    [[TMP134:%.*]] = bitcast i8** [[TMP133]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP3]], i32* [[TMP134]], align 4
-// CHECK20-NEXT:    [[TMP135:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 6
-// CHECK20-NEXT:    store i64 4, i64* [[TMP135]], align 4
-// CHECK20-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i32 0, i32 6
-// CHECK20-NEXT:    store i8* null, i8** [[TMP136]], align 4
-// CHECK20-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 7
-// CHECK20-NEXT:    [[TMP138:%.*]] = bitcast i8** [[TMP137]] to double**
-// CHECK20-NEXT:    store double* [[VLA1]], double** [[TMP138]], align 4
-// CHECK20-NEXT:    [[TMP139:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 7
-// CHECK20-NEXT:    [[TMP140:%.*]] = bitcast i8** [[TMP139]] to double**
-// CHECK20-NEXT:    store double* [[VLA1]], double** [[TMP140]], align 4
-// CHECK20-NEXT:    [[TMP141:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
-// CHECK20-NEXT:    store i64 [[TMP94]], i64* [[TMP141]], align 4
-// CHECK20-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i32 0, i32 7
+// CHECK20-NEXT:    store i32 [[TMP3]], i32* [[TMP128]], align 4
+// CHECK20-NEXT:    [[TMP129:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 6
+// CHECK20-NEXT:    [[TMP130:%.*]] = bitcast i8** [[TMP129]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP3]], i32* [[TMP130]], align 4
+// CHECK20-NEXT:    [[TMP131:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i32 0, i32 6
+// CHECK20-NEXT:    store i8* null, i8** [[TMP131]], align 4
+// CHECK20-NEXT:    [[TMP132:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 7
+// CHECK20-NEXT:    [[TMP133:%.*]] = bitcast i8** [[TMP132]] to double**
+// CHECK20-NEXT:    store double* [[VLA1]], double** [[TMP133]], align 4
+// CHECK20-NEXT:    [[TMP134:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 7
+// CHECK20-NEXT:    [[TMP135:%.*]] = bitcast i8** [[TMP134]] to double**
+// CHECK20-NEXT:    store double* [[VLA1]], double** [[TMP135]], align 4
+// CHECK20-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
+// CHECK20-NEXT:    store i64 [[TMP94]], i64* [[TMP136]], align 4
+// CHECK20-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i32 0, i32 7
+// CHECK20-NEXT:    store i8* null, i8** [[TMP137]], align 4
+// CHECK20-NEXT:    [[TMP138:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 8
+// CHECK20-NEXT:    [[TMP139:%.*]] = bitcast i8** [[TMP138]] to %struct.TT**
+// CHECK20-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP139]], align 4
+// CHECK20-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 8
+// CHECK20-NEXT:    [[TMP141:%.*]] = bitcast i8** [[TMP140]] to %struct.TT**
+// CHECK20-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP141]], align 4
+// CHECK20-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i32 0, i32 8
 // CHECK20-NEXT:    store i8* null, i8** [[TMP142]], align 4
-// CHECK20-NEXT:    [[TMP143:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 8
-// CHECK20-NEXT:    [[TMP144:%.*]] = bitcast i8** [[TMP143]] to %struct.TT**
-// CHECK20-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP144]], align 4
-// CHECK20-NEXT:    [[TMP145:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 8
-// CHECK20-NEXT:    [[TMP146:%.*]] = bitcast i8** [[TMP145]] to %struct.TT**
-// CHECK20-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP146]], align 4
-// CHECK20-NEXT:    [[TMP147:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 8
-// CHECK20-NEXT:    store i64 12, i64* [[TMP147]], align 4
-// CHECK20-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i32 0, i32 8
-// CHECK20-NEXT:    store i8* null, i8** [[TMP148]], align 4
-// CHECK20-NEXT:    [[TMP149:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP151:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP152:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l142.region_id, i32 9, i8** [[TMP149]], i8** [[TMP150]], i64* [[TMP151]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.10, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK20-NEXT:    [[TMP153:%.*]] = icmp ne i32 [[TMP152]], 0
-// CHECK20-NEXT:    br i1 [[TMP153]], label [[OMP_OFFLOAD_FAILED23:%.*]], label [[OMP_OFFLOAD_CONT24:%.*]]
+// CHECK20-NEXT:    [[TMP143:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP144:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP145:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP146:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l142.region_id, i32 9, i8** [[TMP143]], i8** [[TMP144]], i64* [[TMP145]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.11, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK20-NEXT:    [[TMP147:%.*]] = icmp ne i32 [[TMP146]], 0
+// CHECK20-NEXT:    br i1 [[TMP147]], label [[OMP_OFFLOAD_FAILED23:%.*]], label [[OMP_OFFLOAD_CONT24:%.*]]
 // CHECK20:       omp_offload.failed23:
 // CHECK20-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l142(i32 [[TMP88]], [10 x float]* [[B]], i32 [[TMP1]], float* [[VLA]], [5 x [10 x double]]* [[C]], i32 5, i32 [[TMP3]], double* [[VLA1]], %struct.TT* [[D]]) #[[ATTR3]]
 // CHECK20-NEXT:    br label [[OMP_OFFLOAD_CONT24]]
@@ -12281,50 +12143,50 @@ int bar(int n){
 // CHECK20-NEXT:    br label [[OMP_IF_END26]]
 // CHECK20:       omp_if.end26:
 // CHECK20-NEXT:    store i32 0, i32* [[NN]], align 4
-// CHECK20-NEXT:    [[TMP154:%.*]] = load i32, i32* [[NN]], align 4
-// CHECK20-NEXT:    store i32 [[TMP154]], i32* [[NN_CASTED]], align 4
-// CHECK20-NEXT:    [[TMP155:%.*]] = load i32, i32* [[NN_CASTED]], align 4
-// CHECK20-NEXT:    [[TMP156:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP157:%.*]] = bitcast i8** [[TMP156]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP155]], i32* [[TMP157]], align 4
-// CHECK20-NEXT:    [[TMP158:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS28]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP159:%.*]] = bitcast i8** [[TMP158]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP155]], i32* [[TMP159]], align 4
-// CHECK20-NEXT:    [[TMP160:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_MAPPERS29]], i32 0, i32 0
-// CHECK20-NEXT:    store i8* null, i8** [[TMP160]], align 4
-// CHECK20-NEXT:    [[TMP161:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP162:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS28]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP163:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l154.region_id, i32 1, i8** [[TMP161]], i8** [[TMP162]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.13, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.14, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK20-NEXT:    [[TMP164:%.*]] = icmp ne i32 [[TMP163]], 0
-// CHECK20-NEXT:    br i1 [[TMP164]], label [[OMP_OFFLOAD_FAILED30:%.*]], label [[OMP_OFFLOAD_CONT31:%.*]]
+// CHECK20-NEXT:    [[TMP148:%.*]] = load i32, i32* [[NN]], align 4
+// CHECK20-NEXT:    store i32 [[TMP148]], i32* [[NN_CASTED]], align 4
+// CHECK20-NEXT:    [[TMP149:%.*]] = load i32, i32* [[NN_CASTED]], align 4
+// CHECK20-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP151:%.*]] = bitcast i8** [[TMP150]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP149]], i32* [[TMP151]], align 4
+// CHECK20-NEXT:    [[TMP152:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS28]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP153:%.*]] = bitcast i8** [[TMP152]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP149]], i32* [[TMP153]], align 4
+// CHECK20-NEXT:    [[TMP154:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_MAPPERS29]], i32 0, i32 0
+// CHECK20-NEXT:    store i8* null, i8** [[TMP154]], align 4
+// CHECK20-NEXT:    [[TMP155:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP156:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS28]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP157:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l154.region_id, i32 1, i8** [[TMP155]], i8** [[TMP156]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.14, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.15, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK20-NEXT:    [[TMP158:%.*]] = icmp ne i32 [[TMP157]], 0
+// CHECK20-NEXT:    br i1 [[TMP158]], label [[OMP_OFFLOAD_FAILED30:%.*]], label [[OMP_OFFLOAD_CONT31:%.*]]
 // CHECK20:       omp_offload.failed30:
-// CHECK20-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l154(i32 [[TMP155]]) #[[ATTR3]]
+// CHECK20-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l154(i32 [[TMP149]]) #[[ATTR3]]
 // CHECK20-NEXT:    br label [[OMP_OFFLOAD_CONT31]]
 // CHECK20:       omp_offload.cont31:
-// CHECK20-NEXT:    [[TMP165:%.*]] = load i32, i32* [[NN]], align 4
-// CHECK20-NEXT:    store i32 [[TMP165]], i32* [[NN_CASTED32]], align 4
-// CHECK20-NEXT:    [[TMP166:%.*]] = load i32, i32* [[NN_CASTED32]], align 4
-// CHECK20-NEXT:    [[TMP167:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS33]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP168:%.*]] = bitcast i8** [[TMP167]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP166]], i32* [[TMP168]], align 4
-// CHECK20-NEXT:    [[TMP169:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS34]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP170:%.*]] = bitcast i8** [[TMP169]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP166]], i32* [[TMP170]], align 4
-// CHECK20-NEXT:    [[TMP171:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_MAPPERS35]], i32 0, i32 0
-// CHECK20-NEXT:    store i8* null, i8** [[TMP171]], align 4
-// CHECK20-NEXT:    [[TMP172:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS33]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP173:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS34]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP174:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l157.region_id, i32 1, i8** [[TMP172]], i8** [[TMP173]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.17, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.18, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK20-NEXT:    [[TMP175:%.*]] = icmp ne i32 [[TMP174]], 0
-// CHECK20-NEXT:    br i1 [[TMP175]], label [[OMP_OFFLOAD_FAILED36:%.*]], label [[OMP_OFFLOAD_CONT37:%.*]]
+// CHECK20-NEXT:    [[TMP159:%.*]] = load i32, i32* [[NN]], align 4
+// CHECK20-NEXT:    store i32 [[TMP159]], i32* [[NN_CASTED32]], align 4
+// CHECK20-NEXT:    [[TMP160:%.*]] = load i32, i32* [[NN_CASTED32]], align 4
+// CHECK20-NEXT:    [[TMP161:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS33]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP162:%.*]] = bitcast i8** [[TMP161]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP160]], i32* [[TMP162]], align 4
+// CHECK20-NEXT:    [[TMP163:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS34]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP164:%.*]] = bitcast i8** [[TMP163]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP160]], i32* [[TMP164]], align 4
+// CHECK20-NEXT:    [[TMP165:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_MAPPERS35]], i32 0, i32 0
+// CHECK20-NEXT:    store i8* null, i8** [[TMP165]], align 4
+// CHECK20-NEXT:    [[TMP166:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS33]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP167:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS34]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP168:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l157.region_id, i32 1, i8** [[TMP166]], i8** [[TMP167]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.18, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.19, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK20-NEXT:    [[TMP169:%.*]] = icmp ne i32 [[TMP168]], 0
+// CHECK20-NEXT:    br i1 [[TMP169]], label [[OMP_OFFLOAD_FAILED36:%.*]], label [[OMP_OFFLOAD_CONT37:%.*]]
 // CHECK20:       omp_offload.failed36:
-// CHECK20-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l157(i32 [[TMP166]]) #[[ATTR3]]
+// CHECK20-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l157(i32 [[TMP160]]) #[[ATTR3]]
 // CHECK20-NEXT:    br label [[OMP_OFFLOAD_CONT37]]
 // CHECK20:       omp_offload.cont37:
-// CHECK20-NEXT:    [[TMP176:%.*]] = load i32, i32* [[A]], align 4
-// CHECK20-NEXT:    [[TMP177:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK20-NEXT:    call void @llvm.stackrestore(i8* [[TMP177]])
-// CHECK20-NEXT:    ret i32 [[TMP176]]
+// CHECK20-NEXT:    [[TMP170:%.*]] = load i32, i32* [[A]], align 4
+// CHECK20-NEXT:    [[TMP171:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK20-NEXT:    call void @llvm.stackrestore(i8* [[TMP171]])
+// CHECK20-NEXT:    ret i32 [[TMP170]]
 //
 //
 // CHECK20-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l101
@@ -12691,11 +12553,11 @@ int bar(int n){
 // CHECK20-NEXT:    [[TMP0:%.*]] = load i32, i32* [[NN_ADDR]], align 4
 // CHECK20-NEXT:    store i32 [[TMP0]], i32* [[NN_CASTED]], align 4
 // CHECK20-NEXT:    [[TMP1:%.*]] = load i32, i32* [[NN_CASTED]], align 4
-// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32 [[TMP1]])
+// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i32 [[TMP1]])
 // CHECK20-NEXT:    ret void
 //
 //
-// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..11
+// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..12
 // CHECK20-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[NN:%.*]]) #[[ATTR2]] {
 // CHECK20-NEXT:  entry:
 // CHECK20-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -12708,11 +12570,11 @@ int bar(int n){
 // CHECK20-NEXT:    [[TMP0:%.*]] = load i32, i32* [[NN_ADDR]], align 4
 // CHECK20-NEXT:    store i32 [[TMP0]], i32* [[NN_CASTED]], align 4
 // CHECK20-NEXT:    [[TMP1:%.*]] = load i32, i32* [[NN_CASTED]], align 4
-// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i32 [[TMP1]])
+// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i32 [[TMP1]])
 // CHECK20-NEXT:    ret void
 //
 //
-// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..12
+// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..13
 // CHECK20-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[NN:%.*]]) #[[ATTR2]] {
 // CHECK20-NEXT:  entry:
 // CHECK20-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -12733,11 +12595,11 @@ int bar(int n){
 // CHECK20-NEXT:    [[TMP0:%.*]] = load i32, i32* [[NN_ADDR]], align 4
 // CHECK20-NEXT:    store i32 [[TMP0]], i32* [[NN_CASTED]], align 4
 // CHECK20-NEXT:    [[TMP1:%.*]] = load i32, i32* [[NN_CASTED]], align 4
-// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP1]])
+// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i32 [[TMP1]])
 // CHECK20-NEXT:    ret void
 //
 //
-// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..15
+// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..16
 // CHECK20-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[NN:%.*]]) #[[ATTR2]] {
 // CHECK20-NEXT:  entry:
 // CHECK20-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -12746,11 +12608,11 @@ int bar(int n){
 // CHECK20-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4
 // CHECK20-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4
 // CHECK20-NEXT:    store i32 [[NN]], i32* [[NN_ADDR]], align 4
-// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i32* [[NN_ADDR]])
+// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..17 to void (i32*, i32*, ...)*), i32* [[NN_ADDR]])
 // CHECK20-NEXT:    ret void
 //
 //
-// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..16
+// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..17
 // CHECK20-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[NN:%.*]]) #[[ATTR2]] {
 // CHECK20-NEXT:  entry:
 // CHECK20-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -12784,7 +12646,7 @@ int bar(int n){
 // CHECK20-NEXT:    store i8* null, i8** [[TMP5]], align 4
 // CHECK20-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK20-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP8:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6bazzzziPi_l182.region_id, i32 1, i8** [[TMP6]], i8** [[TMP7]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.20, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.21, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK20-NEXT:    [[TMP8:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6bazzzziPi_l182.region_id, i32 1, i8** [[TMP6]], i8** [[TMP7]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.21, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.22, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK20-NEXT:    [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0
 // CHECK20-NEXT:    br i1 [[TMP9]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK20:       omp_offload.failed:
@@ -12800,11 +12662,11 @@ int bar(int n){
 // CHECK20-NEXT:    [[VLA_ADDR:%.*]] = alloca i32, align 4
 // CHECK20-NEXT:    store i32 [[VLA]], i32* [[VLA_ADDR]], align 4
 // CHECK20-NEXT:    [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4
-// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i32 [[TMP0]])
+// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32)* @.omp_outlined..20 to void (i32*, i32*, ...)*), i32 [[TMP0]])
 // CHECK20-NEXT:    ret void
 //
 //
-// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..19
+// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..20
 // CHECK20-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[VLA:%.*]]) #[[ATTR2]] {
 // CHECK20-NEXT:  entry:
 // CHECK20-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -12886,14 +12748,14 @@ int bar(int n){
 // CHECK20-NEXT:    [[TMP7:%.*]] = mul nuw i32 2, [[TMP1]]
 // CHECK20-NEXT:    [[TMP8:%.*]] = mul nuw i32 [[TMP7]], 2
 // CHECK20-NEXT:    [[TMP9:%.*]] = sext i32 [[TMP8]] to i64
-// CHECK20-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP11:%.*]] = bitcast i8** [[TMP10]] to %struct.S1**
-// CHECK20-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP11]], align 4
-// CHECK20-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP13:%.*]] = bitcast i8** [[TMP12]] to double**
-// CHECK20-NEXT:    store double* [[A]], double** [[TMP13]], align 4
-// CHECK20-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK20-NEXT:    store i64 8, i64* [[TMP14]], align 4
+// CHECK20-NEXT:    [[TMP10:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK20-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP10]], i8* align 4 bitcast ([5 x i64]* @.offload_sizes.24 to i8*), i32 40, i1 false)
+// CHECK20-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP12:%.*]] = bitcast i8** [[TMP11]] to %struct.S1**
+// CHECK20-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP12]], align 4
+// CHECK20-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to double**
+// CHECK20-NEXT:    store double* [[A]], double** [[TMP14]], align 4
 // CHECK20-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
 // CHECK20-NEXT:    store i8* null, i8** [[TMP15]], align 4
 // CHECK20-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -12902,46 +12764,40 @@ int bar(int n){
 // CHECK20-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK20-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32*
 // CHECK20-NEXT:    store i32 [[TMP5]], i32* [[TMP19]], align 4
-// CHECK20-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK20-NEXT:    store i64 4, i64* [[TMP20]], align 4
-// CHECK20-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK20-NEXT:    store i8* null, i8** [[TMP21]], align 4
-// CHECK20-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK20-NEXT:    [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i32*
-// CHECK20-NEXT:    store i32 2, i32* [[TMP23]], align 4
-// CHECK20-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK20-NEXT:    [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i32*
-// CHECK20-NEXT:    store i32 2, i32* [[TMP25]], align 4
-// CHECK20-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK20-NEXT:    store i64 4, i64* [[TMP26]], align 4
-// CHECK20-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK20-NEXT:    store i8* null, i8** [[TMP27]], align 4
-// CHECK20-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK20-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK20-NEXT:    store i8* null, i8** [[TMP20]], align 4
+// CHECK20-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK20-NEXT:    [[TMP22:%.*]] = bitcast i8** [[TMP21]] to i32*
+// CHECK20-NEXT:    store i32 2, i32* [[TMP22]], align 4
+// CHECK20-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK20-NEXT:    [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i32*
+// CHECK20-NEXT:    store i32 2, i32* [[TMP24]], align 4
+// CHECK20-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK20-NEXT:    store i8* null, i8** [[TMP25]], align 4
+// CHECK20-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK20-NEXT:    [[TMP27:%.*]] = bitcast i8** [[TMP26]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP1]], i32* [[TMP27]], align 4
+// CHECK20-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
 // CHECK20-NEXT:    [[TMP29:%.*]] = bitcast i8** [[TMP28]] to i32*
 // CHECK20-NEXT:    store i32 [[TMP1]], i32* [[TMP29]], align 4
-// CHECK20-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK20-NEXT:    [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP1]], i32* [[TMP31]], align 4
-// CHECK20-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK20-NEXT:    store i64 4, i64* [[TMP32]], align 4
-// CHECK20-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
-// CHECK20-NEXT:    store i8* null, i8** [[TMP33]], align 4
-// CHECK20-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK20-NEXT:    [[TMP35:%.*]] = bitcast i8** [[TMP34]] to i16**
-// CHECK20-NEXT:    store i16* [[VLA]], i16** [[TMP35]], align 4
-// CHECK20-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
-// CHECK20-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i16**
-// CHECK20-NEXT:    store i16* [[VLA]], i16** [[TMP37]], align 4
-// CHECK20-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK20-NEXT:    store i64 [[TMP9]], i64* [[TMP38]], align 4
-// CHECK20-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
-// CHECK20-NEXT:    store i8* null, i8** [[TMP39]], align 4
-// CHECK20-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP43:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l227.region_id, i32 5, i8** [[TMP40]], i8** [[TMP41]], i64* [[TMP42]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.23, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK20-NEXT:    [[TMP44:%.*]] = icmp ne i32 [[TMP43]], 0
-// CHECK20-NEXT:    br i1 [[TMP44]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK20-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
+// CHECK20-NEXT:    store i8* null, i8** [[TMP30]], align 4
+// CHECK20-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK20-NEXT:    [[TMP32:%.*]] = bitcast i8** [[TMP31]] to i16**
+// CHECK20-NEXT:    store i16* [[VLA]], i16** [[TMP32]], align 4
+// CHECK20-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK20-NEXT:    [[TMP34:%.*]] = bitcast i8** [[TMP33]] to i16**
+// CHECK20-NEXT:    store i16* [[VLA]], i16** [[TMP34]], align 4
+// CHECK20-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
+// CHECK20-NEXT:    store i64 [[TMP9]], i64* [[TMP35]], align 4
+// CHECK20-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
+// CHECK20-NEXT:    store i8* null, i8** [[TMP36]], align 4
+// CHECK20-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP40:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l227.region_id, i32 5, i8** [[TMP37]], i8** [[TMP38]], i64* [[TMP39]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.25, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK20-NEXT:    [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0
+// CHECK20-NEXT:    br i1 [[TMP41]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK20:       omp_offload.failed:
 // CHECK20-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l227(%struct.S1* [[THIS1]], i32 [[TMP5]], i32 2, i32 [[TMP1]], i16* [[VLA]]) #[[ATTR3]]
 // CHECK20-NEXT:    br label [[OMP_OFFLOAD_CONT]]
@@ -12951,15 +12807,15 @@ int bar(int n){
 // CHECK20-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l227(%struct.S1* [[THIS1]], i32 [[TMP5]], i32 2, i32 [[TMP1]], i16* [[VLA]]) #[[ATTR3]]
 // CHECK20-NEXT:    br label [[OMP_IF_END]]
 // CHECK20:       omp_if.end:
-// CHECK20-NEXT:    [[TMP45:%.*]] = mul nsw i32 1, [[TMP1]]
-// CHECK20-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i32 [[TMP45]]
+// CHECK20-NEXT:    [[TMP42:%.*]] = mul nsw i32 1, [[TMP1]]
+// CHECK20-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i32 [[TMP42]]
 // CHECK20-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1
-// CHECK20-NEXT:    [[TMP46:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
-// CHECK20-NEXT:    [[CONV:%.*]] = sext i16 [[TMP46]] to i32
-// CHECK20-NEXT:    [[TMP47:%.*]] = load i32, i32* [[B]], align 4
-// CHECK20-NEXT:    [[ADD3:%.*]] = add nsw i32 [[CONV]], [[TMP47]]
-// CHECK20-NEXT:    [[TMP48:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK20-NEXT:    call void @llvm.stackrestore(i8* [[TMP48]])
+// CHECK20-NEXT:    [[TMP43:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
+// CHECK20-NEXT:    [[CONV:%.*]] = sext i16 [[TMP43]] to i32
+// CHECK20-NEXT:    [[TMP44:%.*]] = load i32, i32* [[B]], align 4
+// CHECK20-NEXT:    [[ADD3:%.*]] = add nsw i32 [[CONV]], [[TMP44]]
+// CHECK20-NEXT:    [[TMP45:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK20-NEXT:    call void @llvm.stackrestore(i8* [[TMP45]])
 // CHECK20-NEXT:    ret i32 [[ADD3]]
 //
 //
@@ -13030,7 +12886,7 @@ int bar(int n){
 // CHECK20-NEXT:    store i8* null, i8** [[TMP26]], align 4
 // CHECK20-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK20-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP29:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l209.region_id, i32 4, i8** [[TMP27]], i8** [[TMP28]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.25, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.26, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK20-NEXT:    [[TMP29:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l209.region_id, i32 4, i8** [[TMP27]], i8** [[TMP28]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_sizes.27, i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.28, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK20-NEXT:    [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0
 // CHECK20-NEXT:    br i1 [[TMP30]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK20:       omp_offload.failed:
@@ -13098,7 +12954,7 @@ int bar(int n){
 // CHECK20-NEXT:    store i8* null, i8** [[TMP19]], align 4
 // CHECK20-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK20-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l192.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.28, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.29, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK20-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l192.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.30, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.31, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK20-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK20-NEXT:    br i1 [[TMP23]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK20:       omp_offload.failed:
@@ -13135,11 +12991,11 @@ int bar(int n){
 // CHECK20-NEXT:    [[TMP4:%.*]] = load i32, i32* [[B_ADDR]], align 4
 // CHECK20-NEXT:    store i32 [[TMP4]], i32* [[B_CASTED]], align 4
 // CHECK20-NEXT:    [[TMP5:%.*]] = load i32, i32* [[B_CASTED]], align 4
-// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i32, i32, i32, i16*)* @.omp_outlined..22 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i32 [[TMP5]], i32 [[TMP1]], i32 [[TMP2]], i16* [[TMP3]])
+// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i32, i32, i32, i16*)* @.omp_outlined..23 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i32 [[TMP5]], i32 [[TMP1]], i32 [[TMP2]], i16* [[TMP3]])
 // CHECK20-NEXT:    ret void
 //
 //
-// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..22
+// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..23
 // CHECK20-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR2]] {
 // CHECK20-NEXT:  entry:
 // CHECK20-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -13205,11 +13061,11 @@ int bar(int n){
 // CHECK20-NEXT:    [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8*
 // CHECK20-NEXT:    store i8 [[TMP5]], i8* [[CONV3]], align 1
 // CHECK20-NEXT:    [[TMP6:%.*]] = load i32, i32* [[AAA_CASTED]], align 4
-// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, [10 x i32]*)* @.omp_outlined..24 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], [10 x i32]* [[TMP0]])
+// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, [10 x i32]*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], [10 x i32]* [[TMP0]])
 // CHECK20-NEXT:    ret void
 //
 //
-// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..24
+// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..26
 // CHECK20-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] {
 // CHECK20-NEXT:  entry:
 // CHECK20-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -13267,11 +13123,11 @@ int bar(int n){
 // CHECK20-NEXT:    [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16*
 // CHECK20-NEXT:    store i16 [[TMP3]], i16* [[CONV1]], align 2
 // CHECK20-NEXT:    [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4
-// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..27 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], [10 x i32]* [[TMP0]])
+// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..29 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], [10 x i32]* [[TMP0]])
 // CHECK20-NEXT:    ret void
 //
 //
-// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..27
+// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..29
 // CHECK20-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] {
 // CHECK20-NEXT:  entry:
 // CHECK20-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4

diff  --git a/clang/test/OpenMP/target_teams_distribute_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_codegen.cpp
index 9b099d831ca89..059ffdd8faeb2 100644
--- a/clang/test/OpenMP/target_teams_distribute_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_codegen.cpp
@@ -512,14 +512,14 @@ int bar(int n){
 // CHECK1-NEXT:    [[TMP95:%.*]] = mul nuw i64 [[TMP2]], 4
 // CHECK1-NEXT:    [[TMP96:%.*]] = mul nuw i64 5, [[TMP5]]
 // CHECK1-NEXT:    [[TMP97:%.*]] = mul nuw i64 [[TMP96]], 8
-// CHECK1-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 0
-// CHECK1-NEXT:    [[TMP99:%.*]] = bitcast i8** [[TMP98]] to i64*
-// CHECK1-NEXT:    store i64 [[TMP91]], i64* [[TMP99]], align 8
-// CHECK1-NEXT:    [[TMP100:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 0
-// CHECK1-NEXT:    [[TMP101:%.*]] = bitcast i8** [[TMP100]] to i64*
-// CHECK1-NEXT:    store i64 [[TMP91]], i64* [[TMP101]], align 8
-// CHECK1-NEXT:    [[TMP102:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK1-NEXT:    store i64 4, i64* [[TMP102]], align 8
+// CHECK1-NEXT:    [[TMP98:%.*]] = bitcast [10 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK1-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP98]], i8* align 8 bitcast ([10 x i64]* @.offload_sizes.10 to i8*), i64 80, i1 false)
+// CHECK1-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP100:%.*]] = bitcast i8** [[TMP99]] to i64*
+// CHECK1-NEXT:    store i64 [[TMP91]], i64* [[TMP100]], align 8
+// CHECK1-NEXT:    [[TMP101:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP102:%.*]] = bitcast i8** [[TMP101]] to i64*
+// CHECK1-NEXT:    store i64 [[TMP91]], i64* [[TMP102]], align 8
 // CHECK1-NEXT:    [[TMP103:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 0
 // CHECK1-NEXT:    store i8* null, i8** [[TMP103]], align 8
 // CHECK1-NEXT:    [[TMP104:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 1
@@ -528,97 +528,83 @@ int bar(int n){
 // CHECK1-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 1
 // CHECK1-NEXT:    [[TMP107:%.*]] = bitcast i8** [[TMP106]] to [10 x float]**
 // CHECK1-NEXT:    store [10 x float]* [[B]], [10 x float]** [[TMP107]], align 8
-// CHECK1-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK1-NEXT:    store i64 40, i64* [[TMP108]], align 8
-// CHECK1-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 1
-// CHECK1-NEXT:    store i8* null, i8** [[TMP109]], align 8
-// CHECK1-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 2
-// CHECK1-NEXT:    [[TMP111:%.*]] = bitcast i8** [[TMP110]] to i64*
-// CHECK1-NEXT:    store i64 [[TMP2]], i64* [[TMP111]], align 8
-// CHECK1-NEXT:    [[TMP112:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 2
-// CHECK1-NEXT:    [[TMP113:%.*]] = bitcast i8** [[TMP112]] to i64*
-// CHECK1-NEXT:    store i64 [[TMP2]], i64* [[TMP113]], align 8
-// CHECK1-NEXT:    [[TMP114:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK1-NEXT:    store i64 8, i64* [[TMP114]], align 8
-// CHECK1-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 2
-// CHECK1-NEXT:    store i8* null, i8** [[TMP115]], align 8
-// CHECK1-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 3
+// CHECK1-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 1
+// CHECK1-NEXT:    store i8* null, i8** [[TMP108]], align 8
+// CHECK1-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 2
+// CHECK1-NEXT:    [[TMP110:%.*]] = bitcast i8** [[TMP109]] to i64*
+// CHECK1-NEXT:    store i64 [[TMP2]], i64* [[TMP110]], align 8
+// CHECK1-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 2
+// CHECK1-NEXT:    [[TMP112:%.*]] = bitcast i8** [[TMP111]] to i64*
+// CHECK1-NEXT:    store i64 [[TMP2]], i64* [[TMP112]], align 8
+// CHECK1-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 2
+// CHECK1-NEXT:    store i8* null, i8** [[TMP113]], align 8
+// CHECK1-NEXT:    [[TMP114:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 3
+// CHECK1-NEXT:    [[TMP115:%.*]] = bitcast i8** [[TMP114]] to float**
+// CHECK1-NEXT:    store float* [[VLA]], float** [[TMP115]], align 8
+// CHECK1-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 3
 // CHECK1-NEXT:    [[TMP117:%.*]] = bitcast i8** [[TMP116]] to float**
 // CHECK1-NEXT:    store float* [[VLA]], float** [[TMP117]], align 8
-// CHECK1-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 3
-// CHECK1-NEXT:    [[TMP119:%.*]] = bitcast i8** [[TMP118]] to float**
-// CHECK1-NEXT:    store float* [[VLA]], float** [[TMP119]], align 8
-// CHECK1-NEXT:    [[TMP120:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK1-NEXT:    store i64 [[TMP95]], i64* [[TMP120]], align 8
-// CHECK1-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 3
-// CHECK1-NEXT:    store i8* null, i8** [[TMP121]], align 8
-// CHECK1-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 4
+// CHECK1-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
+// CHECK1-NEXT:    store i64 [[TMP95]], i64* [[TMP118]], align 8
+// CHECK1-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 3
+// CHECK1-NEXT:    store i8* null, i8** [[TMP119]], align 8
+// CHECK1-NEXT:    [[TMP120:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 4
+// CHECK1-NEXT:    [[TMP121:%.*]] = bitcast i8** [[TMP120]] to [5 x [10 x double]]**
+// CHECK1-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP121]], align 8
+// CHECK1-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 4
 // CHECK1-NEXT:    [[TMP123:%.*]] = bitcast i8** [[TMP122]] to [5 x [10 x double]]**
 // CHECK1-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP123]], align 8
-// CHECK1-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 4
-// CHECK1-NEXT:    [[TMP125:%.*]] = bitcast i8** [[TMP124]] to [5 x [10 x double]]**
-// CHECK1-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP125]], align 8
-// CHECK1-NEXT:    [[TMP126:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK1-NEXT:    store i64 400, i64* [[TMP126]], align 8
-// CHECK1-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 4
-// CHECK1-NEXT:    store i8* null, i8** [[TMP127]], align 8
-// CHECK1-NEXT:    [[TMP128:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 5
-// CHECK1-NEXT:    [[TMP129:%.*]] = bitcast i8** [[TMP128]] to i64*
-// CHECK1-NEXT:    store i64 5, i64* [[TMP129]], align 8
-// CHECK1-NEXT:    [[TMP130:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 5
+// CHECK1-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 4
+// CHECK1-NEXT:    store i8* null, i8** [[TMP124]], align 8
+// CHECK1-NEXT:    [[TMP125:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 5
+// CHECK1-NEXT:    [[TMP126:%.*]] = bitcast i8** [[TMP125]] to i64*
+// CHECK1-NEXT:    store i64 5, i64* [[TMP126]], align 8
+// CHECK1-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 5
+// CHECK1-NEXT:    [[TMP128:%.*]] = bitcast i8** [[TMP127]] to i64*
+// CHECK1-NEXT:    store i64 5, i64* [[TMP128]], align 8
+// CHECK1-NEXT:    [[TMP129:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 5
+// CHECK1-NEXT:    store i8* null, i8** [[TMP129]], align 8
+// CHECK1-NEXT:    [[TMP130:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 6
 // CHECK1-NEXT:    [[TMP131:%.*]] = bitcast i8** [[TMP130]] to i64*
-// CHECK1-NEXT:    store i64 5, i64* [[TMP131]], align 8
-// CHECK1-NEXT:    [[TMP132:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 5
-// CHECK1-NEXT:    store i64 8, i64* [[TMP132]], align 8
-// CHECK1-NEXT:    [[TMP133:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 5
-// CHECK1-NEXT:    store i8* null, i8** [[TMP133]], align 8
-// CHECK1-NEXT:    [[TMP134:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 6
-// CHECK1-NEXT:    [[TMP135:%.*]] = bitcast i8** [[TMP134]] to i64*
-// CHECK1-NEXT:    store i64 [[TMP5]], i64* [[TMP135]], align 8
-// CHECK1-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 6
-// CHECK1-NEXT:    [[TMP137:%.*]] = bitcast i8** [[TMP136]] to i64*
-// CHECK1-NEXT:    store i64 [[TMP5]], i64* [[TMP137]], align 8
-// CHECK1-NEXT:    [[TMP138:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 6
-// CHECK1-NEXT:    store i64 8, i64* [[TMP138]], align 8
-// CHECK1-NEXT:    [[TMP139:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 6
-// CHECK1-NEXT:    store i8* null, i8** [[TMP139]], align 8
-// CHECK1-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 7
-// CHECK1-NEXT:    [[TMP141:%.*]] = bitcast i8** [[TMP140]] to double**
-// CHECK1-NEXT:    store double* [[VLA1]], double** [[TMP141]], align 8
-// CHECK1-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 7
-// CHECK1-NEXT:    [[TMP143:%.*]] = bitcast i8** [[TMP142]] to double**
-// CHECK1-NEXT:    store double* [[VLA1]], double** [[TMP143]], align 8
-// CHECK1-NEXT:    [[TMP144:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
-// CHECK1-NEXT:    store i64 [[TMP97]], i64* [[TMP144]], align 8
-// CHECK1-NEXT:    [[TMP145:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 7
+// CHECK1-NEXT:    store i64 [[TMP5]], i64* [[TMP131]], align 8
+// CHECK1-NEXT:    [[TMP132:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 6
+// CHECK1-NEXT:    [[TMP133:%.*]] = bitcast i8** [[TMP132]] to i64*
+// CHECK1-NEXT:    store i64 [[TMP5]], i64* [[TMP133]], align 8
+// CHECK1-NEXT:    [[TMP134:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 6
+// CHECK1-NEXT:    store i8* null, i8** [[TMP134]], align 8
+// CHECK1-NEXT:    [[TMP135:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 7
+// CHECK1-NEXT:    [[TMP136:%.*]] = bitcast i8** [[TMP135]] to double**
+// CHECK1-NEXT:    store double* [[VLA1]], double** [[TMP136]], align 8
+// CHECK1-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 7
+// CHECK1-NEXT:    [[TMP138:%.*]] = bitcast i8** [[TMP137]] to double**
+// CHECK1-NEXT:    store double* [[VLA1]], double** [[TMP138]], align 8
+// CHECK1-NEXT:    [[TMP139:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
+// CHECK1-NEXT:    store i64 [[TMP97]], i64* [[TMP139]], align 8
+// CHECK1-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 7
+// CHECK1-NEXT:    store i8* null, i8** [[TMP140]], align 8
+// CHECK1-NEXT:    [[TMP141:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 8
+// CHECK1-NEXT:    [[TMP142:%.*]] = bitcast i8** [[TMP141]] to %struct.TT**
+// CHECK1-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP142]], align 8
+// CHECK1-NEXT:    [[TMP143:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 8
+// CHECK1-NEXT:    [[TMP144:%.*]] = bitcast i8** [[TMP143]] to %struct.TT**
+// CHECK1-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP144]], align 8
+// CHECK1-NEXT:    [[TMP145:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 8
 // CHECK1-NEXT:    store i8* null, i8** [[TMP145]], align 8
-// CHECK1-NEXT:    [[TMP146:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 8
-// CHECK1-NEXT:    [[TMP147:%.*]] = bitcast i8** [[TMP146]] to %struct.TT**
-// CHECK1-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP147]], align 8
-// CHECK1-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 8
-// CHECK1-NEXT:    [[TMP149:%.*]] = bitcast i8** [[TMP148]] to %struct.TT**
-// CHECK1-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP149]], align 8
-// CHECK1-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 8
-// CHECK1-NEXT:    store i64 16, i64* [[TMP150]], align 8
-// CHECK1-NEXT:    [[TMP151:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 8
-// CHECK1-NEXT:    store i8* null, i8** [[TMP151]], align 8
-// CHECK1-NEXT:    [[TMP152:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 9
-// CHECK1-NEXT:    [[TMP153:%.*]] = bitcast i8** [[TMP152]] to i64*
-// CHECK1-NEXT:    store i64 [[TMP93]], i64* [[TMP153]], align 8
-// CHECK1-NEXT:    [[TMP154:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 9
-// CHECK1-NEXT:    [[TMP155:%.*]] = bitcast i8** [[TMP154]] to i64*
-// CHECK1-NEXT:    store i64 [[TMP93]], i64* [[TMP155]], align 8
-// CHECK1-NEXT:    [[TMP156:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 9
-// CHECK1-NEXT:    store i64 4, i64* [[TMP156]], align 8
-// CHECK1-NEXT:    [[TMP157:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 9
-// CHECK1-NEXT:    store i8* null, i8** [[TMP157]], align 8
-// CHECK1-NEXT:    [[TMP158:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 0
-// CHECK1-NEXT:    [[TMP159:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 0
-// CHECK1-NEXT:    [[TMP160:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP146:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 9
+// CHECK1-NEXT:    [[TMP147:%.*]] = bitcast i8** [[TMP146]] to i64*
+// CHECK1-NEXT:    store i64 [[TMP93]], i64* [[TMP147]], align 8
+// CHECK1-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 9
+// CHECK1-NEXT:    [[TMP149:%.*]] = bitcast i8** [[TMP148]] to i64*
+// CHECK1-NEXT:    store i64 [[TMP93]], i64* [[TMP149]], align 8
+// CHECK1-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 9
+// CHECK1-NEXT:    store i8* null, i8** [[TMP150]], align 8
+// CHECK1-NEXT:    [[TMP151:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP152:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP153:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
 // CHECK1-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK1-NEXT:    [[TMP161:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l145.region_id, i32 10, i8** [[TMP158]], i8** [[TMP159]], i64* [[TMP160]], i64* getelementptr inbounds ([10 x i64], [10 x i64]* @.offload_maptypes.10, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK1-NEXT:    [[TMP162:%.*]] = icmp ne i32 [[TMP161]], 0
-// CHECK1-NEXT:    br i1 [[TMP162]], label [[OMP_OFFLOAD_FAILED33:%.*]], label [[OMP_OFFLOAD_CONT34:%.*]]
+// CHECK1-NEXT:    [[TMP154:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l145.region_id, i32 10, i8** [[TMP151]], i8** [[TMP152]], i64* [[TMP153]], i64* getelementptr inbounds ([10 x i64], [10 x i64]* @.offload_maptypes.11, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK1-NEXT:    [[TMP155:%.*]] = icmp ne i32 [[TMP154]], 0
+// CHECK1-NEXT:    br i1 [[TMP155]], label [[OMP_OFFLOAD_FAILED33:%.*]], label [[OMP_OFFLOAD_CONT34:%.*]]
 // CHECK1:       omp_offload.failed33:
 // CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l145(i64 [[TMP91]], [10 x float]* [[B]], i64 [[TMP2]], float* [[VLA]], [5 x [10 x double]]* [[C]], i64 5, i64 [[TMP5]], double* [[VLA1]], %struct.TT* [[D]], i64 [[TMP93]]) #[[ATTR3]]
 // CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT34]]
@@ -628,10 +614,10 @@ int bar(int n){
 // CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l145(i64 [[TMP91]], [10 x float]* [[B]], i64 [[TMP2]], float* [[VLA]], [5 x [10 x double]]* [[C]], i64 5, i64 [[TMP5]], double* [[VLA1]], %struct.TT* [[D]], i64 [[TMP93]]) #[[ATTR3]]
 // CHECK1-NEXT:    br label [[OMP_IF_END36]]
 // CHECK1:       omp_if.end36:
-// CHECK1-NEXT:    [[TMP163:%.*]] = load i32, i32* [[A]], align 4
-// CHECK1-NEXT:    [[TMP164:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK1-NEXT:    call void @llvm.stackrestore(i8* [[TMP164]])
-// CHECK1-NEXT:    ret i32 [[TMP163]]
+// CHECK1-NEXT:    [[TMP156:%.*]] = load i32, i32* [[A]], align 4
+// CHECK1-NEXT:    [[TMP157:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK1-NEXT:    call void @llvm.stackrestore(i8* [[TMP157]])
+// CHECK1-NEXT:    ret i32 [[TMP156]]
 //
 //
 // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l103
@@ -1352,14 +1338,14 @@ int bar(int n){
 // CHECK1-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP8:%.*]] = mul nuw i64 2, [[TMP2]]
 // CHECK1-NEXT:    [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 2
-// CHECK1-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[TMP11:%.*]] = bitcast i8** [[TMP10]] to %struct.S1**
-// CHECK1-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP11]], align 8
-// CHECK1-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[TMP13:%.*]] = bitcast i8** [[TMP12]] to double**
-// CHECK1-NEXT:    store double* [[A]], double** [[TMP13]], align 8
-// CHECK1-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK1-NEXT:    store i64 8, i64* [[TMP14]], align 8
+// CHECK1-NEXT:    [[TMP10:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK1-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP10]], i8* align 8 bitcast ([5 x i64]* @.offload_sizes.13 to i8*), i64 40, i1 false)
+// CHECK1-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP12:%.*]] = bitcast i8** [[TMP11]] to %struct.S1**
+// CHECK1-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP12]], align 8
+// CHECK1-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to double**
+// CHECK1-NEXT:    store double* [[A]], double** [[TMP14]], align 8
 // CHECK1-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
 // CHECK1-NEXT:    store i8* null, i8** [[TMP15]], align 8
 // CHECK1-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -1368,47 +1354,41 @@ int bar(int n){
 // CHECK1-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK1-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i64*
 // CHECK1-NEXT:    store i64 [[TMP6]], i64* [[TMP19]], align 8
-// CHECK1-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK1-NEXT:    store i64 4, i64* [[TMP20]], align 8
-// CHECK1-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK1-NEXT:    store i8* null, i8** [[TMP21]], align 8
-// CHECK1-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK1-NEXT:    [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i64*
-// CHECK1-NEXT:    store i64 2, i64* [[TMP23]], align 8
-// CHECK1-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK1-NEXT:    [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i64*
-// CHECK1-NEXT:    store i64 2, i64* [[TMP25]], align 8
-// CHECK1-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK1-NEXT:    store i64 8, i64* [[TMP26]], align 8
-// CHECK1-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK1-NEXT:    store i8* null, i8** [[TMP27]], align 8
-// CHECK1-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK1-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK1-NEXT:    store i8* null, i8** [[TMP20]], align 8
+// CHECK1-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK1-NEXT:    [[TMP22:%.*]] = bitcast i8** [[TMP21]] to i64*
+// CHECK1-NEXT:    store i64 2, i64* [[TMP22]], align 8
+// CHECK1-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK1-NEXT:    [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i64*
+// CHECK1-NEXT:    store i64 2, i64* [[TMP24]], align 8
+// CHECK1-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK1-NEXT:    store i8* null, i8** [[TMP25]], align 8
+// CHECK1-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK1-NEXT:    [[TMP27:%.*]] = bitcast i8** [[TMP26]] to i64*
+// CHECK1-NEXT:    store i64 [[TMP2]], i64* [[TMP27]], align 8
+// CHECK1-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
 // CHECK1-NEXT:    [[TMP29:%.*]] = bitcast i8** [[TMP28]] to i64*
 // CHECK1-NEXT:    store i64 [[TMP2]], i64* [[TMP29]], align 8
-// CHECK1-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK1-NEXT:    [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i64*
-// CHECK1-NEXT:    store i64 [[TMP2]], i64* [[TMP31]], align 8
-// CHECK1-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK1-NEXT:    store i64 8, i64* [[TMP32]], align 8
-// CHECK1-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
-// CHECK1-NEXT:    store i8* null, i8** [[TMP33]], align 8
-// CHECK1-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK1-NEXT:    [[TMP35:%.*]] = bitcast i8** [[TMP34]] to i16**
-// CHECK1-NEXT:    store i16* [[VLA]], i16** [[TMP35]], align 8
-// CHECK1-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
-// CHECK1-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i16**
-// CHECK1-NEXT:    store i16* [[VLA]], i16** [[TMP37]], align 8
-// CHECK1-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK1-NEXT:    store i64 [[TMP9]], i64* [[TMP38]], align 8
-// CHECK1-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
-// CHECK1-NEXT:    store i8* null, i8** [[TMP39]], align 8
-// CHECK1-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
+// CHECK1-NEXT:    store i8* null, i8** [[TMP30]], align 8
+// CHECK1-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK1-NEXT:    [[TMP32:%.*]] = bitcast i8** [[TMP31]] to i16**
+// CHECK1-NEXT:    store i16* [[VLA]], i16** [[TMP32]], align 8
+// CHECK1-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK1-NEXT:    [[TMP34:%.*]] = bitcast i8** [[TMP33]] to i16**
+// CHECK1-NEXT:    store i16* [[VLA]], i16** [[TMP34]], align 8
+// CHECK1-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
+// CHECK1-NEXT:    store i64 [[TMP9]], i64* [[TMP35]], align 8
+// CHECK1-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
+// CHECK1-NEXT:    store i8* null, i8** [[TMP36]], align 8
+// CHECK1-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
 // CHECK1-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK1-NEXT:    [[TMP43:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l218.region_id, i32 5, i8** [[TMP40]], i8** [[TMP41]], i64* [[TMP42]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.12, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK1-NEXT:    [[TMP44:%.*]] = icmp ne i32 [[TMP43]], 0
-// CHECK1-NEXT:    br i1 [[TMP44]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK1-NEXT:    [[TMP40:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l218.region_id, i32 5, i8** [[TMP37]], i8** [[TMP38]], i64* [[TMP39]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.14, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK1-NEXT:    [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0
+// CHECK1-NEXT:    br i1 [[TMP41]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK1:       omp_offload.failed:
 // CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l218(%struct.S1* [[THIS1]], i64 [[TMP6]], i64 2, i64 [[TMP2]], i16* [[VLA]]) #[[ATTR3]]
 // CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT]]
@@ -1418,15 +1398,15 @@ int bar(int n){
 // CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l218(%struct.S1* [[THIS1]], i64 [[TMP6]], i64 2, i64 [[TMP2]], i16* [[VLA]]) #[[ATTR3]]
 // CHECK1-NEXT:    br label [[OMP_IF_END]]
 // CHECK1:       omp_if.end:
-// CHECK1-NEXT:    [[TMP45:%.*]] = mul nsw i64 1, [[TMP2]]
-// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i64 [[TMP45]]
+// CHECK1-NEXT:    [[TMP42:%.*]] = mul nsw i64 1, [[TMP2]]
+// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i64 [[TMP42]]
 // CHECK1-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1
-// CHECK1-NEXT:    [[TMP46:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
-// CHECK1-NEXT:    [[CONV3:%.*]] = sext i16 [[TMP46]] to i32
-// CHECK1-NEXT:    [[TMP47:%.*]] = load i32, i32* [[B]], align 4
-// CHECK1-NEXT:    [[ADD4:%.*]] = add nsw i32 [[CONV3]], [[TMP47]]
-// CHECK1-NEXT:    [[TMP48:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK1-NEXT:    call void @llvm.stackrestore(i8* [[TMP48]])
+// CHECK1-NEXT:    [[TMP43:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
+// CHECK1-NEXT:    [[CONV3:%.*]] = sext i16 [[TMP43]] to i32
+// CHECK1-NEXT:    [[TMP44:%.*]] = load i32, i32* [[B]], align 4
+// CHECK1-NEXT:    [[ADD4:%.*]] = add nsw i32 [[CONV3]], [[TMP44]]
+// CHECK1-NEXT:    [[TMP45:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK1-NEXT:    call void @llvm.stackrestore(i8* [[TMP45]])
 // CHECK1-NEXT:    ret i32 [[ADD4]]
 //
 //
@@ -1531,7 +1511,7 @@ int bar(int n){
 // CHECK1-NEXT:    [[ADD8:%.*]] = add i32 [[TMP40]], 1
 // CHECK1-NEXT:    [[TMP41:%.*]] = zext i32 [[ADD8]] to i64
 // CHECK1-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 [[TMP41]])
-// CHECK1-NEXT:    [[TMP42:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l200.region_id, i32 5, i8** [[TMP34]], i8** [[TMP35]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_sizes.14, i32 0, i32 0), i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.15, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK1-NEXT:    [[TMP42:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l200.region_id, i32 5, i8** [[TMP34]], i8** [[TMP35]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_sizes.16, i32 0, i32 0), i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.17, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK1-NEXT:    [[TMP43:%.*]] = icmp ne i32 [[TMP42]], 0
 // CHECK1-NEXT:    br i1 [[TMP43]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK1:       omp_offload.failed:
@@ -1602,7 +1582,7 @@ int bar(int n){
 // CHECK1-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK1-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK1-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l183.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.17, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.18, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK1-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l183.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.19, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.20, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK1-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK1-NEXT:    br i1 [[TMP23]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK1:       omp_offload.failed:
@@ -1641,11 +1621,11 @@ int bar(int n){
 // CHECK1-NEXT:    [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32*
 // CHECK1-NEXT:    store i32 [[TMP4]], i32* [[CONV3]], align 4
 // CHECK1-NEXT:    [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8
-// CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i64, i64, i64, i16*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i64 [[TMP5]], i64 [[TMP1]], i64 [[TMP2]], i16* [[TMP3]])
+// CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i64, i64, i64, i16*)* @.omp_outlined..12 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i64 [[TMP5]], i64 [[TMP1]], i64 [[TMP2]], i16* [[TMP3]])
 // CHECK1-NEXT:    ret void
 //
 //
-// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..11
+// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..12
 // CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR2]] {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -1772,11 +1752,11 @@ int bar(int n){
 // CHECK1-NEXT:    [[CONV7:%.*]] = bitcast i64* [[AAA_CASTED]] to i8*
 // CHECK1-NEXT:    store i8 [[TMP7]], i8* [[CONV7]], align 1
 // CHECK1-NEXT:    [[TMP8:%.*]] = load i64, i64* [[AAA_CASTED]], align 8
-// CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, [10 x i32]*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP8]], [10 x i32]* [[TMP0]])
+// CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP8]], [10 x i32]* [[TMP0]])
 // CHECK1-NEXT:    ret void
 //
 //
-// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..13
+// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..15
 // CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -1922,11 +1902,11 @@ int bar(int n){
 // CHECK1-NEXT:    [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16*
 // CHECK1-NEXT:    store i16 [[TMP3]], i16* [[CONV3]], align 2
 // CHECK1-NEXT:    [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8
-// CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], [10 x i32]* [[TMP0]])
+// CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], [10 x i32]* [[TMP0]])
 // CHECK1-NEXT:    ret void
 //
 //
-// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..16
+// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..18
 // CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -2232,14 +2212,14 @@ int bar(int n){
 // CHECK2-NEXT:    [[TMP95:%.*]] = mul nuw i64 [[TMP2]], 4
 // CHECK2-NEXT:    [[TMP96:%.*]] = mul nuw i64 5, [[TMP5]]
 // CHECK2-NEXT:    [[TMP97:%.*]] = mul nuw i64 [[TMP96]], 8
-// CHECK2-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 0
-// CHECK2-NEXT:    [[TMP99:%.*]] = bitcast i8** [[TMP98]] to i64*
-// CHECK2-NEXT:    store i64 [[TMP91]], i64* [[TMP99]], align 8
-// CHECK2-NEXT:    [[TMP100:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 0
-// CHECK2-NEXT:    [[TMP101:%.*]] = bitcast i8** [[TMP100]] to i64*
-// CHECK2-NEXT:    store i64 [[TMP91]], i64* [[TMP101]], align 8
-// CHECK2-NEXT:    [[TMP102:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK2-NEXT:    store i64 4, i64* [[TMP102]], align 8
+// CHECK2-NEXT:    [[TMP98:%.*]] = bitcast [10 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK2-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP98]], i8* align 8 bitcast ([10 x i64]* @.offload_sizes.10 to i8*), i64 80, i1 false)
+// CHECK2-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP100:%.*]] = bitcast i8** [[TMP99]] to i64*
+// CHECK2-NEXT:    store i64 [[TMP91]], i64* [[TMP100]], align 8
+// CHECK2-NEXT:    [[TMP101:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP102:%.*]] = bitcast i8** [[TMP101]] to i64*
+// CHECK2-NEXT:    store i64 [[TMP91]], i64* [[TMP102]], align 8
 // CHECK2-NEXT:    [[TMP103:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 0
 // CHECK2-NEXT:    store i8* null, i8** [[TMP103]], align 8
 // CHECK2-NEXT:    [[TMP104:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 1
@@ -2248,97 +2228,83 @@ int bar(int n){
 // CHECK2-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 1
 // CHECK2-NEXT:    [[TMP107:%.*]] = bitcast i8** [[TMP106]] to [10 x float]**
 // CHECK2-NEXT:    store [10 x float]* [[B]], [10 x float]** [[TMP107]], align 8
-// CHECK2-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK2-NEXT:    store i64 40, i64* [[TMP108]], align 8
-// CHECK2-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 1
-// CHECK2-NEXT:    store i8* null, i8** [[TMP109]], align 8
-// CHECK2-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 2
-// CHECK2-NEXT:    [[TMP111:%.*]] = bitcast i8** [[TMP110]] to i64*
-// CHECK2-NEXT:    store i64 [[TMP2]], i64* [[TMP111]], align 8
-// CHECK2-NEXT:    [[TMP112:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 2
-// CHECK2-NEXT:    [[TMP113:%.*]] = bitcast i8** [[TMP112]] to i64*
-// CHECK2-NEXT:    store i64 [[TMP2]], i64* [[TMP113]], align 8
-// CHECK2-NEXT:    [[TMP114:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK2-NEXT:    store i64 8, i64* [[TMP114]], align 8
-// CHECK2-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 2
-// CHECK2-NEXT:    store i8* null, i8** [[TMP115]], align 8
-// CHECK2-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 3
+// CHECK2-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 1
+// CHECK2-NEXT:    store i8* null, i8** [[TMP108]], align 8
+// CHECK2-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 2
+// CHECK2-NEXT:    [[TMP110:%.*]] = bitcast i8** [[TMP109]] to i64*
+// CHECK2-NEXT:    store i64 [[TMP2]], i64* [[TMP110]], align 8
+// CHECK2-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 2
+// CHECK2-NEXT:    [[TMP112:%.*]] = bitcast i8** [[TMP111]] to i64*
+// CHECK2-NEXT:    store i64 [[TMP2]], i64* [[TMP112]], align 8
+// CHECK2-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 2
+// CHECK2-NEXT:    store i8* null, i8** [[TMP113]], align 8
+// CHECK2-NEXT:    [[TMP114:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 3
+// CHECK2-NEXT:    [[TMP115:%.*]] = bitcast i8** [[TMP114]] to float**
+// CHECK2-NEXT:    store float* [[VLA]], float** [[TMP115]], align 8
+// CHECK2-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 3
 // CHECK2-NEXT:    [[TMP117:%.*]] = bitcast i8** [[TMP116]] to float**
 // CHECK2-NEXT:    store float* [[VLA]], float** [[TMP117]], align 8
-// CHECK2-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 3
-// CHECK2-NEXT:    [[TMP119:%.*]] = bitcast i8** [[TMP118]] to float**
-// CHECK2-NEXT:    store float* [[VLA]], float** [[TMP119]], align 8
-// CHECK2-NEXT:    [[TMP120:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK2-NEXT:    store i64 [[TMP95]], i64* [[TMP120]], align 8
-// CHECK2-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 3
-// CHECK2-NEXT:    store i8* null, i8** [[TMP121]], align 8
-// CHECK2-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 4
+// CHECK2-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
+// CHECK2-NEXT:    store i64 [[TMP95]], i64* [[TMP118]], align 8
+// CHECK2-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 3
+// CHECK2-NEXT:    store i8* null, i8** [[TMP119]], align 8
+// CHECK2-NEXT:    [[TMP120:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 4
+// CHECK2-NEXT:    [[TMP121:%.*]] = bitcast i8** [[TMP120]] to [5 x [10 x double]]**
+// CHECK2-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP121]], align 8
+// CHECK2-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 4
 // CHECK2-NEXT:    [[TMP123:%.*]] = bitcast i8** [[TMP122]] to [5 x [10 x double]]**
 // CHECK2-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP123]], align 8
-// CHECK2-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 4
-// CHECK2-NEXT:    [[TMP125:%.*]] = bitcast i8** [[TMP124]] to [5 x [10 x double]]**
-// CHECK2-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP125]], align 8
-// CHECK2-NEXT:    [[TMP126:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK2-NEXT:    store i64 400, i64* [[TMP126]], align 8
-// CHECK2-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 4
-// CHECK2-NEXT:    store i8* null, i8** [[TMP127]], align 8
-// CHECK2-NEXT:    [[TMP128:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 5
-// CHECK2-NEXT:    [[TMP129:%.*]] = bitcast i8** [[TMP128]] to i64*
-// CHECK2-NEXT:    store i64 5, i64* [[TMP129]], align 8
-// CHECK2-NEXT:    [[TMP130:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 5
+// CHECK2-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 4
+// CHECK2-NEXT:    store i8* null, i8** [[TMP124]], align 8
+// CHECK2-NEXT:    [[TMP125:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 5
+// CHECK2-NEXT:    [[TMP126:%.*]] = bitcast i8** [[TMP125]] to i64*
+// CHECK2-NEXT:    store i64 5, i64* [[TMP126]], align 8
+// CHECK2-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 5
+// CHECK2-NEXT:    [[TMP128:%.*]] = bitcast i8** [[TMP127]] to i64*
+// CHECK2-NEXT:    store i64 5, i64* [[TMP128]], align 8
+// CHECK2-NEXT:    [[TMP129:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 5
+// CHECK2-NEXT:    store i8* null, i8** [[TMP129]], align 8
+// CHECK2-NEXT:    [[TMP130:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 6
 // CHECK2-NEXT:    [[TMP131:%.*]] = bitcast i8** [[TMP130]] to i64*
-// CHECK2-NEXT:    store i64 5, i64* [[TMP131]], align 8
-// CHECK2-NEXT:    [[TMP132:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 5
-// CHECK2-NEXT:    store i64 8, i64* [[TMP132]], align 8
-// CHECK2-NEXT:    [[TMP133:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 5
-// CHECK2-NEXT:    store i8* null, i8** [[TMP133]], align 8
-// CHECK2-NEXT:    [[TMP134:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 6
-// CHECK2-NEXT:    [[TMP135:%.*]] = bitcast i8** [[TMP134]] to i64*
-// CHECK2-NEXT:    store i64 [[TMP5]], i64* [[TMP135]], align 8
-// CHECK2-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 6
-// CHECK2-NEXT:    [[TMP137:%.*]] = bitcast i8** [[TMP136]] to i64*
-// CHECK2-NEXT:    store i64 [[TMP5]], i64* [[TMP137]], align 8
-// CHECK2-NEXT:    [[TMP138:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 6
-// CHECK2-NEXT:    store i64 8, i64* [[TMP138]], align 8
-// CHECK2-NEXT:    [[TMP139:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 6
-// CHECK2-NEXT:    store i8* null, i8** [[TMP139]], align 8
-// CHECK2-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 7
-// CHECK2-NEXT:    [[TMP141:%.*]] = bitcast i8** [[TMP140]] to double**
-// CHECK2-NEXT:    store double* [[VLA1]], double** [[TMP141]], align 8
-// CHECK2-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 7
-// CHECK2-NEXT:    [[TMP143:%.*]] = bitcast i8** [[TMP142]] to double**
-// CHECK2-NEXT:    store double* [[VLA1]], double** [[TMP143]], align 8
-// CHECK2-NEXT:    [[TMP144:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
-// CHECK2-NEXT:    store i64 [[TMP97]], i64* [[TMP144]], align 8
-// CHECK2-NEXT:    [[TMP145:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 7
+// CHECK2-NEXT:    store i64 [[TMP5]], i64* [[TMP131]], align 8
+// CHECK2-NEXT:    [[TMP132:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 6
+// CHECK2-NEXT:    [[TMP133:%.*]] = bitcast i8** [[TMP132]] to i64*
+// CHECK2-NEXT:    store i64 [[TMP5]], i64* [[TMP133]], align 8
+// CHECK2-NEXT:    [[TMP134:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 6
+// CHECK2-NEXT:    store i8* null, i8** [[TMP134]], align 8
+// CHECK2-NEXT:    [[TMP135:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 7
+// CHECK2-NEXT:    [[TMP136:%.*]] = bitcast i8** [[TMP135]] to double**
+// CHECK2-NEXT:    store double* [[VLA1]], double** [[TMP136]], align 8
+// CHECK2-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 7
+// CHECK2-NEXT:    [[TMP138:%.*]] = bitcast i8** [[TMP137]] to double**
+// CHECK2-NEXT:    store double* [[VLA1]], double** [[TMP138]], align 8
+// CHECK2-NEXT:    [[TMP139:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
+// CHECK2-NEXT:    store i64 [[TMP97]], i64* [[TMP139]], align 8
+// CHECK2-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 7
+// CHECK2-NEXT:    store i8* null, i8** [[TMP140]], align 8
+// CHECK2-NEXT:    [[TMP141:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 8
+// CHECK2-NEXT:    [[TMP142:%.*]] = bitcast i8** [[TMP141]] to %struct.TT**
+// CHECK2-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP142]], align 8
+// CHECK2-NEXT:    [[TMP143:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 8
+// CHECK2-NEXT:    [[TMP144:%.*]] = bitcast i8** [[TMP143]] to %struct.TT**
+// CHECK2-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP144]], align 8
+// CHECK2-NEXT:    [[TMP145:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 8
 // CHECK2-NEXT:    store i8* null, i8** [[TMP145]], align 8
-// CHECK2-NEXT:    [[TMP146:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 8
-// CHECK2-NEXT:    [[TMP147:%.*]] = bitcast i8** [[TMP146]] to %struct.TT**
-// CHECK2-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP147]], align 8
-// CHECK2-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 8
-// CHECK2-NEXT:    [[TMP149:%.*]] = bitcast i8** [[TMP148]] to %struct.TT**
-// CHECK2-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP149]], align 8
-// CHECK2-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 8
-// CHECK2-NEXT:    store i64 16, i64* [[TMP150]], align 8
-// CHECK2-NEXT:    [[TMP151:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 8
-// CHECK2-NEXT:    store i8* null, i8** [[TMP151]], align 8
-// CHECK2-NEXT:    [[TMP152:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 9
-// CHECK2-NEXT:    [[TMP153:%.*]] = bitcast i8** [[TMP152]] to i64*
-// CHECK2-NEXT:    store i64 [[TMP93]], i64* [[TMP153]], align 8
-// CHECK2-NEXT:    [[TMP154:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 9
-// CHECK2-NEXT:    [[TMP155:%.*]] = bitcast i8** [[TMP154]] to i64*
-// CHECK2-NEXT:    store i64 [[TMP93]], i64* [[TMP155]], align 8
-// CHECK2-NEXT:    [[TMP156:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 9
-// CHECK2-NEXT:    store i64 4, i64* [[TMP156]], align 8
-// CHECK2-NEXT:    [[TMP157:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 9
-// CHECK2-NEXT:    store i8* null, i8** [[TMP157]], align 8
-// CHECK2-NEXT:    [[TMP158:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 0
-// CHECK2-NEXT:    [[TMP159:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 0
-// CHECK2-NEXT:    [[TMP160:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP146:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 9
+// CHECK2-NEXT:    [[TMP147:%.*]] = bitcast i8** [[TMP146]] to i64*
+// CHECK2-NEXT:    store i64 [[TMP93]], i64* [[TMP147]], align 8
+// CHECK2-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 9
+// CHECK2-NEXT:    [[TMP149:%.*]] = bitcast i8** [[TMP148]] to i64*
+// CHECK2-NEXT:    store i64 [[TMP93]], i64* [[TMP149]], align 8
+// CHECK2-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 9
+// CHECK2-NEXT:    store i8* null, i8** [[TMP150]], align 8
+// CHECK2-NEXT:    [[TMP151:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP152:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP153:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
 // CHECK2-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK2-NEXT:    [[TMP161:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l145.region_id, i32 10, i8** [[TMP158]], i8** [[TMP159]], i64* [[TMP160]], i64* getelementptr inbounds ([10 x i64], [10 x i64]* @.offload_maptypes.10, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK2-NEXT:    [[TMP162:%.*]] = icmp ne i32 [[TMP161]], 0
-// CHECK2-NEXT:    br i1 [[TMP162]], label [[OMP_OFFLOAD_FAILED33:%.*]], label [[OMP_OFFLOAD_CONT34:%.*]]
+// CHECK2-NEXT:    [[TMP154:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l145.region_id, i32 10, i8** [[TMP151]], i8** [[TMP152]], i64* [[TMP153]], i64* getelementptr inbounds ([10 x i64], [10 x i64]* @.offload_maptypes.11, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK2-NEXT:    [[TMP155:%.*]] = icmp ne i32 [[TMP154]], 0
+// CHECK2-NEXT:    br i1 [[TMP155]], label [[OMP_OFFLOAD_FAILED33:%.*]], label [[OMP_OFFLOAD_CONT34:%.*]]
 // CHECK2:       omp_offload.failed33:
 // CHECK2-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l145(i64 [[TMP91]], [10 x float]* [[B]], i64 [[TMP2]], float* [[VLA]], [5 x [10 x double]]* [[C]], i64 5, i64 [[TMP5]], double* [[VLA1]], %struct.TT* [[D]], i64 [[TMP93]]) #[[ATTR3]]
 // CHECK2-NEXT:    br label [[OMP_OFFLOAD_CONT34]]
@@ -2348,10 +2314,10 @@ int bar(int n){
 // CHECK2-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l145(i64 [[TMP91]], [10 x float]* [[B]], i64 [[TMP2]], float* [[VLA]], [5 x [10 x double]]* [[C]], i64 5, i64 [[TMP5]], double* [[VLA1]], %struct.TT* [[D]], i64 [[TMP93]]) #[[ATTR3]]
 // CHECK2-NEXT:    br label [[OMP_IF_END36]]
 // CHECK2:       omp_if.end36:
-// CHECK2-NEXT:    [[TMP163:%.*]] = load i32, i32* [[A]], align 4
-// CHECK2-NEXT:    [[TMP164:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK2-NEXT:    call void @llvm.stackrestore(i8* [[TMP164]])
-// CHECK2-NEXT:    ret i32 [[TMP163]]
+// CHECK2-NEXT:    [[TMP156:%.*]] = load i32, i32* [[A]], align 4
+// CHECK2-NEXT:    [[TMP157:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK2-NEXT:    call void @llvm.stackrestore(i8* [[TMP157]])
+// CHECK2-NEXT:    ret i32 [[TMP156]]
 //
 //
 // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l103
@@ -3072,14 +3038,14 @@ int bar(int n){
 // CHECK2-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0
 // CHECK2-NEXT:    [[TMP8:%.*]] = mul nuw i64 2, [[TMP2]]
 // CHECK2-NEXT:    [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 2
-// CHECK2-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK2-NEXT:    [[TMP11:%.*]] = bitcast i8** [[TMP10]] to %struct.S1**
-// CHECK2-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP11]], align 8
-// CHECK2-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK2-NEXT:    [[TMP13:%.*]] = bitcast i8** [[TMP12]] to double**
-// CHECK2-NEXT:    store double* [[A]], double** [[TMP13]], align 8
-// CHECK2-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK2-NEXT:    store i64 8, i64* [[TMP14]], align 8
+// CHECK2-NEXT:    [[TMP10:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK2-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP10]], i8* align 8 bitcast ([5 x i64]* @.offload_sizes.13 to i8*), i64 40, i1 false)
+// CHECK2-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP12:%.*]] = bitcast i8** [[TMP11]] to %struct.S1**
+// CHECK2-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP12]], align 8
+// CHECK2-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to double**
+// CHECK2-NEXT:    store double* [[A]], double** [[TMP14]], align 8
 // CHECK2-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
 // CHECK2-NEXT:    store i8* null, i8** [[TMP15]], align 8
 // CHECK2-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -3088,47 +3054,41 @@ int bar(int n){
 // CHECK2-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK2-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i64*
 // CHECK2-NEXT:    store i64 [[TMP6]], i64* [[TMP19]], align 8
-// CHECK2-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK2-NEXT:    store i64 4, i64* [[TMP20]], align 8
-// CHECK2-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK2-NEXT:    store i8* null, i8** [[TMP21]], align 8
-// CHECK2-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK2-NEXT:    [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i64*
-// CHECK2-NEXT:    store i64 2, i64* [[TMP23]], align 8
-// CHECK2-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK2-NEXT:    [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i64*
-// CHECK2-NEXT:    store i64 2, i64* [[TMP25]], align 8
-// CHECK2-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK2-NEXT:    store i64 8, i64* [[TMP26]], align 8
-// CHECK2-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK2-NEXT:    store i8* null, i8** [[TMP27]], align 8
-// CHECK2-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK2-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK2-NEXT:    store i8* null, i8** [[TMP20]], align 8
+// CHECK2-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK2-NEXT:    [[TMP22:%.*]] = bitcast i8** [[TMP21]] to i64*
+// CHECK2-NEXT:    store i64 2, i64* [[TMP22]], align 8
+// CHECK2-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK2-NEXT:    [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i64*
+// CHECK2-NEXT:    store i64 2, i64* [[TMP24]], align 8
+// CHECK2-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK2-NEXT:    store i8* null, i8** [[TMP25]], align 8
+// CHECK2-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK2-NEXT:    [[TMP27:%.*]] = bitcast i8** [[TMP26]] to i64*
+// CHECK2-NEXT:    store i64 [[TMP2]], i64* [[TMP27]], align 8
+// CHECK2-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
 // CHECK2-NEXT:    [[TMP29:%.*]] = bitcast i8** [[TMP28]] to i64*
 // CHECK2-NEXT:    store i64 [[TMP2]], i64* [[TMP29]], align 8
-// CHECK2-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK2-NEXT:    [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i64*
-// CHECK2-NEXT:    store i64 [[TMP2]], i64* [[TMP31]], align 8
-// CHECK2-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK2-NEXT:    store i64 8, i64* [[TMP32]], align 8
-// CHECK2-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
-// CHECK2-NEXT:    store i8* null, i8** [[TMP33]], align 8
-// CHECK2-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK2-NEXT:    [[TMP35:%.*]] = bitcast i8** [[TMP34]] to i16**
-// CHECK2-NEXT:    store i16* [[VLA]], i16** [[TMP35]], align 8
-// CHECK2-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
-// CHECK2-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i16**
-// CHECK2-NEXT:    store i16* [[VLA]], i16** [[TMP37]], align 8
-// CHECK2-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK2-NEXT:    store i64 [[TMP9]], i64* [[TMP38]], align 8
-// CHECK2-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
-// CHECK2-NEXT:    store i8* null, i8** [[TMP39]], align 8
-// CHECK2-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK2-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK2-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
+// CHECK2-NEXT:    store i8* null, i8** [[TMP30]], align 8
+// CHECK2-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK2-NEXT:    [[TMP32:%.*]] = bitcast i8** [[TMP31]] to i16**
+// CHECK2-NEXT:    store i16* [[VLA]], i16** [[TMP32]], align 8
+// CHECK2-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK2-NEXT:    [[TMP34:%.*]] = bitcast i8** [[TMP33]] to i16**
+// CHECK2-NEXT:    store i16* [[VLA]], i16** [[TMP34]], align 8
+// CHECK2-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
+// CHECK2-NEXT:    store i64 [[TMP9]], i64* [[TMP35]], align 8
+// CHECK2-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
+// CHECK2-NEXT:    store i8* null, i8** [[TMP36]], align 8
+// CHECK2-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
 // CHECK2-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK2-NEXT:    [[TMP43:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l218.region_id, i32 5, i8** [[TMP40]], i8** [[TMP41]], i64* [[TMP42]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.12, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK2-NEXT:    [[TMP44:%.*]] = icmp ne i32 [[TMP43]], 0
-// CHECK2-NEXT:    br i1 [[TMP44]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK2-NEXT:    [[TMP40:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l218.region_id, i32 5, i8** [[TMP37]], i8** [[TMP38]], i64* [[TMP39]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.14, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK2-NEXT:    [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0
+// CHECK2-NEXT:    br i1 [[TMP41]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK2:       omp_offload.failed:
 // CHECK2-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l218(%struct.S1* [[THIS1]], i64 [[TMP6]], i64 2, i64 [[TMP2]], i16* [[VLA]]) #[[ATTR3]]
 // CHECK2-NEXT:    br label [[OMP_OFFLOAD_CONT]]
@@ -3138,15 +3098,15 @@ int bar(int n){
 // CHECK2-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l218(%struct.S1* [[THIS1]], i64 [[TMP6]], i64 2, i64 [[TMP2]], i16* [[VLA]]) #[[ATTR3]]
 // CHECK2-NEXT:    br label [[OMP_IF_END]]
 // CHECK2:       omp_if.end:
-// CHECK2-NEXT:    [[TMP45:%.*]] = mul nsw i64 1, [[TMP2]]
-// CHECK2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i64 [[TMP45]]
+// CHECK2-NEXT:    [[TMP42:%.*]] = mul nsw i64 1, [[TMP2]]
+// CHECK2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i64 [[TMP42]]
 // CHECK2-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1
-// CHECK2-NEXT:    [[TMP46:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
-// CHECK2-NEXT:    [[CONV3:%.*]] = sext i16 [[TMP46]] to i32
-// CHECK2-NEXT:    [[TMP47:%.*]] = load i32, i32* [[B]], align 4
-// CHECK2-NEXT:    [[ADD4:%.*]] = add nsw i32 [[CONV3]], [[TMP47]]
-// CHECK2-NEXT:    [[TMP48:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK2-NEXT:    call void @llvm.stackrestore(i8* [[TMP48]])
+// CHECK2-NEXT:    [[TMP43:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
+// CHECK2-NEXT:    [[CONV3:%.*]] = sext i16 [[TMP43]] to i32
+// CHECK2-NEXT:    [[TMP44:%.*]] = load i32, i32* [[B]], align 4
+// CHECK2-NEXT:    [[ADD4:%.*]] = add nsw i32 [[CONV3]], [[TMP44]]
+// CHECK2-NEXT:    [[TMP45:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK2-NEXT:    call void @llvm.stackrestore(i8* [[TMP45]])
 // CHECK2-NEXT:    ret i32 [[ADD4]]
 //
 //
@@ -3251,7 +3211,7 @@ int bar(int n){
 // CHECK2-NEXT:    [[ADD8:%.*]] = add i32 [[TMP40]], 1
 // CHECK2-NEXT:    [[TMP41:%.*]] = zext i32 [[ADD8]] to i64
 // CHECK2-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 [[TMP41]])
-// CHECK2-NEXT:    [[TMP42:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l200.region_id, i32 5, i8** [[TMP34]], i8** [[TMP35]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_sizes.14, i32 0, i32 0), i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.15, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK2-NEXT:    [[TMP42:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l200.region_id, i32 5, i8** [[TMP34]], i8** [[TMP35]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_sizes.16, i32 0, i32 0), i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.17, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK2-NEXT:    [[TMP43:%.*]] = icmp ne i32 [[TMP42]], 0
 // CHECK2-NEXT:    br i1 [[TMP43]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK2:       omp_offload.failed:
@@ -3322,7 +3282,7 @@ int bar(int n){
 // CHECK2-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK2-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK2-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK2-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l183.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.17, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.18, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK2-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l183.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.19, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.20, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK2-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK2-NEXT:    br i1 [[TMP23]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK2:       omp_offload.failed:
@@ -3361,11 +3321,11 @@ int bar(int n){
 // CHECK2-NEXT:    [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32*
 // CHECK2-NEXT:    store i32 [[TMP4]], i32* [[CONV3]], align 4
 // CHECK2-NEXT:    [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8
-// CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i64, i64, i64, i16*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i64 [[TMP5]], i64 [[TMP1]], i64 [[TMP2]], i16* [[TMP3]])
+// CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i64, i64, i64, i16*)* @.omp_outlined..12 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i64 [[TMP5]], i64 [[TMP1]], i64 [[TMP2]], i16* [[TMP3]])
 // CHECK2-NEXT:    ret void
 //
 //
-// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..11
+// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..12
 // CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR2]] {
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -3492,11 +3452,11 @@ int bar(int n){
 // CHECK2-NEXT:    [[CONV7:%.*]] = bitcast i64* [[AAA_CASTED]] to i8*
 // CHECK2-NEXT:    store i8 [[TMP7]], i8* [[CONV7]], align 1
 // CHECK2-NEXT:    [[TMP8:%.*]] = load i64, i64* [[AAA_CASTED]], align 8
-// CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, [10 x i32]*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP8]], [10 x i32]* [[TMP0]])
+// CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP8]], [10 x i32]* [[TMP0]])
 // CHECK2-NEXT:    ret void
 //
 //
-// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..13
+// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..15
 // CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] {
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -3642,11 +3602,11 @@ int bar(int n){
 // CHECK2-NEXT:    [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16*
 // CHECK2-NEXT:    store i16 [[TMP3]], i16* [[CONV3]], align 2
 // CHECK2-NEXT:    [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8
-// CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], [10 x i32]* [[TMP0]])
+// CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], [10 x i32]* [[TMP0]])
 // CHECK2-NEXT:    ret void
 //
 //
-// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..16
+// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..18
 // CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] {
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -3946,14 +3906,14 @@ int bar(int n){
 // CHECK3-NEXT:    [[TMP95:%.*]] = mul nuw i32 5, [[TMP3]]
 // CHECK3-NEXT:    [[TMP96:%.*]] = mul nuw i32 [[TMP95]], 8
 // CHECK3-NEXT:    [[TMP97:%.*]] = sext i32 [[TMP96]] to i64
-// CHECK3-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 0
-// CHECK3-NEXT:    [[TMP99:%.*]] = bitcast i8** [[TMP98]] to i32*
-// CHECK3-NEXT:    store i32 [[TMP89]], i32* [[TMP99]], align 4
-// CHECK3-NEXT:    [[TMP100:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS24]], i32 0, i32 0
-// CHECK3-NEXT:    [[TMP101:%.*]] = bitcast i8** [[TMP100]] to i32*
-// CHECK3-NEXT:    store i32 [[TMP89]], i32* [[TMP101]], align 4
-// CHECK3-NEXT:    [[TMP102:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK3-NEXT:    store i64 4, i64* [[TMP102]], align 4
+// CHECK3-NEXT:    [[TMP98:%.*]] = bitcast [10 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK3-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP98]], i8* align 4 bitcast ([10 x i64]* @.offload_sizes.10 to i8*), i32 80, i1 false)
+// CHECK3-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 0
+// CHECK3-NEXT:    [[TMP100:%.*]] = bitcast i8** [[TMP99]] to i32*
+// CHECK3-NEXT:    store i32 [[TMP89]], i32* [[TMP100]], align 4
+// CHECK3-NEXT:    [[TMP101:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS24]], i32 0, i32 0
+// CHECK3-NEXT:    [[TMP102:%.*]] = bitcast i8** [[TMP101]] to i32*
+// CHECK3-NEXT:    store i32 [[TMP89]], i32* [[TMP102]], align 4
 // CHECK3-NEXT:    [[TMP103:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS25]], i32 0, i32 0
 // CHECK3-NEXT:    store i8* null, i8** [[TMP103]], align 4
 // CHECK3-NEXT:    [[TMP104:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 1
@@ -3962,97 +3922,83 @@ int bar(int n){
 // CHECK3-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS24]], i32 0, i32 1
 // CHECK3-NEXT:    [[TMP107:%.*]] = bitcast i8** [[TMP106]] to [10 x float]**
 // CHECK3-NEXT:    store [10 x float]* [[B]], [10 x float]** [[TMP107]], align 4
-// CHECK3-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK3-NEXT:    store i64 40, i64* [[TMP108]], align 4
-// CHECK3-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS25]], i32 0, i32 1
-// CHECK3-NEXT:    store i8* null, i8** [[TMP109]], align 4
-// CHECK3-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 2
-// CHECK3-NEXT:    [[TMP111:%.*]] = bitcast i8** [[TMP110]] to i32*
-// CHECK3-NEXT:    store i32 [[TMP1]], i32* [[TMP111]], align 4
-// CHECK3-NEXT:    [[TMP112:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS24]], i32 0, i32 2
-// CHECK3-NEXT:    [[TMP113:%.*]] = bitcast i8** [[TMP112]] to i32*
-// CHECK3-NEXT:    store i32 [[TMP1]], i32* [[TMP113]], align 4
-// CHECK3-NEXT:    [[TMP114:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK3-NEXT:    store i64 4, i64* [[TMP114]], align 4
-// CHECK3-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS25]], i32 0, i32 2
-// CHECK3-NEXT:    store i8* null, i8** [[TMP115]], align 4
-// CHECK3-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 3
+// CHECK3-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS25]], i32 0, i32 1
+// CHECK3-NEXT:    store i8* null, i8** [[TMP108]], align 4
+// CHECK3-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 2
+// CHECK3-NEXT:    [[TMP110:%.*]] = bitcast i8** [[TMP109]] to i32*
+// CHECK3-NEXT:    store i32 [[TMP1]], i32* [[TMP110]], align 4
+// CHECK3-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS24]], i32 0, i32 2
+// CHECK3-NEXT:    [[TMP112:%.*]] = bitcast i8** [[TMP111]] to i32*
+// CHECK3-NEXT:    store i32 [[TMP1]], i32* [[TMP112]], align 4
+// CHECK3-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS25]], i32 0, i32 2
+// CHECK3-NEXT:    store i8* null, i8** [[TMP113]], align 4
+// CHECK3-NEXT:    [[TMP114:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 3
+// CHECK3-NEXT:    [[TMP115:%.*]] = bitcast i8** [[TMP114]] to float**
+// CHECK3-NEXT:    store float* [[VLA]], float** [[TMP115]], align 4
+// CHECK3-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS24]], i32 0, i32 3
 // CHECK3-NEXT:    [[TMP117:%.*]] = bitcast i8** [[TMP116]] to float**
 // CHECK3-NEXT:    store float* [[VLA]], float** [[TMP117]], align 4
-// CHECK3-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS24]], i32 0, i32 3
-// CHECK3-NEXT:    [[TMP119:%.*]] = bitcast i8** [[TMP118]] to float**
-// CHECK3-NEXT:    store float* [[VLA]], float** [[TMP119]], align 4
-// CHECK3-NEXT:    [[TMP120:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK3-NEXT:    store i64 [[TMP94]], i64* [[TMP120]], align 4
-// CHECK3-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS25]], i32 0, i32 3
-// CHECK3-NEXT:    store i8* null, i8** [[TMP121]], align 4
-// CHECK3-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 4
+// CHECK3-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
+// CHECK3-NEXT:    store i64 [[TMP94]], i64* [[TMP118]], align 4
+// CHECK3-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS25]], i32 0, i32 3
+// CHECK3-NEXT:    store i8* null, i8** [[TMP119]], align 4
+// CHECK3-NEXT:    [[TMP120:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 4
+// CHECK3-NEXT:    [[TMP121:%.*]] = bitcast i8** [[TMP120]] to [5 x [10 x double]]**
+// CHECK3-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP121]], align 4
+// CHECK3-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS24]], i32 0, i32 4
 // CHECK3-NEXT:    [[TMP123:%.*]] = bitcast i8** [[TMP122]] to [5 x [10 x double]]**
 // CHECK3-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP123]], align 4
-// CHECK3-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS24]], i32 0, i32 4
-// CHECK3-NEXT:    [[TMP125:%.*]] = bitcast i8** [[TMP124]] to [5 x [10 x double]]**
-// CHECK3-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP125]], align 4
-// CHECK3-NEXT:    [[TMP126:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK3-NEXT:    store i64 400, i64* [[TMP126]], align 4
-// CHECK3-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS25]], i32 0, i32 4
-// CHECK3-NEXT:    store i8* null, i8** [[TMP127]], align 4
-// CHECK3-NEXT:    [[TMP128:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 5
-// CHECK3-NEXT:    [[TMP129:%.*]] = bitcast i8** [[TMP128]] to i32*
-// CHECK3-NEXT:    store i32 5, i32* [[TMP129]], align 4
-// CHECK3-NEXT:    [[TMP130:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS24]], i32 0, i32 5
+// CHECK3-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS25]], i32 0, i32 4
+// CHECK3-NEXT:    store i8* null, i8** [[TMP124]], align 4
+// CHECK3-NEXT:    [[TMP125:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 5
+// CHECK3-NEXT:    [[TMP126:%.*]] = bitcast i8** [[TMP125]] to i32*
+// CHECK3-NEXT:    store i32 5, i32* [[TMP126]], align 4
+// CHECK3-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS24]], i32 0, i32 5
+// CHECK3-NEXT:    [[TMP128:%.*]] = bitcast i8** [[TMP127]] to i32*
+// CHECK3-NEXT:    store i32 5, i32* [[TMP128]], align 4
+// CHECK3-NEXT:    [[TMP129:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS25]], i32 0, i32 5
+// CHECK3-NEXT:    store i8* null, i8** [[TMP129]], align 4
+// CHECK3-NEXT:    [[TMP130:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 6
 // CHECK3-NEXT:    [[TMP131:%.*]] = bitcast i8** [[TMP130]] to i32*
-// CHECK3-NEXT:    store i32 5, i32* [[TMP131]], align 4
-// CHECK3-NEXT:    [[TMP132:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 5
-// CHECK3-NEXT:    store i64 4, i64* [[TMP132]], align 4
-// CHECK3-NEXT:    [[TMP133:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS25]], i32 0, i32 5
-// CHECK3-NEXT:    store i8* null, i8** [[TMP133]], align 4
-// CHECK3-NEXT:    [[TMP134:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 6
-// CHECK3-NEXT:    [[TMP135:%.*]] = bitcast i8** [[TMP134]] to i32*
-// CHECK3-NEXT:    store i32 [[TMP3]], i32* [[TMP135]], align 4
-// CHECK3-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS24]], i32 0, i32 6
-// CHECK3-NEXT:    [[TMP137:%.*]] = bitcast i8** [[TMP136]] to i32*
-// CHECK3-NEXT:    store i32 [[TMP3]], i32* [[TMP137]], align 4
-// CHECK3-NEXT:    [[TMP138:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 6
-// CHECK3-NEXT:    store i64 4, i64* [[TMP138]], align 4
-// CHECK3-NEXT:    [[TMP139:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS25]], i32 0, i32 6
-// CHECK3-NEXT:    store i8* null, i8** [[TMP139]], align 4
-// CHECK3-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 7
-// CHECK3-NEXT:    [[TMP141:%.*]] = bitcast i8** [[TMP140]] to double**
-// CHECK3-NEXT:    store double* [[VLA1]], double** [[TMP141]], align 4
-// CHECK3-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS24]], i32 0, i32 7
-// CHECK3-NEXT:    [[TMP143:%.*]] = bitcast i8** [[TMP142]] to double**
-// CHECK3-NEXT:    store double* [[VLA1]], double** [[TMP143]], align 4
-// CHECK3-NEXT:    [[TMP144:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
-// CHECK3-NEXT:    store i64 [[TMP97]], i64* [[TMP144]], align 4
-// CHECK3-NEXT:    [[TMP145:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS25]], i32 0, i32 7
+// CHECK3-NEXT:    store i32 [[TMP3]], i32* [[TMP131]], align 4
+// CHECK3-NEXT:    [[TMP132:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS24]], i32 0, i32 6
+// CHECK3-NEXT:    [[TMP133:%.*]] = bitcast i8** [[TMP132]] to i32*
+// CHECK3-NEXT:    store i32 [[TMP3]], i32* [[TMP133]], align 4
+// CHECK3-NEXT:    [[TMP134:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS25]], i32 0, i32 6
+// CHECK3-NEXT:    store i8* null, i8** [[TMP134]], align 4
+// CHECK3-NEXT:    [[TMP135:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 7
+// CHECK3-NEXT:    [[TMP136:%.*]] = bitcast i8** [[TMP135]] to double**
+// CHECK3-NEXT:    store double* [[VLA1]], double** [[TMP136]], align 4
+// CHECK3-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS24]], i32 0, i32 7
+// CHECK3-NEXT:    [[TMP138:%.*]] = bitcast i8** [[TMP137]] to double**
+// CHECK3-NEXT:    store double* [[VLA1]], double** [[TMP138]], align 4
+// CHECK3-NEXT:    [[TMP139:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
+// CHECK3-NEXT:    store i64 [[TMP97]], i64* [[TMP139]], align 4
+// CHECK3-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS25]], i32 0, i32 7
+// CHECK3-NEXT:    store i8* null, i8** [[TMP140]], align 4
+// CHECK3-NEXT:    [[TMP141:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 8
+// CHECK3-NEXT:    [[TMP142:%.*]] = bitcast i8** [[TMP141]] to %struct.TT**
+// CHECK3-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP142]], align 4
+// CHECK3-NEXT:    [[TMP143:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS24]], i32 0, i32 8
+// CHECK3-NEXT:    [[TMP144:%.*]] = bitcast i8** [[TMP143]] to %struct.TT**
+// CHECK3-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP144]], align 4
+// CHECK3-NEXT:    [[TMP145:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS25]], i32 0, i32 8
 // CHECK3-NEXT:    store i8* null, i8** [[TMP145]], align 4
-// CHECK3-NEXT:    [[TMP146:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 8
-// CHECK3-NEXT:    [[TMP147:%.*]] = bitcast i8** [[TMP146]] to %struct.TT**
-// CHECK3-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP147]], align 4
-// CHECK3-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS24]], i32 0, i32 8
-// CHECK3-NEXT:    [[TMP149:%.*]] = bitcast i8** [[TMP148]] to %struct.TT**
-// CHECK3-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP149]], align 4
-// CHECK3-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 8
-// CHECK3-NEXT:    store i64 12, i64* [[TMP150]], align 4
-// CHECK3-NEXT:    [[TMP151:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS25]], i32 0, i32 8
-// CHECK3-NEXT:    store i8* null, i8** [[TMP151]], align 4
-// CHECK3-NEXT:    [[TMP152:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 9
-// CHECK3-NEXT:    [[TMP153:%.*]] = bitcast i8** [[TMP152]] to i32*
-// CHECK3-NEXT:    store i32 [[TMP91]], i32* [[TMP153]], align 4
-// CHECK3-NEXT:    [[TMP154:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS24]], i32 0, i32 9
-// CHECK3-NEXT:    [[TMP155:%.*]] = bitcast i8** [[TMP154]] to i32*
-// CHECK3-NEXT:    store i32 [[TMP91]], i32* [[TMP155]], align 4
-// CHECK3-NEXT:    [[TMP156:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 9
-// CHECK3-NEXT:    store i64 4, i64* [[TMP156]], align 4
-// CHECK3-NEXT:    [[TMP157:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS25]], i32 0, i32 9
-// CHECK3-NEXT:    store i8* null, i8** [[TMP157]], align 4
-// CHECK3-NEXT:    [[TMP158:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 0
-// CHECK3-NEXT:    [[TMP159:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS24]], i32 0, i32 0
-// CHECK3-NEXT:    [[TMP160:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK3-NEXT:    [[TMP146:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 9
+// CHECK3-NEXT:    [[TMP147:%.*]] = bitcast i8** [[TMP146]] to i32*
+// CHECK3-NEXT:    store i32 [[TMP91]], i32* [[TMP147]], align 4
+// CHECK3-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS24]], i32 0, i32 9
+// CHECK3-NEXT:    [[TMP149:%.*]] = bitcast i8** [[TMP148]] to i32*
+// CHECK3-NEXT:    store i32 [[TMP91]], i32* [[TMP149]], align 4
+// CHECK3-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS25]], i32 0, i32 9
+// CHECK3-NEXT:    store i8* null, i8** [[TMP150]], align 4
+// CHECK3-NEXT:    [[TMP151:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 0
+// CHECK3-NEXT:    [[TMP152:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS24]], i32 0, i32 0
+// CHECK3-NEXT:    [[TMP153:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
 // CHECK3-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK3-NEXT:    [[TMP161:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l145.region_id, i32 10, i8** [[TMP158]], i8** [[TMP159]], i64* [[TMP160]], i64* getelementptr inbounds ([10 x i64], [10 x i64]* @.offload_maptypes.10, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK3-NEXT:    [[TMP162:%.*]] = icmp ne i32 [[TMP161]], 0
-// CHECK3-NEXT:    br i1 [[TMP162]], label [[OMP_OFFLOAD_FAILED27:%.*]], label [[OMP_OFFLOAD_CONT28:%.*]]
+// CHECK3-NEXT:    [[TMP154:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l145.region_id, i32 10, i8** [[TMP151]], i8** [[TMP152]], i64* [[TMP153]], i64* getelementptr inbounds ([10 x i64], [10 x i64]* @.offload_maptypes.11, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK3-NEXT:    [[TMP155:%.*]] = icmp ne i32 [[TMP154]], 0
+// CHECK3-NEXT:    br i1 [[TMP155]], label [[OMP_OFFLOAD_FAILED27:%.*]], label [[OMP_OFFLOAD_CONT28:%.*]]
 // CHECK3:       omp_offload.failed27:
 // CHECK3-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l145(i32 [[TMP89]], [10 x float]* [[B]], i32 [[TMP1]], float* [[VLA]], [5 x [10 x double]]* [[C]], i32 5, i32 [[TMP3]], double* [[VLA1]], %struct.TT* [[D]], i32 [[TMP91]]) #[[ATTR3]]
 // CHECK3-NEXT:    br label [[OMP_OFFLOAD_CONT28]]
@@ -4062,10 +4008,10 @@ int bar(int n){
 // CHECK3-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l145(i32 [[TMP89]], [10 x float]* [[B]], i32 [[TMP1]], float* [[VLA]], [5 x [10 x double]]* [[C]], i32 5, i32 [[TMP3]], double* [[VLA1]], %struct.TT* [[D]], i32 [[TMP91]]) #[[ATTR3]]
 // CHECK3-NEXT:    br label [[OMP_IF_END30]]
 // CHECK3:       omp_if.end30:
-// CHECK3-NEXT:    [[TMP163:%.*]] = load i32, i32* [[A]], align 4
-// CHECK3-NEXT:    [[TMP164:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK3-NEXT:    call void @llvm.stackrestore(i8* [[TMP164]])
-// CHECK3-NEXT:    ret i32 [[TMP163]]
+// CHECK3-NEXT:    [[TMP156:%.*]] = load i32, i32* [[A]], align 4
+// CHECK3-NEXT:    [[TMP157:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK3-NEXT:    call void @llvm.stackrestore(i8* [[TMP157]])
+// CHECK3-NEXT:    ret i32 [[TMP156]]
 //
 //
 // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l103
@@ -4769,14 +4715,14 @@ int bar(int n){
 // CHECK3-NEXT:    [[TMP7:%.*]] = mul nuw i32 2, [[TMP1]]
 // CHECK3-NEXT:    [[TMP8:%.*]] = mul nuw i32 [[TMP7]], 2
 // CHECK3-NEXT:    [[TMP9:%.*]] = sext i32 [[TMP8]] to i64
-// CHECK3-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[TMP11:%.*]] = bitcast i8** [[TMP10]] to %struct.S1**
-// CHECK3-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP11]], align 4
-// CHECK3-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[TMP13:%.*]] = bitcast i8** [[TMP12]] to double**
-// CHECK3-NEXT:    store double* [[A]], double** [[TMP13]], align 4
-// CHECK3-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK3-NEXT:    store i64 8, i64* [[TMP14]], align 4
+// CHECK3-NEXT:    [[TMP10:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK3-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP10]], i8* align 4 bitcast ([5 x i64]* @.offload_sizes.13 to i8*), i32 40, i1 false)
+// CHECK3-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK3-NEXT:    [[TMP12:%.*]] = bitcast i8** [[TMP11]] to %struct.S1**
+// CHECK3-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP12]], align 4
+// CHECK3-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK3-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to double**
+// CHECK3-NEXT:    store double* [[A]], double** [[TMP14]], align 4
 // CHECK3-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
 // CHECK3-NEXT:    store i8* null, i8** [[TMP15]], align 4
 // CHECK3-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -4785,47 +4731,41 @@ int bar(int n){
 // CHECK3-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK3-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32*
 // CHECK3-NEXT:    store i32 [[TMP5]], i32* [[TMP19]], align 4
-// CHECK3-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK3-NEXT:    store i64 4, i64* [[TMP20]], align 4
-// CHECK3-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK3-NEXT:    store i8* null, i8** [[TMP21]], align 4
-// CHECK3-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK3-NEXT:    [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i32*
-// CHECK3-NEXT:    store i32 2, i32* [[TMP23]], align 4
-// CHECK3-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK3-NEXT:    [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i32*
-// CHECK3-NEXT:    store i32 2, i32* [[TMP25]], align 4
-// CHECK3-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK3-NEXT:    store i64 4, i64* [[TMP26]], align 4
-// CHECK3-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK3-NEXT:    store i8* null, i8** [[TMP27]], align 4
-// CHECK3-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK3-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK3-NEXT:    store i8* null, i8** [[TMP20]], align 4
+// CHECK3-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK3-NEXT:    [[TMP22:%.*]] = bitcast i8** [[TMP21]] to i32*
+// CHECK3-NEXT:    store i32 2, i32* [[TMP22]], align 4
+// CHECK3-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK3-NEXT:    [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i32*
+// CHECK3-NEXT:    store i32 2, i32* [[TMP24]], align 4
+// CHECK3-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK3-NEXT:    store i8* null, i8** [[TMP25]], align 4
+// CHECK3-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK3-NEXT:    [[TMP27:%.*]] = bitcast i8** [[TMP26]] to i32*
+// CHECK3-NEXT:    store i32 [[TMP1]], i32* [[TMP27]], align 4
+// CHECK3-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
 // CHECK3-NEXT:    [[TMP29:%.*]] = bitcast i8** [[TMP28]] to i32*
 // CHECK3-NEXT:    store i32 [[TMP1]], i32* [[TMP29]], align 4
-// CHECK3-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK3-NEXT:    [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i32*
-// CHECK3-NEXT:    store i32 [[TMP1]], i32* [[TMP31]], align 4
-// CHECK3-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK3-NEXT:    store i64 4, i64* [[TMP32]], align 4
-// CHECK3-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
-// CHECK3-NEXT:    store i8* null, i8** [[TMP33]], align 4
-// CHECK3-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK3-NEXT:    [[TMP35:%.*]] = bitcast i8** [[TMP34]] to i16**
-// CHECK3-NEXT:    store i16* [[VLA]], i16** [[TMP35]], align 4
-// CHECK3-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
-// CHECK3-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i16**
-// CHECK3-NEXT:    store i16* [[VLA]], i16** [[TMP37]], align 4
-// CHECK3-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK3-NEXT:    store i64 [[TMP9]], i64* [[TMP38]], align 4
-// CHECK3-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
-// CHECK3-NEXT:    store i8* null, i8** [[TMP39]], align 4
-// CHECK3-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK3-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
+// CHECK3-NEXT:    store i8* null, i8** [[TMP30]], align 4
+// CHECK3-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK3-NEXT:    [[TMP32:%.*]] = bitcast i8** [[TMP31]] to i16**
+// CHECK3-NEXT:    store i16* [[VLA]], i16** [[TMP32]], align 4
+// CHECK3-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK3-NEXT:    [[TMP34:%.*]] = bitcast i8** [[TMP33]] to i16**
+// CHECK3-NEXT:    store i16* [[VLA]], i16** [[TMP34]], align 4
+// CHECK3-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
+// CHECK3-NEXT:    store i64 [[TMP9]], i64* [[TMP35]], align 4
+// CHECK3-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
+// CHECK3-NEXT:    store i8* null, i8** [[TMP36]], align 4
+// CHECK3-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK3-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK3-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
 // CHECK3-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK3-NEXT:    [[TMP43:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l218.region_id, i32 5, i8** [[TMP40]], i8** [[TMP41]], i64* [[TMP42]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.12, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK3-NEXT:    [[TMP44:%.*]] = icmp ne i32 [[TMP43]], 0
-// CHECK3-NEXT:    br i1 [[TMP44]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK3-NEXT:    [[TMP40:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l218.region_id, i32 5, i8** [[TMP37]], i8** [[TMP38]], i64* [[TMP39]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.14, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK3-NEXT:    [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0
+// CHECK3-NEXT:    br i1 [[TMP41]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK3:       omp_offload.failed:
 // CHECK3-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l218(%struct.S1* [[THIS1]], i32 [[TMP5]], i32 2, i32 [[TMP1]], i16* [[VLA]]) #[[ATTR3]]
 // CHECK3-NEXT:    br label [[OMP_OFFLOAD_CONT]]
@@ -4835,15 +4775,15 @@ int bar(int n){
 // CHECK3-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l218(%struct.S1* [[THIS1]], i32 [[TMP5]], i32 2, i32 [[TMP1]], i16* [[VLA]]) #[[ATTR3]]
 // CHECK3-NEXT:    br label [[OMP_IF_END]]
 // CHECK3:       omp_if.end:
-// CHECK3-NEXT:    [[TMP45:%.*]] = mul nsw i32 1, [[TMP1]]
-// CHECK3-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i32 [[TMP45]]
+// CHECK3-NEXT:    [[TMP42:%.*]] = mul nsw i32 1, [[TMP1]]
+// CHECK3-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i32 [[TMP42]]
 // CHECK3-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1
-// CHECK3-NEXT:    [[TMP46:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
-// CHECK3-NEXT:    [[CONV:%.*]] = sext i16 [[TMP46]] to i32
-// CHECK3-NEXT:    [[TMP47:%.*]] = load i32, i32* [[B]], align 4
-// CHECK3-NEXT:    [[ADD3:%.*]] = add nsw i32 [[CONV]], [[TMP47]]
-// CHECK3-NEXT:    [[TMP48:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK3-NEXT:    call void @llvm.stackrestore(i8* [[TMP48]])
+// CHECK3-NEXT:    [[TMP43:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
+// CHECK3-NEXT:    [[CONV:%.*]] = sext i16 [[TMP43]] to i32
+// CHECK3-NEXT:    [[TMP44:%.*]] = load i32, i32* [[B]], align 4
+// CHECK3-NEXT:    [[ADD3:%.*]] = add nsw i32 [[CONV]], [[TMP44]]
+// CHECK3-NEXT:    [[TMP45:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK3-NEXT:    call void @llvm.stackrestore(i8* [[TMP45]])
 // CHECK3-NEXT:    ret i32 [[ADD3]]
 //
 //
@@ -4946,7 +4886,7 @@ int bar(int n){
 // CHECK3-NEXT:    [[ADD6:%.*]] = add i32 [[TMP40]], 1
 // CHECK3-NEXT:    [[TMP41:%.*]] = zext i32 [[ADD6]] to i64
 // CHECK3-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 [[TMP41]])
-// CHECK3-NEXT:    [[TMP42:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l200.region_id, i32 5, i8** [[TMP34]], i8** [[TMP35]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_sizes.14, i32 0, i32 0), i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.15, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK3-NEXT:    [[TMP42:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l200.region_id, i32 5, i8** [[TMP34]], i8** [[TMP35]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_sizes.16, i32 0, i32 0), i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.17, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK3-NEXT:    [[TMP43:%.*]] = icmp ne i32 [[TMP42]], 0
 // CHECK3-NEXT:    br i1 [[TMP43]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK3:       omp_offload.failed:
@@ -5016,7 +4956,7 @@ int bar(int n){
 // CHECK3-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK3-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK3-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l183.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.17, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.18, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK3-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l183.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.19, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.20, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK3-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK3-NEXT:    br i1 [[TMP23]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK3:       omp_offload.failed:
@@ -5053,11 +4993,11 @@ int bar(int n){
 // CHECK3-NEXT:    [[TMP4:%.*]] = load i32, i32* [[B_ADDR]], align 4
 // CHECK3-NEXT:    store i32 [[TMP4]], i32* [[B_CASTED]], align 4
 // CHECK3-NEXT:    [[TMP5:%.*]] = load i32, i32* [[B_CASTED]], align 4
-// CHECK3-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i32, i32, i32, i16*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i32 [[TMP5]], i32 [[TMP1]], i32 [[TMP2]], i16* [[TMP3]])
+// CHECK3-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i32, i32, i32, i16*)* @.omp_outlined..12 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i32 [[TMP5]], i32 [[TMP1]], i32 [[TMP2]], i16* [[TMP3]])
 // CHECK3-NEXT:    ret void
 //
 //
-// CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..11
+// CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..12
 // CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR2]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -5179,11 +5119,11 @@ int bar(int n){
 // CHECK3-NEXT:    [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8*
 // CHECK3-NEXT:    store i8 [[TMP7]], i8* [[CONV3]], align 1
 // CHECK3-NEXT:    [[TMP8:%.*]] = load i32, i32* [[AAA_CASTED]], align 4
-// CHECK3-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, [10 x i32]*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], i32 [[TMP8]], [10 x i32]* [[TMP0]])
+// CHECK3-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], i32 [[TMP8]], [10 x i32]* [[TMP0]])
 // CHECK3-NEXT:    ret void
 //
 //
-// CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..13
+// CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..15
 // CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -5325,11 +5265,11 @@ int bar(int n){
 // CHECK3-NEXT:    [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16*
 // CHECK3-NEXT:    store i16 [[TMP3]], i16* [[CONV1]], align 2
 // CHECK3-NEXT:    [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4
-// CHECK3-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], [10 x i32]* [[TMP0]])
+// CHECK3-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], [10 x i32]* [[TMP0]])
 // CHECK3-NEXT:    ret void
 //
 //
-// CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..16
+// CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..18
 // CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -5628,14 +5568,14 @@ int bar(int n){
 // CHECK4-NEXT:    [[TMP95:%.*]] = mul nuw i32 5, [[TMP3]]
 // CHECK4-NEXT:    [[TMP96:%.*]] = mul nuw i32 [[TMP95]], 8
 // CHECK4-NEXT:    [[TMP97:%.*]] = sext i32 [[TMP96]] to i64
-// CHECK4-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 0
-// CHECK4-NEXT:    [[TMP99:%.*]] = bitcast i8** [[TMP98]] to i32*
-// CHECK4-NEXT:    store i32 [[TMP89]], i32* [[TMP99]], align 4
-// CHECK4-NEXT:    [[TMP100:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS24]], i32 0, i32 0
-// CHECK4-NEXT:    [[TMP101:%.*]] = bitcast i8** [[TMP100]] to i32*
-// CHECK4-NEXT:    store i32 [[TMP89]], i32* [[TMP101]], align 4
-// CHECK4-NEXT:    [[TMP102:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK4-NEXT:    store i64 4, i64* [[TMP102]], align 4
+// CHECK4-NEXT:    [[TMP98:%.*]] = bitcast [10 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK4-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP98]], i8* align 4 bitcast ([10 x i64]* @.offload_sizes.10 to i8*), i32 80, i1 false)
+// CHECK4-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 0
+// CHECK4-NEXT:    [[TMP100:%.*]] = bitcast i8** [[TMP99]] to i32*
+// CHECK4-NEXT:    store i32 [[TMP89]], i32* [[TMP100]], align 4
+// CHECK4-NEXT:    [[TMP101:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS24]], i32 0, i32 0
+// CHECK4-NEXT:    [[TMP102:%.*]] = bitcast i8** [[TMP101]] to i32*
+// CHECK4-NEXT:    store i32 [[TMP89]], i32* [[TMP102]], align 4
 // CHECK4-NEXT:    [[TMP103:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS25]], i32 0, i32 0
 // CHECK4-NEXT:    store i8* null, i8** [[TMP103]], align 4
 // CHECK4-NEXT:    [[TMP104:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 1
@@ -5644,97 +5584,83 @@ int bar(int n){
 // CHECK4-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS24]], i32 0, i32 1
 // CHECK4-NEXT:    [[TMP107:%.*]] = bitcast i8** [[TMP106]] to [10 x float]**
 // CHECK4-NEXT:    store [10 x float]* [[B]], [10 x float]** [[TMP107]], align 4
-// CHECK4-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK4-NEXT:    store i64 40, i64* [[TMP108]], align 4
-// CHECK4-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS25]], i32 0, i32 1
-// CHECK4-NEXT:    store i8* null, i8** [[TMP109]], align 4
-// CHECK4-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 2
-// CHECK4-NEXT:    [[TMP111:%.*]] = bitcast i8** [[TMP110]] to i32*
-// CHECK4-NEXT:    store i32 [[TMP1]], i32* [[TMP111]], align 4
-// CHECK4-NEXT:    [[TMP112:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS24]], i32 0, i32 2
-// CHECK4-NEXT:    [[TMP113:%.*]] = bitcast i8** [[TMP112]] to i32*
-// CHECK4-NEXT:    store i32 [[TMP1]], i32* [[TMP113]], align 4
-// CHECK4-NEXT:    [[TMP114:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK4-NEXT:    store i64 4, i64* [[TMP114]], align 4
-// CHECK4-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS25]], i32 0, i32 2
-// CHECK4-NEXT:    store i8* null, i8** [[TMP115]], align 4
-// CHECK4-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 3
+// CHECK4-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS25]], i32 0, i32 1
+// CHECK4-NEXT:    store i8* null, i8** [[TMP108]], align 4
+// CHECK4-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 2
+// CHECK4-NEXT:    [[TMP110:%.*]] = bitcast i8** [[TMP109]] to i32*
+// CHECK4-NEXT:    store i32 [[TMP1]], i32* [[TMP110]], align 4
+// CHECK4-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS24]], i32 0, i32 2
+// CHECK4-NEXT:    [[TMP112:%.*]] = bitcast i8** [[TMP111]] to i32*
+// CHECK4-NEXT:    store i32 [[TMP1]], i32* [[TMP112]], align 4
+// CHECK4-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS25]], i32 0, i32 2
+// CHECK4-NEXT:    store i8* null, i8** [[TMP113]], align 4
+// CHECK4-NEXT:    [[TMP114:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 3
+// CHECK4-NEXT:    [[TMP115:%.*]] = bitcast i8** [[TMP114]] to float**
+// CHECK4-NEXT:    store float* [[VLA]], float** [[TMP115]], align 4
+// CHECK4-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS24]], i32 0, i32 3
 // CHECK4-NEXT:    [[TMP117:%.*]] = bitcast i8** [[TMP116]] to float**
 // CHECK4-NEXT:    store float* [[VLA]], float** [[TMP117]], align 4
-// CHECK4-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS24]], i32 0, i32 3
-// CHECK4-NEXT:    [[TMP119:%.*]] = bitcast i8** [[TMP118]] to float**
-// CHECK4-NEXT:    store float* [[VLA]], float** [[TMP119]], align 4
-// CHECK4-NEXT:    [[TMP120:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK4-NEXT:    store i64 [[TMP94]], i64* [[TMP120]], align 4
-// CHECK4-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS25]], i32 0, i32 3
-// CHECK4-NEXT:    store i8* null, i8** [[TMP121]], align 4
-// CHECK4-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 4
+// CHECK4-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
+// CHECK4-NEXT:    store i64 [[TMP94]], i64* [[TMP118]], align 4
+// CHECK4-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS25]], i32 0, i32 3
+// CHECK4-NEXT:    store i8* null, i8** [[TMP119]], align 4
+// CHECK4-NEXT:    [[TMP120:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 4
+// CHECK4-NEXT:    [[TMP121:%.*]] = bitcast i8** [[TMP120]] to [5 x [10 x double]]**
+// CHECK4-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP121]], align 4
+// CHECK4-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS24]], i32 0, i32 4
 // CHECK4-NEXT:    [[TMP123:%.*]] = bitcast i8** [[TMP122]] to [5 x [10 x double]]**
 // CHECK4-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP123]], align 4
-// CHECK4-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS24]], i32 0, i32 4
-// CHECK4-NEXT:    [[TMP125:%.*]] = bitcast i8** [[TMP124]] to [5 x [10 x double]]**
-// CHECK4-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP125]], align 4
-// CHECK4-NEXT:    [[TMP126:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK4-NEXT:    store i64 400, i64* [[TMP126]], align 4
-// CHECK4-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS25]], i32 0, i32 4
-// CHECK4-NEXT:    store i8* null, i8** [[TMP127]], align 4
-// CHECK4-NEXT:    [[TMP128:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 5
-// CHECK4-NEXT:    [[TMP129:%.*]] = bitcast i8** [[TMP128]] to i32*
-// CHECK4-NEXT:    store i32 5, i32* [[TMP129]], align 4
-// CHECK4-NEXT:    [[TMP130:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS24]], i32 0, i32 5
+// CHECK4-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS25]], i32 0, i32 4
+// CHECK4-NEXT:    store i8* null, i8** [[TMP124]], align 4
+// CHECK4-NEXT:    [[TMP125:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 5
+// CHECK4-NEXT:    [[TMP126:%.*]] = bitcast i8** [[TMP125]] to i32*
+// CHECK4-NEXT:    store i32 5, i32* [[TMP126]], align 4
+// CHECK4-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS24]], i32 0, i32 5
+// CHECK4-NEXT:    [[TMP128:%.*]] = bitcast i8** [[TMP127]] to i32*
+// CHECK4-NEXT:    store i32 5, i32* [[TMP128]], align 4
+// CHECK4-NEXT:    [[TMP129:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS25]], i32 0, i32 5
+// CHECK4-NEXT:    store i8* null, i8** [[TMP129]], align 4
+// CHECK4-NEXT:    [[TMP130:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 6
 // CHECK4-NEXT:    [[TMP131:%.*]] = bitcast i8** [[TMP130]] to i32*
-// CHECK4-NEXT:    store i32 5, i32* [[TMP131]], align 4
-// CHECK4-NEXT:    [[TMP132:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 5
-// CHECK4-NEXT:    store i64 4, i64* [[TMP132]], align 4
-// CHECK4-NEXT:    [[TMP133:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS25]], i32 0, i32 5
-// CHECK4-NEXT:    store i8* null, i8** [[TMP133]], align 4
-// CHECK4-NEXT:    [[TMP134:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 6
-// CHECK4-NEXT:    [[TMP135:%.*]] = bitcast i8** [[TMP134]] to i32*
-// CHECK4-NEXT:    store i32 [[TMP3]], i32* [[TMP135]], align 4
-// CHECK4-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS24]], i32 0, i32 6
-// CHECK4-NEXT:    [[TMP137:%.*]] = bitcast i8** [[TMP136]] to i32*
-// CHECK4-NEXT:    store i32 [[TMP3]], i32* [[TMP137]], align 4
-// CHECK4-NEXT:    [[TMP138:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 6
-// CHECK4-NEXT:    store i64 4, i64* [[TMP138]], align 4
-// CHECK4-NEXT:    [[TMP139:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS25]], i32 0, i32 6
-// CHECK4-NEXT:    store i8* null, i8** [[TMP139]], align 4
-// CHECK4-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 7
-// CHECK4-NEXT:    [[TMP141:%.*]] = bitcast i8** [[TMP140]] to double**
-// CHECK4-NEXT:    store double* [[VLA1]], double** [[TMP141]], align 4
-// CHECK4-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS24]], i32 0, i32 7
-// CHECK4-NEXT:    [[TMP143:%.*]] = bitcast i8** [[TMP142]] to double**
-// CHECK4-NEXT:    store double* [[VLA1]], double** [[TMP143]], align 4
-// CHECK4-NEXT:    [[TMP144:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
-// CHECK4-NEXT:    store i64 [[TMP97]], i64* [[TMP144]], align 4
-// CHECK4-NEXT:    [[TMP145:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS25]], i32 0, i32 7
+// CHECK4-NEXT:    store i32 [[TMP3]], i32* [[TMP131]], align 4
+// CHECK4-NEXT:    [[TMP132:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS24]], i32 0, i32 6
+// CHECK4-NEXT:    [[TMP133:%.*]] = bitcast i8** [[TMP132]] to i32*
+// CHECK4-NEXT:    store i32 [[TMP3]], i32* [[TMP133]], align 4
+// CHECK4-NEXT:    [[TMP134:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS25]], i32 0, i32 6
+// CHECK4-NEXT:    store i8* null, i8** [[TMP134]], align 4
+// CHECK4-NEXT:    [[TMP135:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 7
+// CHECK4-NEXT:    [[TMP136:%.*]] = bitcast i8** [[TMP135]] to double**
+// CHECK4-NEXT:    store double* [[VLA1]], double** [[TMP136]], align 4
+// CHECK4-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS24]], i32 0, i32 7
+// CHECK4-NEXT:    [[TMP138:%.*]] = bitcast i8** [[TMP137]] to double**
+// CHECK4-NEXT:    store double* [[VLA1]], double** [[TMP138]], align 4
+// CHECK4-NEXT:    [[TMP139:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
+// CHECK4-NEXT:    store i64 [[TMP97]], i64* [[TMP139]], align 4
+// CHECK4-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS25]], i32 0, i32 7
+// CHECK4-NEXT:    store i8* null, i8** [[TMP140]], align 4
+// CHECK4-NEXT:    [[TMP141:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 8
+// CHECK4-NEXT:    [[TMP142:%.*]] = bitcast i8** [[TMP141]] to %struct.TT**
+// CHECK4-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP142]], align 4
+// CHECK4-NEXT:    [[TMP143:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS24]], i32 0, i32 8
+// CHECK4-NEXT:    [[TMP144:%.*]] = bitcast i8** [[TMP143]] to %struct.TT**
+// CHECK4-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP144]], align 4
+// CHECK4-NEXT:    [[TMP145:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS25]], i32 0, i32 8
 // CHECK4-NEXT:    store i8* null, i8** [[TMP145]], align 4
-// CHECK4-NEXT:    [[TMP146:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 8
-// CHECK4-NEXT:    [[TMP147:%.*]] = bitcast i8** [[TMP146]] to %struct.TT**
-// CHECK4-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP147]], align 4
-// CHECK4-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS24]], i32 0, i32 8
-// CHECK4-NEXT:    [[TMP149:%.*]] = bitcast i8** [[TMP148]] to %struct.TT**
-// CHECK4-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP149]], align 4
-// CHECK4-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 8
-// CHECK4-NEXT:    store i64 12, i64* [[TMP150]], align 4
-// CHECK4-NEXT:    [[TMP151:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS25]], i32 0, i32 8
-// CHECK4-NEXT:    store i8* null, i8** [[TMP151]], align 4
-// CHECK4-NEXT:    [[TMP152:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 9
-// CHECK4-NEXT:    [[TMP153:%.*]] = bitcast i8** [[TMP152]] to i32*
-// CHECK4-NEXT:    store i32 [[TMP91]], i32* [[TMP153]], align 4
-// CHECK4-NEXT:    [[TMP154:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS24]], i32 0, i32 9
-// CHECK4-NEXT:    [[TMP155:%.*]] = bitcast i8** [[TMP154]] to i32*
-// CHECK4-NEXT:    store i32 [[TMP91]], i32* [[TMP155]], align 4
-// CHECK4-NEXT:    [[TMP156:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 9
-// CHECK4-NEXT:    store i64 4, i64* [[TMP156]], align 4
-// CHECK4-NEXT:    [[TMP157:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS25]], i32 0, i32 9
-// CHECK4-NEXT:    store i8* null, i8** [[TMP157]], align 4
-// CHECK4-NEXT:    [[TMP158:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 0
-// CHECK4-NEXT:    [[TMP159:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS24]], i32 0, i32 0
-// CHECK4-NEXT:    [[TMP160:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK4-NEXT:    [[TMP146:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 9
+// CHECK4-NEXT:    [[TMP147:%.*]] = bitcast i8** [[TMP146]] to i32*
+// CHECK4-NEXT:    store i32 [[TMP91]], i32* [[TMP147]], align 4
+// CHECK4-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS24]], i32 0, i32 9
+// CHECK4-NEXT:    [[TMP149:%.*]] = bitcast i8** [[TMP148]] to i32*
+// CHECK4-NEXT:    store i32 [[TMP91]], i32* [[TMP149]], align 4
+// CHECK4-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS25]], i32 0, i32 9
+// CHECK4-NEXT:    store i8* null, i8** [[TMP150]], align 4
+// CHECK4-NEXT:    [[TMP151:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 0
+// CHECK4-NEXT:    [[TMP152:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS24]], i32 0, i32 0
+// CHECK4-NEXT:    [[TMP153:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
 // CHECK4-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK4-NEXT:    [[TMP161:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l145.region_id, i32 10, i8** [[TMP158]], i8** [[TMP159]], i64* [[TMP160]], i64* getelementptr inbounds ([10 x i64], [10 x i64]* @.offload_maptypes.10, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK4-NEXT:    [[TMP162:%.*]] = icmp ne i32 [[TMP161]], 0
-// CHECK4-NEXT:    br i1 [[TMP162]], label [[OMP_OFFLOAD_FAILED27:%.*]], label [[OMP_OFFLOAD_CONT28:%.*]]
+// CHECK4-NEXT:    [[TMP154:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l145.region_id, i32 10, i8** [[TMP151]], i8** [[TMP152]], i64* [[TMP153]], i64* getelementptr inbounds ([10 x i64], [10 x i64]* @.offload_maptypes.11, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK4-NEXT:    [[TMP155:%.*]] = icmp ne i32 [[TMP154]], 0
+// CHECK4-NEXT:    br i1 [[TMP155]], label [[OMP_OFFLOAD_FAILED27:%.*]], label [[OMP_OFFLOAD_CONT28:%.*]]
 // CHECK4:       omp_offload.failed27:
 // CHECK4-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l145(i32 [[TMP89]], [10 x float]* [[B]], i32 [[TMP1]], float* [[VLA]], [5 x [10 x double]]* [[C]], i32 5, i32 [[TMP3]], double* [[VLA1]], %struct.TT* [[D]], i32 [[TMP91]]) #[[ATTR3]]
 // CHECK4-NEXT:    br label [[OMP_OFFLOAD_CONT28]]
@@ -5744,10 +5670,10 @@ int bar(int n){
 // CHECK4-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l145(i32 [[TMP89]], [10 x float]* [[B]], i32 [[TMP1]], float* [[VLA]], [5 x [10 x double]]* [[C]], i32 5, i32 [[TMP3]], double* [[VLA1]], %struct.TT* [[D]], i32 [[TMP91]]) #[[ATTR3]]
 // CHECK4-NEXT:    br label [[OMP_IF_END30]]
 // CHECK4:       omp_if.end30:
-// CHECK4-NEXT:    [[TMP163:%.*]] = load i32, i32* [[A]], align 4
-// CHECK4-NEXT:    [[TMP164:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK4-NEXT:    call void @llvm.stackrestore(i8* [[TMP164]])
-// CHECK4-NEXT:    ret i32 [[TMP163]]
+// CHECK4-NEXT:    [[TMP156:%.*]] = load i32, i32* [[A]], align 4
+// CHECK4-NEXT:    [[TMP157:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK4-NEXT:    call void @llvm.stackrestore(i8* [[TMP157]])
+// CHECK4-NEXT:    ret i32 [[TMP156]]
 //
 //
 // CHECK4-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l103
@@ -6451,14 +6377,14 @@ int bar(int n){
 // CHECK4-NEXT:    [[TMP7:%.*]] = mul nuw i32 2, [[TMP1]]
 // CHECK4-NEXT:    [[TMP8:%.*]] = mul nuw i32 [[TMP7]], 2
 // CHECK4-NEXT:    [[TMP9:%.*]] = sext i32 [[TMP8]] to i64
-// CHECK4-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK4-NEXT:    [[TMP11:%.*]] = bitcast i8** [[TMP10]] to %struct.S1**
-// CHECK4-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP11]], align 4
-// CHECK4-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK4-NEXT:    [[TMP13:%.*]] = bitcast i8** [[TMP12]] to double**
-// CHECK4-NEXT:    store double* [[A]], double** [[TMP13]], align 4
-// CHECK4-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK4-NEXT:    store i64 8, i64* [[TMP14]], align 4
+// CHECK4-NEXT:    [[TMP10:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK4-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP10]], i8* align 4 bitcast ([5 x i64]* @.offload_sizes.13 to i8*), i32 40, i1 false)
+// CHECK4-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK4-NEXT:    [[TMP12:%.*]] = bitcast i8** [[TMP11]] to %struct.S1**
+// CHECK4-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP12]], align 4
+// CHECK4-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK4-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to double**
+// CHECK4-NEXT:    store double* [[A]], double** [[TMP14]], align 4
 // CHECK4-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
 // CHECK4-NEXT:    store i8* null, i8** [[TMP15]], align 4
 // CHECK4-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -6467,47 +6393,41 @@ int bar(int n){
 // CHECK4-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK4-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32*
 // CHECK4-NEXT:    store i32 [[TMP5]], i32* [[TMP19]], align 4
-// CHECK4-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK4-NEXT:    store i64 4, i64* [[TMP20]], align 4
-// CHECK4-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK4-NEXT:    store i8* null, i8** [[TMP21]], align 4
-// CHECK4-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK4-NEXT:    [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i32*
-// CHECK4-NEXT:    store i32 2, i32* [[TMP23]], align 4
-// CHECK4-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK4-NEXT:    [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i32*
-// CHECK4-NEXT:    store i32 2, i32* [[TMP25]], align 4
-// CHECK4-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK4-NEXT:    store i64 4, i64* [[TMP26]], align 4
-// CHECK4-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK4-NEXT:    store i8* null, i8** [[TMP27]], align 4
-// CHECK4-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK4-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK4-NEXT:    store i8* null, i8** [[TMP20]], align 4
+// CHECK4-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK4-NEXT:    [[TMP22:%.*]] = bitcast i8** [[TMP21]] to i32*
+// CHECK4-NEXT:    store i32 2, i32* [[TMP22]], align 4
+// CHECK4-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK4-NEXT:    [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i32*
+// CHECK4-NEXT:    store i32 2, i32* [[TMP24]], align 4
+// CHECK4-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK4-NEXT:    store i8* null, i8** [[TMP25]], align 4
+// CHECK4-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK4-NEXT:    [[TMP27:%.*]] = bitcast i8** [[TMP26]] to i32*
+// CHECK4-NEXT:    store i32 [[TMP1]], i32* [[TMP27]], align 4
+// CHECK4-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
 // CHECK4-NEXT:    [[TMP29:%.*]] = bitcast i8** [[TMP28]] to i32*
 // CHECK4-NEXT:    store i32 [[TMP1]], i32* [[TMP29]], align 4
-// CHECK4-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK4-NEXT:    [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i32*
-// CHECK4-NEXT:    store i32 [[TMP1]], i32* [[TMP31]], align 4
-// CHECK4-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK4-NEXT:    store i64 4, i64* [[TMP32]], align 4
-// CHECK4-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
-// CHECK4-NEXT:    store i8* null, i8** [[TMP33]], align 4
-// CHECK4-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK4-NEXT:    [[TMP35:%.*]] = bitcast i8** [[TMP34]] to i16**
-// CHECK4-NEXT:    store i16* [[VLA]], i16** [[TMP35]], align 4
-// CHECK4-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
-// CHECK4-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i16**
-// CHECK4-NEXT:    store i16* [[VLA]], i16** [[TMP37]], align 4
-// CHECK4-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK4-NEXT:    store i64 [[TMP9]], i64* [[TMP38]], align 4
-// CHECK4-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
-// CHECK4-NEXT:    store i8* null, i8** [[TMP39]], align 4
-// CHECK4-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK4-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK4-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK4-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
+// CHECK4-NEXT:    store i8* null, i8** [[TMP30]], align 4
+// CHECK4-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK4-NEXT:    [[TMP32:%.*]] = bitcast i8** [[TMP31]] to i16**
+// CHECK4-NEXT:    store i16* [[VLA]], i16** [[TMP32]], align 4
+// CHECK4-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK4-NEXT:    [[TMP34:%.*]] = bitcast i8** [[TMP33]] to i16**
+// CHECK4-NEXT:    store i16* [[VLA]], i16** [[TMP34]], align 4
+// CHECK4-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
+// CHECK4-NEXT:    store i64 [[TMP9]], i64* [[TMP35]], align 4
+// CHECK4-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
+// CHECK4-NEXT:    store i8* null, i8** [[TMP36]], align 4
+// CHECK4-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK4-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK4-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
 // CHECK4-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK4-NEXT:    [[TMP43:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l218.region_id, i32 5, i8** [[TMP40]], i8** [[TMP41]], i64* [[TMP42]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.12, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK4-NEXT:    [[TMP44:%.*]] = icmp ne i32 [[TMP43]], 0
-// CHECK4-NEXT:    br i1 [[TMP44]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK4-NEXT:    [[TMP40:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l218.region_id, i32 5, i8** [[TMP37]], i8** [[TMP38]], i64* [[TMP39]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.14, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK4-NEXT:    [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0
+// CHECK4-NEXT:    br i1 [[TMP41]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK4:       omp_offload.failed:
 // CHECK4-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l218(%struct.S1* [[THIS1]], i32 [[TMP5]], i32 2, i32 [[TMP1]], i16* [[VLA]]) #[[ATTR3]]
 // CHECK4-NEXT:    br label [[OMP_OFFLOAD_CONT]]
@@ -6517,15 +6437,15 @@ int bar(int n){
 // CHECK4-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l218(%struct.S1* [[THIS1]], i32 [[TMP5]], i32 2, i32 [[TMP1]], i16* [[VLA]]) #[[ATTR3]]
 // CHECK4-NEXT:    br label [[OMP_IF_END]]
 // CHECK4:       omp_if.end:
-// CHECK4-NEXT:    [[TMP45:%.*]] = mul nsw i32 1, [[TMP1]]
-// CHECK4-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i32 [[TMP45]]
+// CHECK4-NEXT:    [[TMP42:%.*]] = mul nsw i32 1, [[TMP1]]
+// CHECK4-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i32 [[TMP42]]
 // CHECK4-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1
-// CHECK4-NEXT:    [[TMP46:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
-// CHECK4-NEXT:    [[CONV:%.*]] = sext i16 [[TMP46]] to i32
-// CHECK4-NEXT:    [[TMP47:%.*]] = load i32, i32* [[B]], align 4
-// CHECK4-NEXT:    [[ADD3:%.*]] = add nsw i32 [[CONV]], [[TMP47]]
-// CHECK4-NEXT:    [[TMP48:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK4-NEXT:    call void @llvm.stackrestore(i8* [[TMP48]])
+// CHECK4-NEXT:    [[TMP43:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
+// CHECK4-NEXT:    [[CONV:%.*]] = sext i16 [[TMP43]] to i32
+// CHECK4-NEXT:    [[TMP44:%.*]] = load i32, i32* [[B]], align 4
+// CHECK4-NEXT:    [[ADD3:%.*]] = add nsw i32 [[CONV]], [[TMP44]]
+// CHECK4-NEXT:    [[TMP45:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK4-NEXT:    call void @llvm.stackrestore(i8* [[TMP45]])
 // CHECK4-NEXT:    ret i32 [[ADD3]]
 //
 //
@@ -6628,7 +6548,7 @@ int bar(int n){
 // CHECK4-NEXT:    [[ADD6:%.*]] = add i32 [[TMP40]], 1
 // CHECK4-NEXT:    [[TMP41:%.*]] = zext i32 [[ADD6]] to i64
 // CHECK4-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 [[TMP41]])
-// CHECK4-NEXT:    [[TMP42:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l200.region_id, i32 5, i8** [[TMP34]], i8** [[TMP35]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_sizes.14, i32 0, i32 0), i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.15, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK4-NEXT:    [[TMP42:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l200.region_id, i32 5, i8** [[TMP34]], i8** [[TMP35]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_sizes.16, i32 0, i32 0), i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.17, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK4-NEXT:    [[TMP43:%.*]] = icmp ne i32 [[TMP42]], 0
 // CHECK4-NEXT:    br i1 [[TMP43]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK4:       omp_offload.failed:
@@ -6698,7 +6618,7 @@ int bar(int n){
 // CHECK4-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK4-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK4-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK4-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l183.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.17, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.18, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK4-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l183.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.19, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.20, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK4-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK4-NEXT:    br i1 [[TMP23]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK4:       omp_offload.failed:
@@ -6735,11 +6655,11 @@ int bar(int n){
 // CHECK4-NEXT:    [[TMP4:%.*]] = load i32, i32* [[B_ADDR]], align 4
 // CHECK4-NEXT:    store i32 [[TMP4]], i32* [[B_CASTED]], align 4
 // CHECK4-NEXT:    [[TMP5:%.*]] = load i32, i32* [[B_CASTED]], align 4
-// CHECK4-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i32, i32, i32, i16*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i32 [[TMP5]], i32 [[TMP1]], i32 [[TMP2]], i16* [[TMP3]])
+// CHECK4-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i32, i32, i32, i16*)* @.omp_outlined..12 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i32 [[TMP5]], i32 [[TMP1]], i32 [[TMP2]], i16* [[TMP3]])
 // CHECK4-NEXT:    ret void
 //
 //
-// CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..11
+// CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..12
 // CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR2]] {
 // CHECK4-NEXT:  entry:
 // CHECK4-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -6861,11 +6781,11 @@ int bar(int n){
 // CHECK4-NEXT:    [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8*
 // CHECK4-NEXT:    store i8 [[TMP7]], i8* [[CONV3]], align 1
 // CHECK4-NEXT:    [[TMP8:%.*]] = load i32, i32* [[AAA_CASTED]], align 4
-// CHECK4-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, [10 x i32]*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], i32 [[TMP8]], [10 x i32]* [[TMP0]])
+// CHECK4-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], i32 [[TMP8]], [10 x i32]* [[TMP0]])
 // CHECK4-NEXT:    ret void
 //
 //
-// CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..13
+// CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..15
 // CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] {
 // CHECK4-NEXT:  entry:
 // CHECK4-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -7007,11 +6927,11 @@ int bar(int n){
 // CHECK4-NEXT:    [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16*
 // CHECK4-NEXT:    store i16 [[TMP3]], i16* [[CONV1]], align 2
 // CHECK4-NEXT:    [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4
-// CHECK4-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], [10 x i32]* [[TMP0]])
+// CHECK4-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], [10 x i32]* [[TMP0]])
 // CHECK4-NEXT:    ret void
 //
 //
-// CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..16
+// CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..18
 // CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] {
 // CHECK4-NEXT:  entry:
 // CHECK4-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -10662,14 +10582,14 @@ int bar(int n){
 // CHECK17-NEXT:    [[TMP95:%.*]] = mul nuw i64 [[TMP2]], 4
 // CHECK17-NEXT:    [[TMP96:%.*]] = mul nuw i64 5, [[TMP5]]
 // CHECK17-NEXT:    [[TMP97:%.*]] = mul nuw i64 [[TMP96]], 8
-// CHECK17-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP99:%.*]] = bitcast i8** [[TMP98]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP91]], i64* [[TMP99]], align 8
-// CHECK17-NEXT:    [[TMP100:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP101:%.*]] = bitcast i8** [[TMP100]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP91]], i64* [[TMP101]], align 8
-// CHECK17-NEXT:    [[TMP102:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK17-NEXT:    store i64 4, i64* [[TMP102]], align 8
+// CHECK17-NEXT:    [[TMP98:%.*]] = bitcast [10 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK17-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP98]], i8* align 8 bitcast ([10 x i64]* @.offload_sizes.10 to i8*), i64 80, i1 false)
+// CHECK17-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP100:%.*]] = bitcast i8** [[TMP99]] to i64*
+// CHECK17-NEXT:    store i64 [[TMP91]], i64* [[TMP100]], align 8
+// CHECK17-NEXT:    [[TMP101:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP102:%.*]] = bitcast i8** [[TMP101]] to i64*
+// CHECK17-NEXT:    store i64 [[TMP91]], i64* [[TMP102]], align 8
 // CHECK17-NEXT:    [[TMP103:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 0
 // CHECK17-NEXT:    store i8* null, i8** [[TMP103]], align 8
 // CHECK17-NEXT:    [[TMP104:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 1
@@ -10678,97 +10598,83 @@ int bar(int n){
 // CHECK17-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 1
 // CHECK17-NEXT:    [[TMP107:%.*]] = bitcast i8** [[TMP106]] to [10 x float]**
 // CHECK17-NEXT:    store [10 x float]* [[B]], [10 x float]** [[TMP107]], align 8
-// CHECK17-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK17-NEXT:    store i64 40, i64* [[TMP108]], align 8
-// CHECK17-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 1
-// CHECK17-NEXT:    store i8* null, i8** [[TMP109]], align 8
-// CHECK17-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 2
-// CHECK17-NEXT:    [[TMP111:%.*]] = bitcast i8** [[TMP110]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP2]], i64* [[TMP111]], align 8
-// CHECK17-NEXT:    [[TMP112:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 2
-// CHECK17-NEXT:    [[TMP113:%.*]] = bitcast i8** [[TMP112]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP2]], i64* [[TMP113]], align 8
-// CHECK17-NEXT:    [[TMP114:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK17-NEXT:    store i64 8, i64* [[TMP114]], align 8
-// CHECK17-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 2
-// CHECK17-NEXT:    store i8* null, i8** [[TMP115]], align 8
-// CHECK17-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 3
+// CHECK17-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 1
+// CHECK17-NEXT:    store i8* null, i8** [[TMP108]], align 8
+// CHECK17-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 2
+// CHECK17-NEXT:    [[TMP110:%.*]] = bitcast i8** [[TMP109]] to i64*
+// CHECK17-NEXT:    store i64 [[TMP2]], i64* [[TMP110]], align 8
+// CHECK17-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 2
+// CHECK17-NEXT:    [[TMP112:%.*]] = bitcast i8** [[TMP111]] to i64*
+// CHECK17-NEXT:    store i64 [[TMP2]], i64* [[TMP112]], align 8
+// CHECK17-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 2
+// CHECK17-NEXT:    store i8* null, i8** [[TMP113]], align 8
+// CHECK17-NEXT:    [[TMP114:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 3
+// CHECK17-NEXT:    [[TMP115:%.*]] = bitcast i8** [[TMP114]] to float**
+// CHECK17-NEXT:    store float* [[VLA]], float** [[TMP115]], align 8
+// CHECK17-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 3
 // CHECK17-NEXT:    [[TMP117:%.*]] = bitcast i8** [[TMP116]] to float**
 // CHECK17-NEXT:    store float* [[VLA]], float** [[TMP117]], align 8
-// CHECK17-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 3
-// CHECK17-NEXT:    [[TMP119:%.*]] = bitcast i8** [[TMP118]] to float**
-// CHECK17-NEXT:    store float* [[VLA]], float** [[TMP119]], align 8
-// CHECK17-NEXT:    [[TMP120:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK17-NEXT:    store i64 [[TMP95]], i64* [[TMP120]], align 8
-// CHECK17-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 3
-// CHECK17-NEXT:    store i8* null, i8** [[TMP121]], align 8
-// CHECK17-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 4
+// CHECK17-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
+// CHECK17-NEXT:    store i64 [[TMP95]], i64* [[TMP118]], align 8
+// CHECK17-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 3
+// CHECK17-NEXT:    store i8* null, i8** [[TMP119]], align 8
+// CHECK17-NEXT:    [[TMP120:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 4
+// CHECK17-NEXT:    [[TMP121:%.*]] = bitcast i8** [[TMP120]] to [5 x [10 x double]]**
+// CHECK17-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP121]], align 8
+// CHECK17-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 4
 // CHECK17-NEXT:    [[TMP123:%.*]] = bitcast i8** [[TMP122]] to [5 x [10 x double]]**
 // CHECK17-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP123]], align 8
-// CHECK17-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 4
-// CHECK17-NEXT:    [[TMP125:%.*]] = bitcast i8** [[TMP124]] to [5 x [10 x double]]**
-// CHECK17-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP125]], align 8
-// CHECK17-NEXT:    [[TMP126:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK17-NEXT:    store i64 400, i64* [[TMP126]], align 8
-// CHECK17-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 4
-// CHECK17-NEXT:    store i8* null, i8** [[TMP127]], align 8
-// CHECK17-NEXT:    [[TMP128:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 5
-// CHECK17-NEXT:    [[TMP129:%.*]] = bitcast i8** [[TMP128]] to i64*
-// CHECK17-NEXT:    store i64 5, i64* [[TMP129]], align 8
-// CHECK17-NEXT:    [[TMP130:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 5
+// CHECK17-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 4
+// CHECK17-NEXT:    store i8* null, i8** [[TMP124]], align 8
+// CHECK17-NEXT:    [[TMP125:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 5
+// CHECK17-NEXT:    [[TMP126:%.*]] = bitcast i8** [[TMP125]] to i64*
+// CHECK17-NEXT:    store i64 5, i64* [[TMP126]], align 8
+// CHECK17-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 5
+// CHECK17-NEXT:    [[TMP128:%.*]] = bitcast i8** [[TMP127]] to i64*
+// CHECK17-NEXT:    store i64 5, i64* [[TMP128]], align 8
+// CHECK17-NEXT:    [[TMP129:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 5
+// CHECK17-NEXT:    store i8* null, i8** [[TMP129]], align 8
+// CHECK17-NEXT:    [[TMP130:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 6
 // CHECK17-NEXT:    [[TMP131:%.*]] = bitcast i8** [[TMP130]] to i64*
-// CHECK17-NEXT:    store i64 5, i64* [[TMP131]], align 8
-// CHECK17-NEXT:    [[TMP132:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 5
-// CHECK17-NEXT:    store i64 8, i64* [[TMP132]], align 8
-// CHECK17-NEXT:    [[TMP133:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 5
-// CHECK17-NEXT:    store i8* null, i8** [[TMP133]], align 8
-// CHECK17-NEXT:    [[TMP134:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 6
-// CHECK17-NEXT:    [[TMP135:%.*]] = bitcast i8** [[TMP134]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP5]], i64* [[TMP135]], align 8
-// CHECK17-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 6
-// CHECK17-NEXT:    [[TMP137:%.*]] = bitcast i8** [[TMP136]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP5]], i64* [[TMP137]], align 8
-// CHECK17-NEXT:    [[TMP138:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 6
-// CHECK17-NEXT:    store i64 8, i64* [[TMP138]], align 8
-// CHECK17-NEXT:    [[TMP139:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 6
-// CHECK17-NEXT:    store i8* null, i8** [[TMP139]], align 8
-// CHECK17-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 7
-// CHECK17-NEXT:    [[TMP141:%.*]] = bitcast i8** [[TMP140]] to double**
-// CHECK17-NEXT:    store double* [[VLA1]], double** [[TMP141]], align 8
-// CHECK17-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 7
-// CHECK17-NEXT:    [[TMP143:%.*]] = bitcast i8** [[TMP142]] to double**
-// CHECK17-NEXT:    store double* [[VLA1]], double** [[TMP143]], align 8
-// CHECK17-NEXT:    [[TMP144:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
-// CHECK17-NEXT:    store i64 [[TMP97]], i64* [[TMP144]], align 8
-// CHECK17-NEXT:    [[TMP145:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 7
+// CHECK17-NEXT:    store i64 [[TMP5]], i64* [[TMP131]], align 8
+// CHECK17-NEXT:    [[TMP132:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 6
+// CHECK17-NEXT:    [[TMP133:%.*]] = bitcast i8** [[TMP132]] to i64*
+// CHECK17-NEXT:    store i64 [[TMP5]], i64* [[TMP133]], align 8
+// CHECK17-NEXT:    [[TMP134:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 6
+// CHECK17-NEXT:    store i8* null, i8** [[TMP134]], align 8
+// CHECK17-NEXT:    [[TMP135:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 7
+// CHECK17-NEXT:    [[TMP136:%.*]] = bitcast i8** [[TMP135]] to double**
+// CHECK17-NEXT:    store double* [[VLA1]], double** [[TMP136]], align 8
+// CHECK17-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 7
+// CHECK17-NEXT:    [[TMP138:%.*]] = bitcast i8** [[TMP137]] to double**
+// CHECK17-NEXT:    store double* [[VLA1]], double** [[TMP138]], align 8
+// CHECK17-NEXT:    [[TMP139:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
+// CHECK17-NEXT:    store i64 [[TMP97]], i64* [[TMP139]], align 8
+// CHECK17-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 7
+// CHECK17-NEXT:    store i8* null, i8** [[TMP140]], align 8
+// CHECK17-NEXT:    [[TMP141:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 8
+// CHECK17-NEXT:    [[TMP142:%.*]] = bitcast i8** [[TMP141]] to %struct.TT**
+// CHECK17-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP142]], align 8
+// CHECK17-NEXT:    [[TMP143:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 8
+// CHECK17-NEXT:    [[TMP144:%.*]] = bitcast i8** [[TMP143]] to %struct.TT**
+// CHECK17-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP144]], align 8
+// CHECK17-NEXT:    [[TMP145:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 8
 // CHECK17-NEXT:    store i8* null, i8** [[TMP145]], align 8
-// CHECK17-NEXT:    [[TMP146:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 8
-// CHECK17-NEXT:    [[TMP147:%.*]] = bitcast i8** [[TMP146]] to %struct.TT**
-// CHECK17-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP147]], align 8
-// CHECK17-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 8
-// CHECK17-NEXT:    [[TMP149:%.*]] = bitcast i8** [[TMP148]] to %struct.TT**
-// CHECK17-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP149]], align 8
-// CHECK17-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 8
-// CHECK17-NEXT:    store i64 16, i64* [[TMP150]], align 8
-// CHECK17-NEXT:    [[TMP151:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 8
-// CHECK17-NEXT:    store i8* null, i8** [[TMP151]], align 8
-// CHECK17-NEXT:    [[TMP152:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 9
-// CHECK17-NEXT:    [[TMP153:%.*]] = bitcast i8** [[TMP152]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP93]], i64* [[TMP153]], align 8
-// CHECK17-NEXT:    [[TMP154:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 9
-// CHECK17-NEXT:    [[TMP155:%.*]] = bitcast i8** [[TMP154]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP93]], i64* [[TMP155]], align 8
-// CHECK17-NEXT:    [[TMP156:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 9
-// CHECK17-NEXT:    store i64 4, i64* [[TMP156]], align 8
-// CHECK17-NEXT:    [[TMP157:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 9
-// CHECK17-NEXT:    store i8* null, i8** [[TMP157]], align 8
-// CHECK17-NEXT:    [[TMP158:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP159:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP160:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP146:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 9
+// CHECK17-NEXT:    [[TMP147:%.*]] = bitcast i8** [[TMP146]] to i64*
+// CHECK17-NEXT:    store i64 [[TMP93]], i64* [[TMP147]], align 8
+// CHECK17-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 9
+// CHECK17-NEXT:    [[TMP149:%.*]] = bitcast i8** [[TMP148]] to i64*
+// CHECK17-NEXT:    store i64 [[TMP93]], i64* [[TMP149]], align 8
+// CHECK17-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 9
+// CHECK17-NEXT:    store i8* null, i8** [[TMP150]], align 8
+// CHECK17-NEXT:    [[TMP151:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP152:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP153:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
 // CHECK17-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK17-NEXT:    [[TMP161:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l145.region_id, i32 10, i8** [[TMP158]], i8** [[TMP159]], i64* [[TMP160]], i64* getelementptr inbounds ([10 x i64], [10 x i64]* @.offload_maptypes.10, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK17-NEXT:    [[TMP162:%.*]] = icmp ne i32 [[TMP161]], 0
-// CHECK17-NEXT:    br i1 [[TMP162]], label [[OMP_OFFLOAD_FAILED33:%.*]], label [[OMP_OFFLOAD_CONT34:%.*]]
+// CHECK17-NEXT:    [[TMP154:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l145.region_id, i32 10, i8** [[TMP151]], i8** [[TMP152]], i64* [[TMP153]], i64* getelementptr inbounds ([10 x i64], [10 x i64]* @.offload_maptypes.11, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK17-NEXT:    [[TMP155:%.*]] = icmp ne i32 [[TMP154]], 0
+// CHECK17-NEXT:    br i1 [[TMP155]], label [[OMP_OFFLOAD_FAILED33:%.*]], label [[OMP_OFFLOAD_CONT34:%.*]]
 // CHECK17:       omp_offload.failed33:
 // CHECK17-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l145(i64 [[TMP91]], [10 x float]* [[B]], i64 [[TMP2]], float* [[VLA]], [5 x [10 x double]]* [[C]], i64 5, i64 [[TMP5]], double* [[VLA1]], %struct.TT* [[D]], i64 [[TMP93]]) #[[ATTR3]]
 // CHECK17-NEXT:    br label [[OMP_OFFLOAD_CONT34]]
@@ -10778,10 +10684,10 @@ int bar(int n){
 // CHECK17-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l145(i64 [[TMP91]], [10 x float]* [[B]], i64 [[TMP2]], float* [[VLA]], [5 x [10 x double]]* [[C]], i64 5, i64 [[TMP5]], double* [[VLA1]], %struct.TT* [[D]], i64 [[TMP93]]) #[[ATTR3]]
 // CHECK17-NEXT:    br label [[OMP_IF_END36]]
 // CHECK17:       omp_if.end36:
-// CHECK17-NEXT:    [[TMP163:%.*]] = load i32, i32* [[A]], align 4
-// CHECK17-NEXT:    [[TMP164:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK17-NEXT:    call void @llvm.stackrestore(i8* [[TMP164]])
-// CHECK17-NEXT:    ret i32 [[TMP163]]
+// CHECK17-NEXT:    [[TMP156:%.*]] = load i32, i32* [[A]], align 4
+// CHECK17-NEXT:    [[TMP157:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK17-NEXT:    call void @llvm.stackrestore(i8* [[TMP157]])
+// CHECK17-NEXT:    ret i32 [[TMP156]]
 //
 //
 // CHECK17-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l103
@@ -11502,14 +11408,14 @@ int bar(int n){
 // CHECK17-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0
 // CHECK17-NEXT:    [[TMP8:%.*]] = mul nuw i64 2, [[TMP2]]
 // CHECK17-NEXT:    [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 2
-// CHECK17-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP11:%.*]] = bitcast i8** [[TMP10]] to %struct.S1**
-// CHECK17-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP11]], align 8
-// CHECK17-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP13:%.*]] = bitcast i8** [[TMP12]] to double**
-// CHECK17-NEXT:    store double* [[A]], double** [[TMP13]], align 8
-// CHECK17-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK17-NEXT:    store i64 8, i64* [[TMP14]], align 8
+// CHECK17-NEXT:    [[TMP10:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK17-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP10]], i8* align 8 bitcast ([5 x i64]* @.offload_sizes.13 to i8*), i64 40, i1 false)
+// CHECK17-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP12:%.*]] = bitcast i8** [[TMP11]] to %struct.S1**
+// CHECK17-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP12]], align 8
+// CHECK17-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to double**
+// CHECK17-NEXT:    store double* [[A]], double** [[TMP14]], align 8
 // CHECK17-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
 // CHECK17-NEXT:    store i8* null, i8** [[TMP15]], align 8
 // CHECK17-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -11518,47 +11424,41 @@ int bar(int n){
 // CHECK17-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK17-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i64*
 // CHECK17-NEXT:    store i64 [[TMP6]], i64* [[TMP19]], align 8
-// CHECK17-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK17-NEXT:    store i64 4, i64* [[TMP20]], align 8
-// CHECK17-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK17-NEXT:    store i8* null, i8** [[TMP21]], align 8
-// CHECK17-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK17-NEXT:    [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i64*
-// CHECK17-NEXT:    store i64 2, i64* [[TMP23]], align 8
-// CHECK17-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK17-NEXT:    [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i64*
-// CHECK17-NEXT:    store i64 2, i64* [[TMP25]], align 8
-// CHECK17-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK17-NEXT:    store i64 8, i64* [[TMP26]], align 8
-// CHECK17-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK17-NEXT:    store i8* null, i8** [[TMP27]], align 8
-// CHECK17-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK17-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK17-NEXT:    store i8* null, i8** [[TMP20]], align 8
+// CHECK17-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK17-NEXT:    [[TMP22:%.*]] = bitcast i8** [[TMP21]] to i64*
+// CHECK17-NEXT:    store i64 2, i64* [[TMP22]], align 8
+// CHECK17-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK17-NEXT:    [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i64*
+// CHECK17-NEXT:    store i64 2, i64* [[TMP24]], align 8
+// CHECK17-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK17-NEXT:    store i8* null, i8** [[TMP25]], align 8
+// CHECK17-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK17-NEXT:    [[TMP27:%.*]] = bitcast i8** [[TMP26]] to i64*
+// CHECK17-NEXT:    store i64 [[TMP2]], i64* [[TMP27]], align 8
+// CHECK17-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
 // CHECK17-NEXT:    [[TMP29:%.*]] = bitcast i8** [[TMP28]] to i64*
 // CHECK17-NEXT:    store i64 [[TMP2]], i64* [[TMP29]], align 8
-// CHECK17-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK17-NEXT:    [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP2]], i64* [[TMP31]], align 8
-// CHECK17-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK17-NEXT:    store i64 8, i64* [[TMP32]], align 8
-// CHECK17-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
-// CHECK17-NEXT:    store i8* null, i8** [[TMP33]], align 8
-// CHECK17-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK17-NEXT:    [[TMP35:%.*]] = bitcast i8** [[TMP34]] to i16**
-// CHECK17-NEXT:    store i16* [[VLA]], i16** [[TMP35]], align 8
-// CHECK17-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
-// CHECK17-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i16**
-// CHECK17-NEXT:    store i16* [[VLA]], i16** [[TMP37]], align 8
-// CHECK17-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK17-NEXT:    store i64 [[TMP9]], i64* [[TMP38]], align 8
-// CHECK17-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
-// CHECK17-NEXT:    store i8* null, i8** [[TMP39]], align 8
-// CHECK17-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
+// CHECK17-NEXT:    store i8* null, i8** [[TMP30]], align 8
+// CHECK17-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK17-NEXT:    [[TMP32:%.*]] = bitcast i8** [[TMP31]] to i16**
+// CHECK17-NEXT:    store i16* [[VLA]], i16** [[TMP32]], align 8
+// CHECK17-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK17-NEXT:    [[TMP34:%.*]] = bitcast i8** [[TMP33]] to i16**
+// CHECK17-NEXT:    store i16* [[VLA]], i16** [[TMP34]], align 8
+// CHECK17-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
+// CHECK17-NEXT:    store i64 [[TMP9]], i64* [[TMP35]], align 8
+// CHECK17-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
+// CHECK17-NEXT:    store i8* null, i8** [[TMP36]], align 8
+// CHECK17-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
 // CHECK17-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK17-NEXT:    [[TMP43:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l218.region_id, i32 5, i8** [[TMP40]], i8** [[TMP41]], i64* [[TMP42]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.12, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK17-NEXT:    [[TMP44:%.*]] = icmp ne i32 [[TMP43]], 0
-// CHECK17-NEXT:    br i1 [[TMP44]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK17-NEXT:    [[TMP40:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l218.region_id, i32 5, i8** [[TMP37]], i8** [[TMP38]], i64* [[TMP39]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.14, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK17-NEXT:    [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0
+// CHECK17-NEXT:    br i1 [[TMP41]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK17:       omp_offload.failed:
 // CHECK17-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l218(%struct.S1* [[THIS1]], i64 [[TMP6]], i64 2, i64 [[TMP2]], i16* [[VLA]]) #[[ATTR3]]
 // CHECK17-NEXT:    br label [[OMP_OFFLOAD_CONT]]
@@ -11568,15 +11468,15 @@ int bar(int n){
 // CHECK17-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l218(%struct.S1* [[THIS1]], i64 [[TMP6]], i64 2, i64 [[TMP2]], i16* [[VLA]]) #[[ATTR3]]
 // CHECK17-NEXT:    br label [[OMP_IF_END]]
 // CHECK17:       omp_if.end:
-// CHECK17-NEXT:    [[TMP45:%.*]] = mul nsw i64 1, [[TMP2]]
-// CHECK17-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i64 [[TMP45]]
+// CHECK17-NEXT:    [[TMP42:%.*]] = mul nsw i64 1, [[TMP2]]
+// CHECK17-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i64 [[TMP42]]
 // CHECK17-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1
-// CHECK17-NEXT:    [[TMP46:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
-// CHECK17-NEXT:    [[CONV3:%.*]] = sext i16 [[TMP46]] to i32
-// CHECK17-NEXT:    [[TMP47:%.*]] = load i32, i32* [[B]], align 4
-// CHECK17-NEXT:    [[ADD4:%.*]] = add nsw i32 [[CONV3]], [[TMP47]]
-// CHECK17-NEXT:    [[TMP48:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK17-NEXT:    call void @llvm.stackrestore(i8* [[TMP48]])
+// CHECK17-NEXT:    [[TMP43:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
+// CHECK17-NEXT:    [[CONV3:%.*]] = sext i16 [[TMP43]] to i32
+// CHECK17-NEXT:    [[TMP44:%.*]] = load i32, i32* [[B]], align 4
+// CHECK17-NEXT:    [[ADD4:%.*]] = add nsw i32 [[CONV3]], [[TMP44]]
+// CHECK17-NEXT:    [[TMP45:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK17-NEXT:    call void @llvm.stackrestore(i8* [[TMP45]])
 // CHECK17-NEXT:    ret i32 [[ADD4]]
 //
 //
@@ -11681,7 +11581,7 @@ int bar(int n){
 // CHECK17-NEXT:    [[ADD8:%.*]] = add i32 [[TMP40]], 1
 // CHECK17-NEXT:    [[TMP41:%.*]] = zext i32 [[ADD8]] to i64
 // CHECK17-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 [[TMP41]])
-// CHECK17-NEXT:    [[TMP42:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l200.region_id, i32 5, i8** [[TMP34]], i8** [[TMP35]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_sizes.14, i32 0, i32 0), i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.15, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK17-NEXT:    [[TMP42:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l200.region_id, i32 5, i8** [[TMP34]], i8** [[TMP35]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_sizes.16, i32 0, i32 0), i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.17, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK17-NEXT:    [[TMP43:%.*]] = icmp ne i32 [[TMP42]], 0
 // CHECK17-NEXT:    br i1 [[TMP43]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK17:       omp_offload.failed:
@@ -11752,7 +11652,7 @@ int bar(int n){
 // CHECK17-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK17-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK17-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK17-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l183.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.17, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.18, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK17-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l183.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.19, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.20, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK17-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK17-NEXT:    br i1 [[TMP23]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK17:       omp_offload.failed:
@@ -11791,11 +11691,11 @@ int bar(int n){
 // CHECK17-NEXT:    [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32*
 // CHECK17-NEXT:    store i32 [[TMP4]], i32* [[CONV3]], align 4
 // CHECK17-NEXT:    [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8
-// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i64, i64, i64, i16*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i64 [[TMP5]], i64 [[TMP1]], i64 [[TMP2]], i16* [[TMP3]])
+// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i64, i64, i64, i16*)* @.omp_outlined..12 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i64 [[TMP5]], i64 [[TMP1]], i64 [[TMP2]], i16* [[TMP3]])
 // CHECK17-NEXT:    ret void
 //
 //
-// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..11
+// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..12
 // CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR2]] {
 // CHECK17-NEXT:  entry:
 // CHECK17-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -11922,11 +11822,11 @@ int bar(int n){
 // CHECK17-NEXT:    [[CONV7:%.*]] = bitcast i64* [[AAA_CASTED]] to i8*
 // CHECK17-NEXT:    store i8 [[TMP7]], i8* [[CONV7]], align 1
 // CHECK17-NEXT:    [[TMP8:%.*]] = load i64, i64* [[AAA_CASTED]], align 8
-// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, [10 x i32]*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP8]], [10 x i32]* [[TMP0]])
+// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP8]], [10 x i32]* [[TMP0]])
 // CHECK17-NEXT:    ret void
 //
 //
-// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..13
+// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..15
 // CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] {
 // CHECK17-NEXT:  entry:
 // CHECK17-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -12072,11 +11972,11 @@ int bar(int n){
 // CHECK17-NEXT:    [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16*
 // CHECK17-NEXT:    store i16 [[TMP3]], i16* [[CONV3]], align 2
 // CHECK17-NEXT:    [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8
-// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], [10 x i32]* [[TMP0]])
+// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], [10 x i32]* [[TMP0]])
 // CHECK17-NEXT:    ret void
 //
 //
-// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..16
+// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..18
 // CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] {
 // CHECK17-NEXT:  entry:
 // CHECK17-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -12382,14 +12282,14 @@ int bar(int n){
 // CHECK18-NEXT:    [[TMP95:%.*]] = mul nuw i64 [[TMP2]], 4
 // CHECK18-NEXT:    [[TMP96:%.*]] = mul nuw i64 5, [[TMP5]]
 // CHECK18-NEXT:    [[TMP97:%.*]] = mul nuw i64 [[TMP96]], 8
-// CHECK18-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP99:%.*]] = bitcast i8** [[TMP98]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP91]], i64* [[TMP99]], align 8
-// CHECK18-NEXT:    [[TMP100:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP101:%.*]] = bitcast i8** [[TMP100]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP91]], i64* [[TMP101]], align 8
-// CHECK18-NEXT:    [[TMP102:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK18-NEXT:    store i64 4, i64* [[TMP102]], align 8
+// CHECK18-NEXT:    [[TMP98:%.*]] = bitcast [10 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK18-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP98]], i8* align 8 bitcast ([10 x i64]* @.offload_sizes.10 to i8*), i64 80, i1 false)
+// CHECK18-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP100:%.*]] = bitcast i8** [[TMP99]] to i64*
+// CHECK18-NEXT:    store i64 [[TMP91]], i64* [[TMP100]], align 8
+// CHECK18-NEXT:    [[TMP101:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP102:%.*]] = bitcast i8** [[TMP101]] to i64*
+// CHECK18-NEXT:    store i64 [[TMP91]], i64* [[TMP102]], align 8
 // CHECK18-NEXT:    [[TMP103:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 0
 // CHECK18-NEXT:    store i8* null, i8** [[TMP103]], align 8
 // CHECK18-NEXT:    [[TMP104:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 1
@@ -12398,97 +12298,83 @@ int bar(int n){
 // CHECK18-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 1
 // CHECK18-NEXT:    [[TMP107:%.*]] = bitcast i8** [[TMP106]] to [10 x float]**
 // CHECK18-NEXT:    store [10 x float]* [[B]], [10 x float]** [[TMP107]], align 8
-// CHECK18-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK18-NEXT:    store i64 40, i64* [[TMP108]], align 8
-// CHECK18-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 1
-// CHECK18-NEXT:    store i8* null, i8** [[TMP109]], align 8
-// CHECK18-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 2
-// CHECK18-NEXT:    [[TMP111:%.*]] = bitcast i8** [[TMP110]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP2]], i64* [[TMP111]], align 8
-// CHECK18-NEXT:    [[TMP112:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 2
-// CHECK18-NEXT:    [[TMP113:%.*]] = bitcast i8** [[TMP112]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP2]], i64* [[TMP113]], align 8
-// CHECK18-NEXT:    [[TMP114:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK18-NEXT:    store i64 8, i64* [[TMP114]], align 8
-// CHECK18-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 2
-// CHECK18-NEXT:    store i8* null, i8** [[TMP115]], align 8
-// CHECK18-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 3
+// CHECK18-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 1
+// CHECK18-NEXT:    store i8* null, i8** [[TMP108]], align 8
+// CHECK18-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 2
+// CHECK18-NEXT:    [[TMP110:%.*]] = bitcast i8** [[TMP109]] to i64*
+// CHECK18-NEXT:    store i64 [[TMP2]], i64* [[TMP110]], align 8
+// CHECK18-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 2
+// CHECK18-NEXT:    [[TMP112:%.*]] = bitcast i8** [[TMP111]] to i64*
+// CHECK18-NEXT:    store i64 [[TMP2]], i64* [[TMP112]], align 8
+// CHECK18-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 2
+// CHECK18-NEXT:    store i8* null, i8** [[TMP113]], align 8
+// CHECK18-NEXT:    [[TMP114:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 3
+// CHECK18-NEXT:    [[TMP115:%.*]] = bitcast i8** [[TMP114]] to float**
+// CHECK18-NEXT:    store float* [[VLA]], float** [[TMP115]], align 8
+// CHECK18-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 3
 // CHECK18-NEXT:    [[TMP117:%.*]] = bitcast i8** [[TMP116]] to float**
 // CHECK18-NEXT:    store float* [[VLA]], float** [[TMP117]], align 8
-// CHECK18-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 3
-// CHECK18-NEXT:    [[TMP119:%.*]] = bitcast i8** [[TMP118]] to float**
-// CHECK18-NEXT:    store float* [[VLA]], float** [[TMP119]], align 8
-// CHECK18-NEXT:    [[TMP120:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK18-NEXT:    store i64 [[TMP95]], i64* [[TMP120]], align 8
-// CHECK18-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 3
-// CHECK18-NEXT:    store i8* null, i8** [[TMP121]], align 8
-// CHECK18-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 4
+// CHECK18-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
+// CHECK18-NEXT:    store i64 [[TMP95]], i64* [[TMP118]], align 8
+// CHECK18-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 3
+// CHECK18-NEXT:    store i8* null, i8** [[TMP119]], align 8
+// CHECK18-NEXT:    [[TMP120:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 4
+// CHECK18-NEXT:    [[TMP121:%.*]] = bitcast i8** [[TMP120]] to [5 x [10 x double]]**
+// CHECK18-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP121]], align 8
+// CHECK18-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 4
 // CHECK18-NEXT:    [[TMP123:%.*]] = bitcast i8** [[TMP122]] to [5 x [10 x double]]**
 // CHECK18-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP123]], align 8
-// CHECK18-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 4
-// CHECK18-NEXT:    [[TMP125:%.*]] = bitcast i8** [[TMP124]] to [5 x [10 x double]]**
-// CHECK18-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP125]], align 8
-// CHECK18-NEXT:    [[TMP126:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK18-NEXT:    store i64 400, i64* [[TMP126]], align 8
-// CHECK18-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 4
-// CHECK18-NEXT:    store i8* null, i8** [[TMP127]], align 8
-// CHECK18-NEXT:    [[TMP128:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 5
-// CHECK18-NEXT:    [[TMP129:%.*]] = bitcast i8** [[TMP128]] to i64*
-// CHECK18-NEXT:    store i64 5, i64* [[TMP129]], align 8
-// CHECK18-NEXT:    [[TMP130:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 5
+// CHECK18-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 4
+// CHECK18-NEXT:    store i8* null, i8** [[TMP124]], align 8
+// CHECK18-NEXT:    [[TMP125:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 5
+// CHECK18-NEXT:    [[TMP126:%.*]] = bitcast i8** [[TMP125]] to i64*
+// CHECK18-NEXT:    store i64 5, i64* [[TMP126]], align 8
+// CHECK18-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 5
+// CHECK18-NEXT:    [[TMP128:%.*]] = bitcast i8** [[TMP127]] to i64*
+// CHECK18-NEXT:    store i64 5, i64* [[TMP128]], align 8
+// CHECK18-NEXT:    [[TMP129:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 5
+// CHECK18-NEXT:    store i8* null, i8** [[TMP129]], align 8
+// CHECK18-NEXT:    [[TMP130:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 6
 // CHECK18-NEXT:    [[TMP131:%.*]] = bitcast i8** [[TMP130]] to i64*
-// CHECK18-NEXT:    store i64 5, i64* [[TMP131]], align 8
-// CHECK18-NEXT:    [[TMP132:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 5
-// CHECK18-NEXT:    store i64 8, i64* [[TMP132]], align 8
-// CHECK18-NEXT:    [[TMP133:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 5
-// CHECK18-NEXT:    store i8* null, i8** [[TMP133]], align 8
-// CHECK18-NEXT:    [[TMP134:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 6
-// CHECK18-NEXT:    [[TMP135:%.*]] = bitcast i8** [[TMP134]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP5]], i64* [[TMP135]], align 8
-// CHECK18-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 6
-// CHECK18-NEXT:    [[TMP137:%.*]] = bitcast i8** [[TMP136]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP5]], i64* [[TMP137]], align 8
-// CHECK18-NEXT:    [[TMP138:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 6
-// CHECK18-NEXT:    store i64 8, i64* [[TMP138]], align 8
-// CHECK18-NEXT:    [[TMP139:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 6
-// CHECK18-NEXT:    store i8* null, i8** [[TMP139]], align 8
-// CHECK18-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 7
-// CHECK18-NEXT:    [[TMP141:%.*]] = bitcast i8** [[TMP140]] to double**
-// CHECK18-NEXT:    store double* [[VLA1]], double** [[TMP141]], align 8
-// CHECK18-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 7
-// CHECK18-NEXT:    [[TMP143:%.*]] = bitcast i8** [[TMP142]] to double**
-// CHECK18-NEXT:    store double* [[VLA1]], double** [[TMP143]], align 8
-// CHECK18-NEXT:    [[TMP144:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
-// CHECK18-NEXT:    store i64 [[TMP97]], i64* [[TMP144]], align 8
-// CHECK18-NEXT:    [[TMP145:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 7
+// CHECK18-NEXT:    store i64 [[TMP5]], i64* [[TMP131]], align 8
+// CHECK18-NEXT:    [[TMP132:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 6
+// CHECK18-NEXT:    [[TMP133:%.*]] = bitcast i8** [[TMP132]] to i64*
+// CHECK18-NEXT:    store i64 [[TMP5]], i64* [[TMP133]], align 8
+// CHECK18-NEXT:    [[TMP134:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 6
+// CHECK18-NEXT:    store i8* null, i8** [[TMP134]], align 8
+// CHECK18-NEXT:    [[TMP135:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 7
+// CHECK18-NEXT:    [[TMP136:%.*]] = bitcast i8** [[TMP135]] to double**
+// CHECK18-NEXT:    store double* [[VLA1]], double** [[TMP136]], align 8
+// CHECK18-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 7
+// CHECK18-NEXT:    [[TMP138:%.*]] = bitcast i8** [[TMP137]] to double**
+// CHECK18-NEXT:    store double* [[VLA1]], double** [[TMP138]], align 8
+// CHECK18-NEXT:    [[TMP139:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
+// CHECK18-NEXT:    store i64 [[TMP97]], i64* [[TMP139]], align 8
+// CHECK18-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 7
+// CHECK18-NEXT:    store i8* null, i8** [[TMP140]], align 8
+// CHECK18-NEXT:    [[TMP141:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 8
+// CHECK18-NEXT:    [[TMP142:%.*]] = bitcast i8** [[TMP141]] to %struct.TT**
+// CHECK18-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP142]], align 8
+// CHECK18-NEXT:    [[TMP143:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 8
+// CHECK18-NEXT:    [[TMP144:%.*]] = bitcast i8** [[TMP143]] to %struct.TT**
+// CHECK18-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP144]], align 8
+// CHECK18-NEXT:    [[TMP145:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 8
 // CHECK18-NEXT:    store i8* null, i8** [[TMP145]], align 8
-// CHECK18-NEXT:    [[TMP146:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 8
-// CHECK18-NEXT:    [[TMP147:%.*]] = bitcast i8** [[TMP146]] to %struct.TT**
-// CHECK18-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP147]], align 8
-// CHECK18-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 8
-// CHECK18-NEXT:    [[TMP149:%.*]] = bitcast i8** [[TMP148]] to %struct.TT**
-// CHECK18-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP149]], align 8
-// CHECK18-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 8
-// CHECK18-NEXT:    store i64 16, i64* [[TMP150]], align 8
-// CHECK18-NEXT:    [[TMP151:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 8
-// CHECK18-NEXT:    store i8* null, i8** [[TMP151]], align 8
-// CHECK18-NEXT:    [[TMP152:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 9
-// CHECK18-NEXT:    [[TMP153:%.*]] = bitcast i8** [[TMP152]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP93]], i64* [[TMP153]], align 8
-// CHECK18-NEXT:    [[TMP154:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 9
-// CHECK18-NEXT:    [[TMP155:%.*]] = bitcast i8** [[TMP154]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP93]], i64* [[TMP155]], align 8
-// CHECK18-NEXT:    [[TMP156:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 9
-// CHECK18-NEXT:    store i64 4, i64* [[TMP156]], align 8
-// CHECK18-NEXT:    [[TMP157:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 9
-// CHECK18-NEXT:    store i8* null, i8** [[TMP157]], align 8
-// CHECK18-NEXT:    [[TMP158:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP159:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP160:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP146:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 9
+// CHECK18-NEXT:    [[TMP147:%.*]] = bitcast i8** [[TMP146]] to i64*
+// CHECK18-NEXT:    store i64 [[TMP93]], i64* [[TMP147]], align 8
+// CHECK18-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 9
+// CHECK18-NEXT:    [[TMP149:%.*]] = bitcast i8** [[TMP148]] to i64*
+// CHECK18-NEXT:    store i64 [[TMP93]], i64* [[TMP149]], align 8
+// CHECK18-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 9
+// CHECK18-NEXT:    store i8* null, i8** [[TMP150]], align 8
+// CHECK18-NEXT:    [[TMP151:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP152:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS30]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP153:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
 // CHECK18-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK18-NEXT:    [[TMP161:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l145.region_id, i32 10, i8** [[TMP158]], i8** [[TMP159]], i64* [[TMP160]], i64* getelementptr inbounds ([10 x i64], [10 x i64]* @.offload_maptypes.10, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK18-NEXT:    [[TMP162:%.*]] = icmp ne i32 [[TMP161]], 0
-// CHECK18-NEXT:    br i1 [[TMP162]], label [[OMP_OFFLOAD_FAILED33:%.*]], label [[OMP_OFFLOAD_CONT34:%.*]]
+// CHECK18-NEXT:    [[TMP154:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l145.region_id, i32 10, i8** [[TMP151]], i8** [[TMP152]], i64* [[TMP153]], i64* getelementptr inbounds ([10 x i64], [10 x i64]* @.offload_maptypes.11, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK18-NEXT:    [[TMP155:%.*]] = icmp ne i32 [[TMP154]], 0
+// CHECK18-NEXT:    br i1 [[TMP155]], label [[OMP_OFFLOAD_FAILED33:%.*]], label [[OMP_OFFLOAD_CONT34:%.*]]
 // CHECK18:       omp_offload.failed33:
 // CHECK18-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l145(i64 [[TMP91]], [10 x float]* [[B]], i64 [[TMP2]], float* [[VLA]], [5 x [10 x double]]* [[C]], i64 5, i64 [[TMP5]], double* [[VLA1]], %struct.TT* [[D]], i64 [[TMP93]]) #[[ATTR3]]
 // CHECK18-NEXT:    br label [[OMP_OFFLOAD_CONT34]]
@@ -12498,10 +12384,10 @@ int bar(int n){
 // CHECK18-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l145(i64 [[TMP91]], [10 x float]* [[B]], i64 [[TMP2]], float* [[VLA]], [5 x [10 x double]]* [[C]], i64 5, i64 [[TMP5]], double* [[VLA1]], %struct.TT* [[D]], i64 [[TMP93]]) #[[ATTR3]]
 // CHECK18-NEXT:    br label [[OMP_IF_END36]]
 // CHECK18:       omp_if.end36:
-// CHECK18-NEXT:    [[TMP163:%.*]] = load i32, i32* [[A]], align 4
-// CHECK18-NEXT:    [[TMP164:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK18-NEXT:    call void @llvm.stackrestore(i8* [[TMP164]])
-// CHECK18-NEXT:    ret i32 [[TMP163]]
+// CHECK18-NEXT:    [[TMP156:%.*]] = load i32, i32* [[A]], align 4
+// CHECK18-NEXT:    [[TMP157:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK18-NEXT:    call void @llvm.stackrestore(i8* [[TMP157]])
+// CHECK18-NEXT:    ret i32 [[TMP156]]
 //
 //
 // CHECK18-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l103
@@ -13222,14 +13108,14 @@ int bar(int n){
 // CHECK18-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0
 // CHECK18-NEXT:    [[TMP8:%.*]] = mul nuw i64 2, [[TMP2]]
 // CHECK18-NEXT:    [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 2
-// CHECK18-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP11:%.*]] = bitcast i8** [[TMP10]] to %struct.S1**
-// CHECK18-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP11]], align 8
-// CHECK18-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP13:%.*]] = bitcast i8** [[TMP12]] to double**
-// CHECK18-NEXT:    store double* [[A]], double** [[TMP13]], align 8
-// CHECK18-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK18-NEXT:    store i64 8, i64* [[TMP14]], align 8
+// CHECK18-NEXT:    [[TMP10:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK18-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP10]], i8* align 8 bitcast ([5 x i64]* @.offload_sizes.13 to i8*), i64 40, i1 false)
+// CHECK18-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP12:%.*]] = bitcast i8** [[TMP11]] to %struct.S1**
+// CHECK18-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP12]], align 8
+// CHECK18-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to double**
+// CHECK18-NEXT:    store double* [[A]], double** [[TMP14]], align 8
 // CHECK18-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
 // CHECK18-NEXT:    store i8* null, i8** [[TMP15]], align 8
 // CHECK18-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -13238,47 +13124,41 @@ int bar(int n){
 // CHECK18-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK18-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i64*
 // CHECK18-NEXT:    store i64 [[TMP6]], i64* [[TMP19]], align 8
-// CHECK18-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK18-NEXT:    store i64 4, i64* [[TMP20]], align 8
-// CHECK18-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK18-NEXT:    store i8* null, i8** [[TMP21]], align 8
-// CHECK18-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK18-NEXT:    [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i64*
-// CHECK18-NEXT:    store i64 2, i64* [[TMP23]], align 8
-// CHECK18-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK18-NEXT:    [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i64*
-// CHECK18-NEXT:    store i64 2, i64* [[TMP25]], align 8
-// CHECK18-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK18-NEXT:    store i64 8, i64* [[TMP26]], align 8
-// CHECK18-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK18-NEXT:    store i8* null, i8** [[TMP27]], align 8
-// CHECK18-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK18-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK18-NEXT:    store i8* null, i8** [[TMP20]], align 8
+// CHECK18-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK18-NEXT:    [[TMP22:%.*]] = bitcast i8** [[TMP21]] to i64*
+// CHECK18-NEXT:    store i64 2, i64* [[TMP22]], align 8
+// CHECK18-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK18-NEXT:    [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i64*
+// CHECK18-NEXT:    store i64 2, i64* [[TMP24]], align 8
+// CHECK18-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK18-NEXT:    store i8* null, i8** [[TMP25]], align 8
+// CHECK18-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK18-NEXT:    [[TMP27:%.*]] = bitcast i8** [[TMP26]] to i64*
+// CHECK18-NEXT:    store i64 [[TMP2]], i64* [[TMP27]], align 8
+// CHECK18-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
 // CHECK18-NEXT:    [[TMP29:%.*]] = bitcast i8** [[TMP28]] to i64*
 // CHECK18-NEXT:    store i64 [[TMP2]], i64* [[TMP29]], align 8
-// CHECK18-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK18-NEXT:    [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP2]], i64* [[TMP31]], align 8
-// CHECK18-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK18-NEXT:    store i64 8, i64* [[TMP32]], align 8
-// CHECK18-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
-// CHECK18-NEXT:    store i8* null, i8** [[TMP33]], align 8
-// CHECK18-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK18-NEXT:    [[TMP35:%.*]] = bitcast i8** [[TMP34]] to i16**
-// CHECK18-NEXT:    store i16* [[VLA]], i16** [[TMP35]], align 8
-// CHECK18-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
-// CHECK18-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i16**
-// CHECK18-NEXT:    store i16* [[VLA]], i16** [[TMP37]], align 8
-// CHECK18-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK18-NEXT:    store i64 [[TMP9]], i64* [[TMP38]], align 8
-// CHECK18-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
-// CHECK18-NEXT:    store i8* null, i8** [[TMP39]], align 8
-// CHECK18-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
+// CHECK18-NEXT:    store i8* null, i8** [[TMP30]], align 8
+// CHECK18-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK18-NEXT:    [[TMP32:%.*]] = bitcast i8** [[TMP31]] to i16**
+// CHECK18-NEXT:    store i16* [[VLA]], i16** [[TMP32]], align 8
+// CHECK18-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK18-NEXT:    [[TMP34:%.*]] = bitcast i8** [[TMP33]] to i16**
+// CHECK18-NEXT:    store i16* [[VLA]], i16** [[TMP34]], align 8
+// CHECK18-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
+// CHECK18-NEXT:    store i64 [[TMP9]], i64* [[TMP35]], align 8
+// CHECK18-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
+// CHECK18-NEXT:    store i8* null, i8** [[TMP36]], align 8
+// CHECK18-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
 // CHECK18-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK18-NEXT:    [[TMP43:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l218.region_id, i32 5, i8** [[TMP40]], i8** [[TMP41]], i64* [[TMP42]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.12, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK18-NEXT:    [[TMP44:%.*]] = icmp ne i32 [[TMP43]], 0
-// CHECK18-NEXT:    br i1 [[TMP44]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK18-NEXT:    [[TMP40:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l218.region_id, i32 5, i8** [[TMP37]], i8** [[TMP38]], i64* [[TMP39]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.14, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK18-NEXT:    [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0
+// CHECK18-NEXT:    br i1 [[TMP41]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK18:       omp_offload.failed:
 // CHECK18-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l218(%struct.S1* [[THIS1]], i64 [[TMP6]], i64 2, i64 [[TMP2]], i16* [[VLA]]) #[[ATTR3]]
 // CHECK18-NEXT:    br label [[OMP_OFFLOAD_CONT]]
@@ -13288,15 +13168,15 @@ int bar(int n){
 // CHECK18-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l218(%struct.S1* [[THIS1]], i64 [[TMP6]], i64 2, i64 [[TMP2]], i16* [[VLA]]) #[[ATTR3]]
 // CHECK18-NEXT:    br label [[OMP_IF_END]]
 // CHECK18:       omp_if.end:
-// CHECK18-NEXT:    [[TMP45:%.*]] = mul nsw i64 1, [[TMP2]]
-// CHECK18-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i64 [[TMP45]]
+// CHECK18-NEXT:    [[TMP42:%.*]] = mul nsw i64 1, [[TMP2]]
+// CHECK18-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i64 [[TMP42]]
 // CHECK18-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1
-// CHECK18-NEXT:    [[TMP46:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
-// CHECK18-NEXT:    [[CONV3:%.*]] = sext i16 [[TMP46]] to i32
-// CHECK18-NEXT:    [[TMP47:%.*]] = load i32, i32* [[B]], align 4
-// CHECK18-NEXT:    [[ADD4:%.*]] = add nsw i32 [[CONV3]], [[TMP47]]
-// CHECK18-NEXT:    [[TMP48:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK18-NEXT:    call void @llvm.stackrestore(i8* [[TMP48]])
+// CHECK18-NEXT:    [[TMP43:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
+// CHECK18-NEXT:    [[CONV3:%.*]] = sext i16 [[TMP43]] to i32
+// CHECK18-NEXT:    [[TMP44:%.*]] = load i32, i32* [[B]], align 4
+// CHECK18-NEXT:    [[ADD4:%.*]] = add nsw i32 [[CONV3]], [[TMP44]]
+// CHECK18-NEXT:    [[TMP45:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK18-NEXT:    call void @llvm.stackrestore(i8* [[TMP45]])
 // CHECK18-NEXT:    ret i32 [[ADD4]]
 //
 //
@@ -13401,7 +13281,7 @@ int bar(int n){
 // CHECK18-NEXT:    [[ADD8:%.*]] = add i32 [[TMP40]], 1
 // CHECK18-NEXT:    [[TMP41:%.*]] = zext i32 [[ADD8]] to i64
 // CHECK18-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 [[TMP41]])
-// CHECK18-NEXT:    [[TMP42:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l200.region_id, i32 5, i8** [[TMP34]], i8** [[TMP35]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_sizes.14, i32 0, i32 0), i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.15, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK18-NEXT:    [[TMP42:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l200.region_id, i32 5, i8** [[TMP34]], i8** [[TMP35]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_sizes.16, i32 0, i32 0), i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.17, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK18-NEXT:    [[TMP43:%.*]] = icmp ne i32 [[TMP42]], 0
 // CHECK18-NEXT:    br i1 [[TMP43]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK18:       omp_offload.failed:
@@ -13472,7 +13352,7 @@ int bar(int n){
 // CHECK18-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK18-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK18-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK18-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l183.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.17, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.18, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK18-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l183.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.19, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.20, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK18-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK18-NEXT:    br i1 [[TMP23]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK18:       omp_offload.failed:
@@ -13511,11 +13391,11 @@ int bar(int n){
 // CHECK18-NEXT:    [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32*
 // CHECK18-NEXT:    store i32 [[TMP4]], i32* [[CONV3]], align 4
 // CHECK18-NEXT:    [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8
-// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i64, i64, i64, i16*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i64 [[TMP5]], i64 [[TMP1]], i64 [[TMP2]], i16* [[TMP3]])
+// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i64, i64, i64, i16*)* @.omp_outlined..12 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i64 [[TMP5]], i64 [[TMP1]], i64 [[TMP2]], i16* [[TMP3]])
 // CHECK18-NEXT:    ret void
 //
 //
-// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..11
+// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..12
 // CHECK18-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR2]] {
 // CHECK18-NEXT:  entry:
 // CHECK18-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -13642,11 +13522,11 @@ int bar(int n){
 // CHECK18-NEXT:    [[CONV7:%.*]] = bitcast i64* [[AAA_CASTED]] to i8*
 // CHECK18-NEXT:    store i8 [[TMP7]], i8* [[CONV7]], align 1
 // CHECK18-NEXT:    [[TMP8:%.*]] = load i64, i64* [[AAA_CASTED]], align 8
-// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, [10 x i32]*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP8]], [10 x i32]* [[TMP0]])
+// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP8]], [10 x i32]* [[TMP0]])
 // CHECK18-NEXT:    ret void
 //
 //
-// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..13
+// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..15
 // CHECK18-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] {
 // CHECK18-NEXT:  entry:
 // CHECK18-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -13792,11 +13672,11 @@ int bar(int n){
 // CHECK18-NEXT:    [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16*
 // CHECK18-NEXT:    store i16 [[TMP3]], i16* [[CONV3]], align 2
 // CHECK18-NEXT:    [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8
-// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], [10 x i32]* [[TMP0]])
+// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], [10 x i32]* [[TMP0]])
 // CHECK18-NEXT:    ret void
 //
 //
-// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..16
+// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..18
 // CHECK18-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] {
 // CHECK18-NEXT:  entry:
 // CHECK18-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -14096,14 +13976,14 @@ int bar(int n){
 // CHECK19-NEXT:    [[TMP95:%.*]] = mul nuw i32 5, [[TMP3]]
 // CHECK19-NEXT:    [[TMP96:%.*]] = mul nuw i32 [[TMP95]], 8
 // CHECK19-NEXT:    [[TMP97:%.*]] = sext i32 [[TMP96]] to i64
-// CHECK19-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP99:%.*]] = bitcast i8** [[TMP98]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP89]], i32* [[TMP99]], align 4
-// CHECK19-NEXT:    [[TMP100:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS24]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP101:%.*]] = bitcast i8** [[TMP100]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP89]], i32* [[TMP101]], align 4
-// CHECK19-NEXT:    [[TMP102:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK19-NEXT:    store i64 4, i64* [[TMP102]], align 4
+// CHECK19-NEXT:    [[TMP98:%.*]] = bitcast [10 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK19-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP98]], i8* align 4 bitcast ([10 x i64]* @.offload_sizes.10 to i8*), i32 80, i1 false)
+// CHECK19-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP100:%.*]] = bitcast i8** [[TMP99]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP89]], i32* [[TMP100]], align 4
+// CHECK19-NEXT:    [[TMP101:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS24]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP102:%.*]] = bitcast i8** [[TMP101]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP89]], i32* [[TMP102]], align 4
 // CHECK19-NEXT:    [[TMP103:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS25]], i32 0, i32 0
 // CHECK19-NEXT:    store i8* null, i8** [[TMP103]], align 4
 // CHECK19-NEXT:    [[TMP104:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 1
@@ -14112,97 +13992,83 @@ int bar(int n){
 // CHECK19-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS24]], i32 0, i32 1
 // CHECK19-NEXT:    [[TMP107:%.*]] = bitcast i8** [[TMP106]] to [10 x float]**
 // CHECK19-NEXT:    store [10 x float]* [[B]], [10 x float]** [[TMP107]], align 4
-// CHECK19-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK19-NEXT:    store i64 40, i64* [[TMP108]], align 4
-// CHECK19-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS25]], i32 0, i32 1
-// CHECK19-NEXT:    store i8* null, i8** [[TMP109]], align 4
-// CHECK19-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 2
-// CHECK19-NEXT:    [[TMP111:%.*]] = bitcast i8** [[TMP110]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP1]], i32* [[TMP111]], align 4
-// CHECK19-NEXT:    [[TMP112:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS24]], i32 0, i32 2
-// CHECK19-NEXT:    [[TMP113:%.*]] = bitcast i8** [[TMP112]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP1]], i32* [[TMP113]], align 4
-// CHECK19-NEXT:    [[TMP114:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK19-NEXT:    store i64 4, i64* [[TMP114]], align 4
-// CHECK19-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS25]], i32 0, i32 2
-// CHECK19-NEXT:    store i8* null, i8** [[TMP115]], align 4
-// CHECK19-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 3
+// CHECK19-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS25]], i32 0, i32 1
+// CHECK19-NEXT:    store i8* null, i8** [[TMP108]], align 4
+// CHECK19-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 2
+// CHECK19-NEXT:    [[TMP110:%.*]] = bitcast i8** [[TMP109]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP1]], i32* [[TMP110]], align 4
+// CHECK19-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS24]], i32 0, i32 2
+// CHECK19-NEXT:    [[TMP112:%.*]] = bitcast i8** [[TMP111]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP1]], i32* [[TMP112]], align 4
+// CHECK19-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS25]], i32 0, i32 2
+// CHECK19-NEXT:    store i8* null, i8** [[TMP113]], align 4
+// CHECK19-NEXT:    [[TMP114:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 3
+// CHECK19-NEXT:    [[TMP115:%.*]] = bitcast i8** [[TMP114]] to float**
+// CHECK19-NEXT:    store float* [[VLA]], float** [[TMP115]], align 4
+// CHECK19-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS24]], i32 0, i32 3
 // CHECK19-NEXT:    [[TMP117:%.*]] = bitcast i8** [[TMP116]] to float**
 // CHECK19-NEXT:    store float* [[VLA]], float** [[TMP117]], align 4
-// CHECK19-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS24]], i32 0, i32 3
-// CHECK19-NEXT:    [[TMP119:%.*]] = bitcast i8** [[TMP118]] to float**
-// CHECK19-NEXT:    store float* [[VLA]], float** [[TMP119]], align 4
-// CHECK19-NEXT:    [[TMP120:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK19-NEXT:    store i64 [[TMP94]], i64* [[TMP120]], align 4
-// CHECK19-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS25]], i32 0, i32 3
-// CHECK19-NEXT:    store i8* null, i8** [[TMP121]], align 4
-// CHECK19-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 4
+// CHECK19-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
+// CHECK19-NEXT:    store i64 [[TMP94]], i64* [[TMP118]], align 4
+// CHECK19-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS25]], i32 0, i32 3
+// CHECK19-NEXT:    store i8* null, i8** [[TMP119]], align 4
+// CHECK19-NEXT:    [[TMP120:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 4
+// CHECK19-NEXT:    [[TMP121:%.*]] = bitcast i8** [[TMP120]] to [5 x [10 x double]]**
+// CHECK19-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP121]], align 4
+// CHECK19-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS24]], i32 0, i32 4
 // CHECK19-NEXT:    [[TMP123:%.*]] = bitcast i8** [[TMP122]] to [5 x [10 x double]]**
 // CHECK19-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP123]], align 4
-// CHECK19-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS24]], i32 0, i32 4
-// CHECK19-NEXT:    [[TMP125:%.*]] = bitcast i8** [[TMP124]] to [5 x [10 x double]]**
-// CHECK19-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP125]], align 4
-// CHECK19-NEXT:    [[TMP126:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK19-NEXT:    store i64 400, i64* [[TMP126]], align 4
-// CHECK19-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS25]], i32 0, i32 4
-// CHECK19-NEXT:    store i8* null, i8** [[TMP127]], align 4
-// CHECK19-NEXT:    [[TMP128:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 5
-// CHECK19-NEXT:    [[TMP129:%.*]] = bitcast i8** [[TMP128]] to i32*
-// CHECK19-NEXT:    store i32 5, i32* [[TMP129]], align 4
-// CHECK19-NEXT:    [[TMP130:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS24]], i32 0, i32 5
+// CHECK19-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS25]], i32 0, i32 4
+// CHECK19-NEXT:    store i8* null, i8** [[TMP124]], align 4
+// CHECK19-NEXT:    [[TMP125:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 5
+// CHECK19-NEXT:    [[TMP126:%.*]] = bitcast i8** [[TMP125]] to i32*
+// CHECK19-NEXT:    store i32 5, i32* [[TMP126]], align 4
+// CHECK19-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS24]], i32 0, i32 5
+// CHECK19-NEXT:    [[TMP128:%.*]] = bitcast i8** [[TMP127]] to i32*
+// CHECK19-NEXT:    store i32 5, i32* [[TMP128]], align 4
+// CHECK19-NEXT:    [[TMP129:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS25]], i32 0, i32 5
+// CHECK19-NEXT:    store i8* null, i8** [[TMP129]], align 4
+// CHECK19-NEXT:    [[TMP130:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 6
 // CHECK19-NEXT:    [[TMP131:%.*]] = bitcast i8** [[TMP130]] to i32*
-// CHECK19-NEXT:    store i32 5, i32* [[TMP131]], align 4
-// CHECK19-NEXT:    [[TMP132:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 5
-// CHECK19-NEXT:    store i64 4, i64* [[TMP132]], align 4
-// CHECK19-NEXT:    [[TMP133:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS25]], i32 0, i32 5
-// CHECK19-NEXT:    store i8* null, i8** [[TMP133]], align 4
-// CHECK19-NEXT:    [[TMP134:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 6
-// CHECK19-NEXT:    [[TMP135:%.*]] = bitcast i8** [[TMP134]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP3]], i32* [[TMP135]], align 4
-// CHECK19-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS24]], i32 0, i32 6
-// CHECK19-NEXT:    [[TMP137:%.*]] = bitcast i8** [[TMP136]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP3]], i32* [[TMP137]], align 4
-// CHECK19-NEXT:    [[TMP138:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 6
-// CHECK19-NEXT:    store i64 4, i64* [[TMP138]], align 4
-// CHECK19-NEXT:    [[TMP139:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS25]], i32 0, i32 6
-// CHECK19-NEXT:    store i8* null, i8** [[TMP139]], align 4
-// CHECK19-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 7
-// CHECK19-NEXT:    [[TMP141:%.*]] = bitcast i8** [[TMP140]] to double**
-// CHECK19-NEXT:    store double* [[VLA1]], double** [[TMP141]], align 4
-// CHECK19-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS24]], i32 0, i32 7
-// CHECK19-NEXT:    [[TMP143:%.*]] = bitcast i8** [[TMP142]] to double**
-// CHECK19-NEXT:    store double* [[VLA1]], double** [[TMP143]], align 4
-// CHECK19-NEXT:    [[TMP144:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
-// CHECK19-NEXT:    store i64 [[TMP97]], i64* [[TMP144]], align 4
-// CHECK19-NEXT:    [[TMP145:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS25]], i32 0, i32 7
+// CHECK19-NEXT:    store i32 [[TMP3]], i32* [[TMP131]], align 4
+// CHECK19-NEXT:    [[TMP132:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS24]], i32 0, i32 6
+// CHECK19-NEXT:    [[TMP133:%.*]] = bitcast i8** [[TMP132]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP3]], i32* [[TMP133]], align 4
+// CHECK19-NEXT:    [[TMP134:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS25]], i32 0, i32 6
+// CHECK19-NEXT:    store i8* null, i8** [[TMP134]], align 4
+// CHECK19-NEXT:    [[TMP135:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 7
+// CHECK19-NEXT:    [[TMP136:%.*]] = bitcast i8** [[TMP135]] to double**
+// CHECK19-NEXT:    store double* [[VLA1]], double** [[TMP136]], align 4
+// CHECK19-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS24]], i32 0, i32 7
+// CHECK19-NEXT:    [[TMP138:%.*]] = bitcast i8** [[TMP137]] to double**
+// CHECK19-NEXT:    store double* [[VLA1]], double** [[TMP138]], align 4
+// CHECK19-NEXT:    [[TMP139:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
+// CHECK19-NEXT:    store i64 [[TMP97]], i64* [[TMP139]], align 4
+// CHECK19-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS25]], i32 0, i32 7
+// CHECK19-NEXT:    store i8* null, i8** [[TMP140]], align 4
+// CHECK19-NEXT:    [[TMP141:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 8
+// CHECK19-NEXT:    [[TMP142:%.*]] = bitcast i8** [[TMP141]] to %struct.TT**
+// CHECK19-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP142]], align 4
+// CHECK19-NEXT:    [[TMP143:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS24]], i32 0, i32 8
+// CHECK19-NEXT:    [[TMP144:%.*]] = bitcast i8** [[TMP143]] to %struct.TT**
+// CHECK19-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP144]], align 4
+// CHECK19-NEXT:    [[TMP145:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS25]], i32 0, i32 8
 // CHECK19-NEXT:    store i8* null, i8** [[TMP145]], align 4
-// CHECK19-NEXT:    [[TMP146:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 8
-// CHECK19-NEXT:    [[TMP147:%.*]] = bitcast i8** [[TMP146]] to %struct.TT**
-// CHECK19-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP147]], align 4
-// CHECK19-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS24]], i32 0, i32 8
-// CHECK19-NEXT:    [[TMP149:%.*]] = bitcast i8** [[TMP148]] to %struct.TT**
-// CHECK19-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP149]], align 4
-// CHECK19-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 8
-// CHECK19-NEXT:    store i64 12, i64* [[TMP150]], align 4
-// CHECK19-NEXT:    [[TMP151:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS25]], i32 0, i32 8
-// CHECK19-NEXT:    store i8* null, i8** [[TMP151]], align 4
-// CHECK19-NEXT:    [[TMP152:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 9
-// CHECK19-NEXT:    [[TMP153:%.*]] = bitcast i8** [[TMP152]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP91]], i32* [[TMP153]], align 4
-// CHECK19-NEXT:    [[TMP154:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS24]], i32 0, i32 9
-// CHECK19-NEXT:    [[TMP155:%.*]] = bitcast i8** [[TMP154]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP91]], i32* [[TMP155]], align 4
-// CHECK19-NEXT:    [[TMP156:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 9
-// CHECK19-NEXT:    store i64 4, i64* [[TMP156]], align 4
-// CHECK19-NEXT:    [[TMP157:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS25]], i32 0, i32 9
-// CHECK19-NEXT:    store i8* null, i8** [[TMP157]], align 4
-// CHECK19-NEXT:    [[TMP158:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP159:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS24]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP160:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP146:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 9
+// CHECK19-NEXT:    [[TMP147:%.*]] = bitcast i8** [[TMP146]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP91]], i32* [[TMP147]], align 4
+// CHECK19-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS24]], i32 0, i32 9
+// CHECK19-NEXT:    [[TMP149:%.*]] = bitcast i8** [[TMP148]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP91]], i32* [[TMP149]], align 4
+// CHECK19-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS25]], i32 0, i32 9
+// CHECK19-NEXT:    store i8* null, i8** [[TMP150]], align 4
+// CHECK19-NEXT:    [[TMP151:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP152:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS24]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP153:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
 // CHECK19-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK19-NEXT:    [[TMP161:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l145.region_id, i32 10, i8** [[TMP158]], i8** [[TMP159]], i64* [[TMP160]], i64* getelementptr inbounds ([10 x i64], [10 x i64]* @.offload_maptypes.10, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK19-NEXT:    [[TMP162:%.*]] = icmp ne i32 [[TMP161]], 0
-// CHECK19-NEXT:    br i1 [[TMP162]], label [[OMP_OFFLOAD_FAILED27:%.*]], label [[OMP_OFFLOAD_CONT28:%.*]]
+// CHECK19-NEXT:    [[TMP154:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l145.region_id, i32 10, i8** [[TMP151]], i8** [[TMP152]], i64* [[TMP153]], i64* getelementptr inbounds ([10 x i64], [10 x i64]* @.offload_maptypes.11, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK19-NEXT:    [[TMP155:%.*]] = icmp ne i32 [[TMP154]], 0
+// CHECK19-NEXT:    br i1 [[TMP155]], label [[OMP_OFFLOAD_FAILED27:%.*]], label [[OMP_OFFLOAD_CONT28:%.*]]
 // CHECK19:       omp_offload.failed27:
 // CHECK19-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l145(i32 [[TMP89]], [10 x float]* [[B]], i32 [[TMP1]], float* [[VLA]], [5 x [10 x double]]* [[C]], i32 5, i32 [[TMP3]], double* [[VLA1]], %struct.TT* [[D]], i32 [[TMP91]]) #[[ATTR3]]
 // CHECK19-NEXT:    br label [[OMP_OFFLOAD_CONT28]]
@@ -14212,10 +14078,10 @@ int bar(int n){
 // CHECK19-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l145(i32 [[TMP89]], [10 x float]* [[B]], i32 [[TMP1]], float* [[VLA]], [5 x [10 x double]]* [[C]], i32 5, i32 [[TMP3]], double* [[VLA1]], %struct.TT* [[D]], i32 [[TMP91]]) #[[ATTR3]]
 // CHECK19-NEXT:    br label [[OMP_IF_END30]]
 // CHECK19:       omp_if.end30:
-// CHECK19-NEXT:    [[TMP163:%.*]] = load i32, i32* [[A]], align 4
-// CHECK19-NEXT:    [[TMP164:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK19-NEXT:    call void @llvm.stackrestore(i8* [[TMP164]])
-// CHECK19-NEXT:    ret i32 [[TMP163]]
+// CHECK19-NEXT:    [[TMP156:%.*]] = load i32, i32* [[A]], align 4
+// CHECK19-NEXT:    [[TMP157:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK19-NEXT:    call void @llvm.stackrestore(i8* [[TMP157]])
+// CHECK19-NEXT:    ret i32 [[TMP156]]
 //
 //
 // CHECK19-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l103
@@ -14919,14 +14785,14 @@ int bar(int n){
 // CHECK19-NEXT:    [[TMP7:%.*]] = mul nuw i32 2, [[TMP1]]
 // CHECK19-NEXT:    [[TMP8:%.*]] = mul nuw i32 [[TMP7]], 2
 // CHECK19-NEXT:    [[TMP9:%.*]] = sext i32 [[TMP8]] to i64
-// CHECK19-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP11:%.*]] = bitcast i8** [[TMP10]] to %struct.S1**
-// CHECK19-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP11]], align 4
-// CHECK19-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP13:%.*]] = bitcast i8** [[TMP12]] to double**
-// CHECK19-NEXT:    store double* [[A]], double** [[TMP13]], align 4
-// CHECK19-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK19-NEXT:    store i64 8, i64* [[TMP14]], align 4
+// CHECK19-NEXT:    [[TMP10:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK19-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP10]], i8* align 4 bitcast ([5 x i64]* @.offload_sizes.13 to i8*), i32 40, i1 false)
+// CHECK19-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP12:%.*]] = bitcast i8** [[TMP11]] to %struct.S1**
+// CHECK19-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP12]], align 4
+// CHECK19-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to double**
+// CHECK19-NEXT:    store double* [[A]], double** [[TMP14]], align 4
 // CHECK19-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
 // CHECK19-NEXT:    store i8* null, i8** [[TMP15]], align 4
 // CHECK19-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -14935,47 +14801,41 @@ int bar(int n){
 // CHECK19-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK19-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32*
 // CHECK19-NEXT:    store i32 [[TMP5]], i32* [[TMP19]], align 4
-// CHECK19-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK19-NEXT:    store i64 4, i64* [[TMP20]], align 4
-// CHECK19-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK19-NEXT:    store i8* null, i8** [[TMP21]], align 4
-// CHECK19-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK19-NEXT:    [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i32*
-// CHECK19-NEXT:    store i32 2, i32* [[TMP23]], align 4
-// CHECK19-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK19-NEXT:    [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i32*
-// CHECK19-NEXT:    store i32 2, i32* [[TMP25]], align 4
-// CHECK19-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK19-NEXT:    store i64 4, i64* [[TMP26]], align 4
-// CHECK19-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK19-NEXT:    store i8* null, i8** [[TMP27]], align 4
-// CHECK19-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK19-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK19-NEXT:    store i8* null, i8** [[TMP20]], align 4
+// CHECK19-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK19-NEXT:    [[TMP22:%.*]] = bitcast i8** [[TMP21]] to i32*
+// CHECK19-NEXT:    store i32 2, i32* [[TMP22]], align 4
+// CHECK19-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK19-NEXT:    [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i32*
+// CHECK19-NEXT:    store i32 2, i32* [[TMP24]], align 4
+// CHECK19-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK19-NEXT:    store i8* null, i8** [[TMP25]], align 4
+// CHECK19-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK19-NEXT:    [[TMP27:%.*]] = bitcast i8** [[TMP26]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP1]], i32* [[TMP27]], align 4
+// CHECK19-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
 // CHECK19-NEXT:    [[TMP29:%.*]] = bitcast i8** [[TMP28]] to i32*
 // CHECK19-NEXT:    store i32 [[TMP1]], i32* [[TMP29]], align 4
-// CHECK19-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK19-NEXT:    [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP1]], i32* [[TMP31]], align 4
-// CHECK19-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK19-NEXT:    store i64 4, i64* [[TMP32]], align 4
-// CHECK19-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
-// CHECK19-NEXT:    store i8* null, i8** [[TMP33]], align 4
-// CHECK19-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK19-NEXT:    [[TMP35:%.*]] = bitcast i8** [[TMP34]] to i16**
-// CHECK19-NEXT:    store i16* [[VLA]], i16** [[TMP35]], align 4
-// CHECK19-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
-// CHECK19-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i16**
-// CHECK19-NEXT:    store i16* [[VLA]], i16** [[TMP37]], align 4
-// CHECK19-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK19-NEXT:    store i64 [[TMP9]], i64* [[TMP38]], align 4
-// CHECK19-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
-// CHECK19-NEXT:    store i8* null, i8** [[TMP39]], align 4
-// CHECK19-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
+// CHECK19-NEXT:    store i8* null, i8** [[TMP30]], align 4
+// CHECK19-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK19-NEXT:    [[TMP32:%.*]] = bitcast i8** [[TMP31]] to i16**
+// CHECK19-NEXT:    store i16* [[VLA]], i16** [[TMP32]], align 4
+// CHECK19-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK19-NEXT:    [[TMP34:%.*]] = bitcast i8** [[TMP33]] to i16**
+// CHECK19-NEXT:    store i16* [[VLA]], i16** [[TMP34]], align 4
+// CHECK19-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
+// CHECK19-NEXT:    store i64 [[TMP9]], i64* [[TMP35]], align 4
+// CHECK19-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
+// CHECK19-NEXT:    store i8* null, i8** [[TMP36]], align 4
+// CHECK19-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
 // CHECK19-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK19-NEXT:    [[TMP43:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l218.region_id, i32 5, i8** [[TMP40]], i8** [[TMP41]], i64* [[TMP42]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.12, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK19-NEXT:    [[TMP44:%.*]] = icmp ne i32 [[TMP43]], 0
-// CHECK19-NEXT:    br i1 [[TMP44]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK19-NEXT:    [[TMP40:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l218.region_id, i32 5, i8** [[TMP37]], i8** [[TMP38]], i64* [[TMP39]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.14, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK19-NEXT:    [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0
+// CHECK19-NEXT:    br i1 [[TMP41]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK19:       omp_offload.failed:
 // CHECK19-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l218(%struct.S1* [[THIS1]], i32 [[TMP5]], i32 2, i32 [[TMP1]], i16* [[VLA]]) #[[ATTR3]]
 // CHECK19-NEXT:    br label [[OMP_OFFLOAD_CONT]]
@@ -14985,15 +14845,15 @@ int bar(int n){
 // CHECK19-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l218(%struct.S1* [[THIS1]], i32 [[TMP5]], i32 2, i32 [[TMP1]], i16* [[VLA]]) #[[ATTR3]]
 // CHECK19-NEXT:    br label [[OMP_IF_END]]
 // CHECK19:       omp_if.end:
-// CHECK19-NEXT:    [[TMP45:%.*]] = mul nsw i32 1, [[TMP1]]
-// CHECK19-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i32 [[TMP45]]
+// CHECK19-NEXT:    [[TMP42:%.*]] = mul nsw i32 1, [[TMP1]]
+// CHECK19-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i32 [[TMP42]]
 // CHECK19-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1
-// CHECK19-NEXT:    [[TMP46:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
-// CHECK19-NEXT:    [[CONV:%.*]] = sext i16 [[TMP46]] to i32
-// CHECK19-NEXT:    [[TMP47:%.*]] = load i32, i32* [[B]], align 4
-// CHECK19-NEXT:    [[ADD3:%.*]] = add nsw i32 [[CONV]], [[TMP47]]
-// CHECK19-NEXT:    [[TMP48:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK19-NEXT:    call void @llvm.stackrestore(i8* [[TMP48]])
+// CHECK19-NEXT:    [[TMP43:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
+// CHECK19-NEXT:    [[CONV:%.*]] = sext i16 [[TMP43]] to i32
+// CHECK19-NEXT:    [[TMP44:%.*]] = load i32, i32* [[B]], align 4
+// CHECK19-NEXT:    [[ADD3:%.*]] = add nsw i32 [[CONV]], [[TMP44]]
+// CHECK19-NEXT:    [[TMP45:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK19-NEXT:    call void @llvm.stackrestore(i8* [[TMP45]])
 // CHECK19-NEXT:    ret i32 [[ADD3]]
 //
 //
@@ -15096,7 +14956,7 @@ int bar(int n){
 // CHECK19-NEXT:    [[ADD6:%.*]] = add i32 [[TMP40]], 1
 // CHECK19-NEXT:    [[TMP41:%.*]] = zext i32 [[ADD6]] to i64
 // CHECK19-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 [[TMP41]])
-// CHECK19-NEXT:    [[TMP42:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l200.region_id, i32 5, i8** [[TMP34]], i8** [[TMP35]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_sizes.14, i32 0, i32 0), i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.15, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK19-NEXT:    [[TMP42:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l200.region_id, i32 5, i8** [[TMP34]], i8** [[TMP35]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_sizes.16, i32 0, i32 0), i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.17, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK19-NEXT:    [[TMP43:%.*]] = icmp ne i32 [[TMP42]], 0
 // CHECK19-NEXT:    br i1 [[TMP43]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK19:       omp_offload.failed:
@@ -15166,7 +15026,7 @@ int bar(int n){
 // CHECK19-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK19-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK19-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK19-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l183.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.17, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.18, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK19-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l183.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.19, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.20, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK19-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK19-NEXT:    br i1 [[TMP23]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK19:       omp_offload.failed:
@@ -15203,11 +15063,11 @@ int bar(int n){
 // CHECK19-NEXT:    [[TMP4:%.*]] = load i32, i32* [[B_ADDR]], align 4
 // CHECK19-NEXT:    store i32 [[TMP4]], i32* [[B_CASTED]], align 4
 // CHECK19-NEXT:    [[TMP5:%.*]] = load i32, i32* [[B_CASTED]], align 4
-// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i32, i32, i32, i16*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i32 [[TMP5]], i32 [[TMP1]], i32 [[TMP2]], i16* [[TMP3]])
+// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i32, i32, i32, i16*)* @.omp_outlined..12 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i32 [[TMP5]], i32 [[TMP1]], i32 [[TMP2]], i16* [[TMP3]])
 // CHECK19-NEXT:    ret void
 //
 //
-// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..11
+// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..12
 // CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR2]] {
 // CHECK19-NEXT:  entry:
 // CHECK19-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -15329,11 +15189,11 @@ int bar(int n){
 // CHECK19-NEXT:    [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8*
 // CHECK19-NEXT:    store i8 [[TMP7]], i8* [[CONV3]], align 1
 // CHECK19-NEXT:    [[TMP8:%.*]] = load i32, i32* [[AAA_CASTED]], align 4
-// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, [10 x i32]*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], i32 [[TMP8]], [10 x i32]* [[TMP0]])
+// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], i32 [[TMP8]], [10 x i32]* [[TMP0]])
 // CHECK19-NEXT:    ret void
 //
 //
-// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..13
+// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..15
 // CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] {
 // CHECK19-NEXT:  entry:
 // CHECK19-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -15475,11 +15335,11 @@ int bar(int n){
 // CHECK19-NEXT:    [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16*
 // CHECK19-NEXT:    store i16 [[TMP3]], i16* [[CONV1]], align 2
 // CHECK19-NEXT:    [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4
-// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], [10 x i32]* [[TMP0]])
+// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], [10 x i32]* [[TMP0]])
 // CHECK19-NEXT:    ret void
 //
 //
-// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..16
+// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..18
 // CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] {
 // CHECK19-NEXT:  entry:
 // CHECK19-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -15778,14 +15638,14 @@ int bar(int n){
 // CHECK20-NEXT:    [[TMP95:%.*]] = mul nuw i32 5, [[TMP3]]
 // CHECK20-NEXT:    [[TMP96:%.*]] = mul nuw i32 [[TMP95]], 8
 // CHECK20-NEXT:    [[TMP97:%.*]] = sext i32 [[TMP96]] to i64
-// CHECK20-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP99:%.*]] = bitcast i8** [[TMP98]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP89]], i32* [[TMP99]], align 4
-// CHECK20-NEXT:    [[TMP100:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS24]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP101:%.*]] = bitcast i8** [[TMP100]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP89]], i32* [[TMP101]], align 4
-// CHECK20-NEXT:    [[TMP102:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK20-NEXT:    store i64 4, i64* [[TMP102]], align 4
+// CHECK20-NEXT:    [[TMP98:%.*]] = bitcast [10 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK20-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP98]], i8* align 4 bitcast ([10 x i64]* @.offload_sizes.10 to i8*), i32 80, i1 false)
+// CHECK20-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP100:%.*]] = bitcast i8** [[TMP99]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP89]], i32* [[TMP100]], align 4
+// CHECK20-NEXT:    [[TMP101:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS24]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP102:%.*]] = bitcast i8** [[TMP101]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP89]], i32* [[TMP102]], align 4
 // CHECK20-NEXT:    [[TMP103:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS25]], i32 0, i32 0
 // CHECK20-NEXT:    store i8* null, i8** [[TMP103]], align 4
 // CHECK20-NEXT:    [[TMP104:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 1
@@ -15794,97 +15654,83 @@ int bar(int n){
 // CHECK20-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS24]], i32 0, i32 1
 // CHECK20-NEXT:    [[TMP107:%.*]] = bitcast i8** [[TMP106]] to [10 x float]**
 // CHECK20-NEXT:    store [10 x float]* [[B]], [10 x float]** [[TMP107]], align 4
-// CHECK20-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK20-NEXT:    store i64 40, i64* [[TMP108]], align 4
-// CHECK20-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS25]], i32 0, i32 1
-// CHECK20-NEXT:    store i8* null, i8** [[TMP109]], align 4
-// CHECK20-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 2
-// CHECK20-NEXT:    [[TMP111:%.*]] = bitcast i8** [[TMP110]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP1]], i32* [[TMP111]], align 4
-// CHECK20-NEXT:    [[TMP112:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS24]], i32 0, i32 2
-// CHECK20-NEXT:    [[TMP113:%.*]] = bitcast i8** [[TMP112]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP1]], i32* [[TMP113]], align 4
-// CHECK20-NEXT:    [[TMP114:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK20-NEXT:    store i64 4, i64* [[TMP114]], align 4
-// CHECK20-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS25]], i32 0, i32 2
-// CHECK20-NEXT:    store i8* null, i8** [[TMP115]], align 4
-// CHECK20-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 3
+// CHECK20-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS25]], i32 0, i32 1
+// CHECK20-NEXT:    store i8* null, i8** [[TMP108]], align 4
+// CHECK20-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 2
+// CHECK20-NEXT:    [[TMP110:%.*]] = bitcast i8** [[TMP109]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP1]], i32* [[TMP110]], align 4
+// CHECK20-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS24]], i32 0, i32 2
+// CHECK20-NEXT:    [[TMP112:%.*]] = bitcast i8** [[TMP111]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP1]], i32* [[TMP112]], align 4
+// CHECK20-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS25]], i32 0, i32 2
+// CHECK20-NEXT:    store i8* null, i8** [[TMP113]], align 4
+// CHECK20-NEXT:    [[TMP114:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 3
+// CHECK20-NEXT:    [[TMP115:%.*]] = bitcast i8** [[TMP114]] to float**
+// CHECK20-NEXT:    store float* [[VLA]], float** [[TMP115]], align 4
+// CHECK20-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS24]], i32 0, i32 3
 // CHECK20-NEXT:    [[TMP117:%.*]] = bitcast i8** [[TMP116]] to float**
 // CHECK20-NEXT:    store float* [[VLA]], float** [[TMP117]], align 4
-// CHECK20-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS24]], i32 0, i32 3
-// CHECK20-NEXT:    [[TMP119:%.*]] = bitcast i8** [[TMP118]] to float**
-// CHECK20-NEXT:    store float* [[VLA]], float** [[TMP119]], align 4
-// CHECK20-NEXT:    [[TMP120:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK20-NEXT:    store i64 [[TMP94]], i64* [[TMP120]], align 4
-// CHECK20-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS25]], i32 0, i32 3
-// CHECK20-NEXT:    store i8* null, i8** [[TMP121]], align 4
-// CHECK20-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 4
+// CHECK20-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
+// CHECK20-NEXT:    store i64 [[TMP94]], i64* [[TMP118]], align 4
+// CHECK20-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS25]], i32 0, i32 3
+// CHECK20-NEXT:    store i8* null, i8** [[TMP119]], align 4
+// CHECK20-NEXT:    [[TMP120:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 4
+// CHECK20-NEXT:    [[TMP121:%.*]] = bitcast i8** [[TMP120]] to [5 x [10 x double]]**
+// CHECK20-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP121]], align 4
+// CHECK20-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS24]], i32 0, i32 4
 // CHECK20-NEXT:    [[TMP123:%.*]] = bitcast i8** [[TMP122]] to [5 x [10 x double]]**
 // CHECK20-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP123]], align 4
-// CHECK20-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS24]], i32 0, i32 4
-// CHECK20-NEXT:    [[TMP125:%.*]] = bitcast i8** [[TMP124]] to [5 x [10 x double]]**
-// CHECK20-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP125]], align 4
-// CHECK20-NEXT:    [[TMP126:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK20-NEXT:    store i64 400, i64* [[TMP126]], align 4
-// CHECK20-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS25]], i32 0, i32 4
-// CHECK20-NEXT:    store i8* null, i8** [[TMP127]], align 4
-// CHECK20-NEXT:    [[TMP128:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 5
-// CHECK20-NEXT:    [[TMP129:%.*]] = bitcast i8** [[TMP128]] to i32*
-// CHECK20-NEXT:    store i32 5, i32* [[TMP129]], align 4
-// CHECK20-NEXT:    [[TMP130:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS24]], i32 0, i32 5
+// CHECK20-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS25]], i32 0, i32 4
+// CHECK20-NEXT:    store i8* null, i8** [[TMP124]], align 4
+// CHECK20-NEXT:    [[TMP125:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 5
+// CHECK20-NEXT:    [[TMP126:%.*]] = bitcast i8** [[TMP125]] to i32*
+// CHECK20-NEXT:    store i32 5, i32* [[TMP126]], align 4
+// CHECK20-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS24]], i32 0, i32 5
+// CHECK20-NEXT:    [[TMP128:%.*]] = bitcast i8** [[TMP127]] to i32*
+// CHECK20-NEXT:    store i32 5, i32* [[TMP128]], align 4
+// CHECK20-NEXT:    [[TMP129:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS25]], i32 0, i32 5
+// CHECK20-NEXT:    store i8* null, i8** [[TMP129]], align 4
+// CHECK20-NEXT:    [[TMP130:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 6
 // CHECK20-NEXT:    [[TMP131:%.*]] = bitcast i8** [[TMP130]] to i32*
-// CHECK20-NEXT:    store i32 5, i32* [[TMP131]], align 4
-// CHECK20-NEXT:    [[TMP132:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 5
-// CHECK20-NEXT:    store i64 4, i64* [[TMP132]], align 4
-// CHECK20-NEXT:    [[TMP133:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS25]], i32 0, i32 5
-// CHECK20-NEXT:    store i8* null, i8** [[TMP133]], align 4
-// CHECK20-NEXT:    [[TMP134:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 6
-// CHECK20-NEXT:    [[TMP135:%.*]] = bitcast i8** [[TMP134]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP3]], i32* [[TMP135]], align 4
-// CHECK20-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS24]], i32 0, i32 6
-// CHECK20-NEXT:    [[TMP137:%.*]] = bitcast i8** [[TMP136]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP3]], i32* [[TMP137]], align 4
-// CHECK20-NEXT:    [[TMP138:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 6
-// CHECK20-NEXT:    store i64 4, i64* [[TMP138]], align 4
-// CHECK20-NEXT:    [[TMP139:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS25]], i32 0, i32 6
-// CHECK20-NEXT:    store i8* null, i8** [[TMP139]], align 4
-// CHECK20-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 7
-// CHECK20-NEXT:    [[TMP141:%.*]] = bitcast i8** [[TMP140]] to double**
-// CHECK20-NEXT:    store double* [[VLA1]], double** [[TMP141]], align 4
-// CHECK20-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS24]], i32 0, i32 7
-// CHECK20-NEXT:    [[TMP143:%.*]] = bitcast i8** [[TMP142]] to double**
-// CHECK20-NEXT:    store double* [[VLA1]], double** [[TMP143]], align 4
-// CHECK20-NEXT:    [[TMP144:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
-// CHECK20-NEXT:    store i64 [[TMP97]], i64* [[TMP144]], align 4
-// CHECK20-NEXT:    [[TMP145:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS25]], i32 0, i32 7
+// CHECK20-NEXT:    store i32 [[TMP3]], i32* [[TMP131]], align 4
+// CHECK20-NEXT:    [[TMP132:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS24]], i32 0, i32 6
+// CHECK20-NEXT:    [[TMP133:%.*]] = bitcast i8** [[TMP132]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP3]], i32* [[TMP133]], align 4
+// CHECK20-NEXT:    [[TMP134:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS25]], i32 0, i32 6
+// CHECK20-NEXT:    store i8* null, i8** [[TMP134]], align 4
+// CHECK20-NEXT:    [[TMP135:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 7
+// CHECK20-NEXT:    [[TMP136:%.*]] = bitcast i8** [[TMP135]] to double**
+// CHECK20-NEXT:    store double* [[VLA1]], double** [[TMP136]], align 4
+// CHECK20-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS24]], i32 0, i32 7
+// CHECK20-NEXT:    [[TMP138:%.*]] = bitcast i8** [[TMP137]] to double**
+// CHECK20-NEXT:    store double* [[VLA1]], double** [[TMP138]], align 4
+// CHECK20-NEXT:    [[TMP139:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
+// CHECK20-NEXT:    store i64 [[TMP97]], i64* [[TMP139]], align 4
+// CHECK20-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS25]], i32 0, i32 7
+// CHECK20-NEXT:    store i8* null, i8** [[TMP140]], align 4
+// CHECK20-NEXT:    [[TMP141:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 8
+// CHECK20-NEXT:    [[TMP142:%.*]] = bitcast i8** [[TMP141]] to %struct.TT**
+// CHECK20-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP142]], align 4
+// CHECK20-NEXT:    [[TMP143:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS24]], i32 0, i32 8
+// CHECK20-NEXT:    [[TMP144:%.*]] = bitcast i8** [[TMP143]] to %struct.TT**
+// CHECK20-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP144]], align 4
+// CHECK20-NEXT:    [[TMP145:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS25]], i32 0, i32 8
 // CHECK20-NEXT:    store i8* null, i8** [[TMP145]], align 4
-// CHECK20-NEXT:    [[TMP146:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 8
-// CHECK20-NEXT:    [[TMP147:%.*]] = bitcast i8** [[TMP146]] to %struct.TT**
-// CHECK20-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP147]], align 4
-// CHECK20-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS24]], i32 0, i32 8
-// CHECK20-NEXT:    [[TMP149:%.*]] = bitcast i8** [[TMP148]] to %struct.TT**
-// CHECK20-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP149]], align 4
-// CHECK20-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 8
-// CHECK20-NEXT:    store i64 12, i64* [[TMP150]], align 4
-// CHECK20-NEXT:    [[TMP151:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS25]], i32 0, i32 8
-// CHECK20-NEXT:    store i8* null, i8** [[TMP151]], align 4
-// CHECK20-NEXT:    [[TMP152:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 9
-// CHECK20-NEXT:    [[TMP153:%.*]] = bitcast i8** [[TMP152]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP91]], i32* [[TMP153]], align 4
-// CHECK20-NEXT:    [[TMP154:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS24]], i32 0, i32 9
-// CHECK20-NEXT:    [[TMP155:%.*]] = bitcast i8** [[TMP154]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP91]], i32* [[TMP155]], align 4
-// CHECK20-NEXT:    [[TMP156:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 9
-// CHECK20-NEXT:    store i64 4, i64* [[TMP156]], align 4
-// CHECK20-NEXT:    [[TMP157:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS25]], i32 0, i32 9
-// CHECK20-NEXT:    store i8* null, i8** [[TMP157]], align 4
-// CHECK20-NEXT:    [[TMP158:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP159:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS24]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP160:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP146:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 9
+// CHECK20-NEXT:    [[TMP147:%.*]] = bitcast i8** [[TMP146]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP91]], i32* [[TMP147]], align 4
+// CHECK20-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS24]], i32 0, i32 9
+// CHECK20-NEXT:    [[TMP149:%.*]] = bitcast i8** [[TMP148]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP91]], i32* [[TMP149]], align 4
+// CHECK20-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS25]], i32 0, i32 9
+// CHECK20-NEXT:    store i8* null, i8** [[TMP150]], align 4
+// CHECK20-NEXT:    [[TMP151:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP152:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS24]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP153:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
 // CHECK20-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK20-NEXT:    [[TMP161:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l145.region_id, i32 10, i8** [[TMP158]], i8** [[TMP159]], i64* [[TMP160]], i64* getelementptr inbounds ([10 x i64], [10 x i64]* @.offload_maptypes.10, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK20-NEXT:    [[TMP162:%.*]] = icmp ne i32 [[TMP161]], 0
-// CHECK20-NEXT:    br i1 [[TMP162]], label [[OMP_OFFLOAD_FAILED27:%.*]], label [[OMP_OFFLOAD_CONT28:%.*]]
+// CHECK20-NEXT:    [[TMP154:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l145.region_id, i32 10, i8** [[TMP151]], i8** [[TMP152]], i64* [[TMP153]], i64* getelementptr inbounds ([10 x i64], [10 x i64]* @.offload_maptypes.11, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK20-NEXT:    [[TMP155:%.*]] = icmp ne i32 [[TMP154]], 0
+// CHECK20-NEXT:    br i1 [[TMP155]], label [[OMP_OFFLOAD_FAILED27:%.*]], label [[OMP_OFFLOAD_CONT28:%.*]]
 // CHECK20:       omp_offload.failed27:
 // CHECK20-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l145(i32 [[TMP89]], [10 x float]* [[B]], i32 [[TMP1]], float* [[VLA]], [5 x [10 x double]]* [[C]], i32 5, i32 [[TMP3]], double* [[VLA1]], %struct.TT* [[D]], i32 [[TMP91]]) #[[ATTR3]]
 // CHECK20-NEXT:    br label [[OMP_OFFLOAD_CONT28]]
@@ -15894,10 +15740,10 @@ int bar(int n){
 // CHECK20-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l145(i32 [[TMP89]], [10 x float]* [[B]], i32 [[TMP1]], float* [[VLA]], [5 x [10 x double]]* [[C]], i32 5, i32 [[TMP3]], double* [[VLA1]], %struct.TT* [[D]], i32 [[TMP91]]) #[[ATTR3]]
 // CHECK20-NEXT:    br label [[OMP_IF_END30]]
 // CHECK20:       omp_if.end30:
-// CHECK20-NEXT:    [[TMP163:%.*]] = load i32, i32* [[A]], align 4
-// CHECK20-NEXT:    [[TMP164:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK20-NEXT:    call void @llvm.stackrestore(i8* [[TMP164]])
-// CHECK20-NEXT:    ret i32 [[TMP163]]
+// CHECK20-NEXT:    [[TMP156:%.*]] = load i32, i32* [[A]], align 4
+// CHECK20-NEXT:    [[TMP157:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK20-NEXT:    call void @llvm.stackrestore(i8* [[TMP157]])
+// CHECK20-NEXT:    ret i32 [[TMP156]]
 //
 //
 // CHECK20-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l103
@@ -16601,14 +16447,14 @@ int bar(int n){
 // CHECK20-NEXT:    [[TMP7:%.*]] = mul nuw i32 2, [[TMP1]]
 // CHECK20-NEXT:    [[TMP8:%.*]] = mul nuw i32 [[TMP7]], 2
 // CHECK20-NEXT:    [[TMP9:%.*]] = sext i32 [[TMP8]] to i64
-// CHECK20-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP11:%.*]] = bitcast i8** [[TMP10]] to %struct.S1**
-// CHECK20-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP11]], align 4
-// CHECK20-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP13:%.*]] = bitcast i8** [[TMP12]] to double**
-// CHECK20-NEXT:    store double* [[A]], double** [[TMP13]], align 4
-// CHECK20-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK20-NEXT:    store i64 8, i64* [[TMP14]], align 4
+// CHECK20-NEXT:    [[TMP10:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK20-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP10]], i8* align 4 bitcast ([5 x i64]* @.offload_sizes.13 to i8*), i32 40, i1 false)
+// CHECK20-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP12:%.*]] = bitcast i8** [[TMP11]] to %struct.S1**
+// CHECK20-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP12]], align 4
+// CHECK20-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to double**
+// CHECK20-NEXT:    store double* [[A]], double** [[TMP14]], align 4
 // CHECK20-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
 // CHECK20-NEXT:    store i8* null, i8** [[TMP15]], align 4
 // CHECK20-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -16617,47 +16463,41 @@ int bar(int n){
 // CHECK20-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK20-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32*
 // CHECK20-NEXT:    store i32 [[TMP5]], i32* [[TMP19]], align 4
-// CHECK20-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK20-NEXT:    store i64 4, i64* [[TMP20]], align 4
-// CHECK20-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK20-NEXT:    store i8* null, i8** [[TMP21]], align 4
-// CHECK20-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK20-NEXT:    [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i32*
-// CHECK20-NEXT:    store i32 2, i32* [[TMP23]], align 4
-// CHECK20-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK20-NEXT:    [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i32*
-// CHECK20-NEXT:    store i32 2, i32* [[TMP25]], align 4
-// CHECK20-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK20-NEXT:    store i64 4, i64* [[TMP26]], align 4
-// CHECK20-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK20-NEXT:    store i8* null, i8** [[TMP27]], align 4
-// CHECK20-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK20-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK20-NEXT:    store i8* null, i8** [[TMP20]], align 4
+// CHECK20-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK20-NEXT:    [[TMP22:%.*]] = bitcast i8** [[TMP21]] to i32*
+// CHECK20-NEXT:    store i32 2, i32* [[TMP22]], align 4
+// CHECK20-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK20-NEXT:    [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i32*
+// CHECK20-NEXT:    store i32 2, i32* [[TMP24]], align 4
+// CHECK20-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK20-NEXT:    store i8* null, i8** [[TMP25]], align 4
+// CHECK20-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK20-NEXT:    [[TMP27:%.*]] = bitcast i8** [[TMP26]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP1]], i32* [[TMP27]], align 4
+// CHECK20-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
 // CHECK20-NEXT:    [[TMP29:%.*]] = bitcast i8** [[TMP28]] to i32*
 // CHECK20-NEXT:    store i32 [[TMP1]], i32* [[TMP29]], align 4
-// CHECK20-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK20-NEXT:    [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP1]], i32* [[TMP31]], align 4
-// CHECK20-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK20-NEXT:    store i64 4, i64* [[TMP32]], align 4
-// CHECK20-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
-// CHECK20-NEXT:    store i8* null, i8** [[TMP33]], align 4
-// CHECK20-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK20-NEXT:    [[TMP35:%.*]] = bitcast i8** [[TMP34]] to i16**
-// CHECK20-NEXT:    store i16* [[VLA]], i16** [[TMP35]], align 4
-// CHECK20-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
-// CHECK20-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i16**
-// CHECK20-NEXT:    store i16* [[VLA]], i16** [[TMP37]], align 4
-// CHECK20-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK20-NEXT:    store i64 [[TMP9]], i64* [[TMP38]], align 4
-// CHECK20-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
-// CHECK20-NEXT:    store i8* null, i8** [[TMP39]], align 4
-// CHECK20-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
+// CHECK20-NEXT:    store i8* null, i8** [[TMP30]], align 4
+// CHECK20-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK20-NEXT:    [[TMP32:%.*]] = bitcast i8** [[TMP31]] to i16**
+// CHECK20-NEXT:    store i16* [[VLA]], i16** [[TMP32]], align 4
+// CHECK20-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK20-NEXT:    [[TMP34:%.*]] = bitcast i8** [[TMP33]] to i16**
+// CHECK20-NEXT:    store i16* [[VLA]], i16** [[TMP34]], align 4
+// CHECK20-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
+// CHECK20-NEXT:    store i64 [[TMP9]], i64* [[TMP35]], align 4
+// CHECK20-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
+// CHECK20-NEXT:    store i8* null, i8** [[TMP36]], align 4
+// CHECK20-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
 // CHECK20-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK20-NEXT:    [[TMP43:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l218.region_id, i32 5, i8** [[TMP40]], i8** [[TMP41]], i64* [[TMP42]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.12, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK20-NEXT:    [[TMP44:%.*]] = icmp ne i32 [[TMP43]], 0
-// CHECK20-NEXT:    br i1 [[TMP44]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK20-NEXT:    [[TMP40:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l218.region_id, i32 5, i8** [[TMP37]], i8** [[TMP38]], i64* [[TMP39]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.14, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK20-NEXT:    [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0
+// CHECK20-NEXT:    br i1 [[TMP41]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK20:       omp_offload.failed:
 // CHECK20-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l218(%struct.S1* [[THIS1]], i32 [[TMP5]], i32 2, i32 [[TMP1]], i16* [[VLA]]) #[[ATTR3]]
 // CHECK20-NEXT:    br label [[OMP_OFFLOAD_CONT]]
@@ -16667,15 +16507,15 @@ int bar(int n){
 // CHECK20-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l218(%struct.S1* [[THIS1]], i32 [[TMP5]], i32 2, i32 [[TMP1]], i16* [[VLA]]) #[[ATTR3]]
 // CHECK20-NEXT:    br label [[OMP_IF_END]]
 // CHECK20:       omp_if.end:
-// CHECK20-NEXT:    [[TMP45:%.*]] = mul nsw i32 1, [[TMP1]]
-// CHECK20-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i32 [[TMP45]]
+// CHECK20-NEXT:    [[TMP42:%.*]] = mul nsw i32 1, [[TMP1]]
+// CHECK20-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i32 [[TMP42]]
 // CHECK20-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1
-// CHECK20-NEXT:    [[TMP46:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
-// CHECK20-NEXT:    [[CONV:%.*]] = sext i16 [[TMP46]] to i32
-// CHECK20-NEXT:    [[TMP47:%.*]] = load i32, i32* [[B]], align 4
-// CHECK20-NEXT:    [[ADD3:%.*]] = add nsw i32 [[CONV]], [[TMP47]]
-// CHECK20-NEXT:    [[TMP48:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK20-NEXT:    call void @llvm.stackrestore(i8* [[TMP48]])
+// CHECK20-NEXT:    [[TMP43:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
+// CHECK20-NEXT:    [[CONV:%.*]] = sext i16 [[TMP43]] to i32
+// CHECK20-NEXT:    [[TMP44:%.*]] = load i32, i32* [[B]], align 4
+// CHECK20-NEXT:    [[ADD3:%.*]] = add nsw i32 [[CONV]], [[TMP44]]
+// CHECK20-NEXT:    [[TMP45:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK20-NEXT:    call void @llvm.stackrestore(i8* [[TMP45]])
 // CHECK20-NEXT:    ret i32 [[ADD3]]
 //
 //
@@ -16778,7 +16618,7 @@ int bar(int n){
 // CHECK20-NEXT:    [[ADD6:%.*]] = add i32 [[TMP40]], 1
 // CHECK20-NEXT:    [[TMP41:%.*]] = zext i32 [[ADD6]] to i64
 // CHECK20-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 [[TMP41]])
-// CHECK20-NEXT:    [[TMP42:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l200.region_id, i32 5, i8** [[TMP34]], i8** [[TMP35]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_sizes.14, i32 0, i32 0), i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.15, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK20-NEXT:    [[TMP42:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l200.region_id, i32 5, i8** [[TMP34]], i8** [[TMP35]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_sizes.16, i32 0, i32 0), i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.17, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK20-NEXT:    [[TMP43:%.*]] = icmp ne i32 [[TMP42]], 0
 // CHECK20-NEXT:    br i1 [[TMP43]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK20:       omp_offload.failed:
@@ -16848,7 +16688,7 @@ int bar(int n){
 // CHECK20-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK20-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK20-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK20-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l183.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.17, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.18, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK20-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l183.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.19, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.20, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK20-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK20-NEXT:    br i1 [[TMP23]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK20:       omp_offload.failed:
@@ -16885,11 +16725,11 @@ int bar(int n){
 // CHECK20-NEXT:    [[TMP4:%.*]] = load i32, i32* [[B_ADDR]], align 4
 // CHECK20-NEXT:    store i32 [[TMP4]], i32* [[B_CASTED]], align 4
 // CHECK20-NEXT:    [[TMP5:%.*]] = load i32, i32* [[B_CASTED]], align 4
-// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i32, i32, i32, i16*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i32 [[TMP5]], i32 [[TMP1]], i32 [[TMP2]], i16* [[TMP3]])
+// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i32, i32, i32, i16*)* @.omp_outlined..12 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i32 [[TMP5]], i32 [[TMP1]], i32 [[TMP2]], i16* [[TMP3]])
 // CHECK20-NEXT:    ret void
 //
 //
-// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..11
+// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..12
 // CHECK20-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR2]] {
 // CHECK20-NEXT:  entry:
 // CHECK20-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -17011,11 +16851,11 @@ int bar(int n){
 // CHECK20-NEXT:    [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8*
 // CHECK20-NEXT:    store i8 [[TMP7]], i8* [[CONV3]], align 1
 // CHECK20-NEXT:    [[TMP8:%.*]] = load i32, i32* [[AAA_CASTED]], align 4
-// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, [10 x i32]*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], i32 [[TMP8]], [10 x i32]* [[TMP0]])
+// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], i32 [[TMP8]], [10 x i32]* [[TMP0]])
 // CHECK20-NEXT:    ret void
 //
 //
-// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..13
+// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..15
 // CHECK20-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] {
 // CHECK20-NEXT:  entry:
 // CHECK20-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -17157,11 +16997,11 @@ int bar(int n){
 // CHECK20-NEXT:    [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16*
 // CHECK20-NEXT:    store i16 [[TMP3]], i16* [[CONV1]], align 2
 // CHECK20-NEXT:    [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4
-// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], [10 x i32]* [[TMP0]])
+// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], [10 x i32]* [[TMP0]])
 // CHECK20-NEXT:    ret void
 //
 //
-// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..16
+// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..18
 // CHECK20-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] {
 // CHECK20-NEXT:  entry:
 // CHECK20-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4

diff  --git a/clang/test/OpenMP/target_teams_distribute_collapse_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_collapse_codegen.cpp
index c754ff93a12b0..23ebaef6e77b3 100644
--- a/clang/test/OpenMP/target_teams_distribute_collapse_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_collapse_codegen.cpp
@@ -713,14 +713,14 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP9:%.*]] = load i64, i64* [[M_CASTED]], align 8
 // CHECK9-NEXT:    [[TMP10:%.*]] = mul nuw i64 [[TMP1]], [[TMP3]]
 // CHECK9-NEXT:    [[TMP11:%.*]] = mul nuw i64 [[TMP10]], 4
-// CHECK9-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP13:%.*]] = bitcast i8** [[TMP12]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP7]], i64* [[TMP13]], align 8
-// CHECK9-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP7]], i64* [[TMP15]], align 8
-// CHECK9-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK9-NEXT:    store i64 4, i64* [[TMP16]], align 8
+// CHECK9-NEXT:    [[TMP12:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK9-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP12]], i8* align 8 bitcast ([5 x i64]* @.offload_sizes to i8*), i64 40, i1 false)
+// CHECK9-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP7]], i64* [[TMP14]], align 8
+// CHECK9-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP16:%.*]] = bitcast i8** [[TMP15]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP7]], i64* [[TMP16]], align 8
 // CHECK9-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
 // CHECK9-NEXT:    store i8* null, i8** [[TMP17]], align 8
 // CHECK9-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -729,75 +729,69 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK9-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i64*
 // CHECK9-NEXT:    store i64 [[TMP9]], i64* [[TMP21]], align 8
-// CHECK9-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK9-NEXT:    store i64 4, i64* [[TMP22]], align 8
-// CHECK9-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK9-NEXT:    store i8* null, i8** [[TMP23]], align 8
-// CHECK9-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP25]], align 8
-// CHECK9-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP27:%.*]] = bitcast i8** [[TMP26]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP27]], align 8
-// CHECK9-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK9-NEXT:    store i64 8, i64* [[TMP28]], align 8
-// CHECK9-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK9-NEXT:    store i8* null, i8** [[TMP29]], align 8
-// CHECK9-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK9-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK9-NEXT:    store i8* null, i8** [[TMP22]], align 8
+// CHECK9-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP24]], align 8
+// CHECK9-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP26]], align 8
+// CHECK9-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK9-NEXT:    store i8* null, i8** [[TMP27]], align 8
+// CHECK9-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK9-NEXT:    [[TMP29:%.*]] = bitcast i8** [[TMP28]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP3]], i64* [[TMP29]], align 8
+// CHECK9-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
 // CHECK9-NEXT:    [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i64*
 // CHECK9-NEXT:    store i64 [[TMP3]], i64* [[TMP31]], align 8
-// CHECK9-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK9-NEXT:    [[TMP33:%.*]] = bitcast i8** [[TMP32]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP3]], i64* [[TMP33]], align 8
-// CHECK9-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK9-NEXT:    store i64 8, i64* [[TMP34]], align 8
-// CHECK9-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
-// CHECK9-NEXT:    store i8* null, i8** [[TMP35]], align 8
-// CHECK9-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK9-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i32**
-// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP37]], align 8
-// CHECK9-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
-// CHECK9-NEXT:    [[TMP39:%.*]] = bitcast i8** [[TMP38]] to i32**
-// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP39]], align 8
-// CHECK9-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK9-NEXT:    store i64 [[TMP11]], i64* [[TMP40]], align 8
-// CHECK9-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
-// CHECK9-NEXT:    store i8* null, i8** [[TMP41]], align 8
-// CHECK9-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP45:%.*]] = load i32, i32* [[N]], align 4
-// CHECK9-NEXT:    store i32 [[TMP45]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK9-NEXT:    [[TMP46:%.*]] = load i32, i32* [[M]], align 4
-// CHECK9-NEXT:    store i32 [[TMP46]], i32* [[DOTCAPTURE_EXPR_3]], align 4
-// CHECK9-NEXT:    [[TMP47:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK9-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP47]], 0
+// CHECK9-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
+// CHECK9-NEXT:    store i8* null, i8** [[TMP32]], align 8
+// CHECK9-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK9-NEXT:    [[TMP34:%.*]] = bitcast i8** [[TMP33]] to i32**
+// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP34]], align 8
+// CHECK9-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK9-NEXT:    [[TMP36:%.*]] = bitcast i8** [[TMP35]] to i32**
+// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP36]], align 8
+// CHECK9-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
+// CHECK9-NEXT:    store i64 [[TMP11]], i64* [[TMP37]], align 8
+// CHECK9-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
+// CHECK9-NEXT:    store i8* null, i8** [[TMP38]], align 8
+// CHECK9-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP42:%.*]] = load i32, i32* [[N]], align 4
+// CHECK9-NEXT:    store i32 [[TMP42]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK9-NEXT:    [[TMP43:%.*]] = load i32, i32* [[M]], align 4
+// CHECK9-NEXT:    store i32 [[TMP43]], i32* [[DOTCAPTURE_EXPR_3]], align 4
+// CHECK9-NEXT:    [[TMP44:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK9-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP44]], 0
 // CHECK9-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK9-NEXT:    [[CONV5:%.*]] = sext i32 [[DIV]] to i64
-// CHECK9-NEXT:    [[TMP48:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4
-// CHECK9-NEXT:    [[SUB6:%.*]] = sub nsw i32 [[TMP48]], 0
+// CHECK9-NEXT:    [[TMP45:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4
+// CHECK9-NEXT:    [[SUB6:%.*]] = sub nsw i32 [[TMP45]], 0
 // CHECK9-NEXT:    [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1
 // CHECK9-NEXT:    [[CONV8:%.*]] = sext i32 [[DIV7]] to i64
 // CHECK9-NEXT:    [[MUL:%.*]] = mul nsw i64 [[CONV5]], [[CONV8]]
 // CHECK9-NEXT:    [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1
 // CHECK9-NEXT:    store i64 [[SUB9]], i64* [[DOTCAPTURE_EXPR_4]], align 8
-// CHECK9-NEXT:    [[TMP49:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_4]], align 8
-// CHECK9-NEXT:    [[ADD:%.*]] = add nsw i64 [[TMP49]], 1
+// CHECK9-NEXT:    [[TMP46:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_4]], align 8
+// CHECK9-NEXT:    [[ADD:%.*]] = add nsw i64 [[TMP46]], 1
 // CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 [[ADD]])
-// CHECK9-NEXT:    [[TMP50:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l80.region_id, i32 5, i8** [[TMP42]], i8** [[TMP43]], i64* [[TMP44]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK9-NEXT:    [[TMP51:%.*]] = icmp ne i32 [[TMP50]], 0
-// CHECK9-NEXT:    br i1 [[TMP51]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK9-NEXT:    [[TMP47:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l80.region_id, i32 5, i8** [[TMP39]], i8** [[TMP40]], i64* [[TMP41]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK9-NEXT:    [[TMP48:%.*]] = icmp ne i32 [[TMP47]], 0
+// CHECK9-NEXT:    br i1 [[TMP48]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK9:       omp_offload.failed:
 // CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l80(i64 [[TMP7]], i64 [[TMP9]], i64 [[TMP1]], i64 [[TMP3]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK9:       omp_offload.cont:
-// CHECK9-NEXT:    [[TMP52:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK9-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef signext [[TMP52]])
+// CHECK9-NEXT:    [[TMP49:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK9-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef signext [[TMP49]])
 // CHECK9-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK9-NEXT:    [[TMP53:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK9-NEXT:    call void @llvm.stackrestore(i8* [[TMP53]])
-// CHECK9-NEXT:    [[TMP54:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK9-NEXT:    ret i32 [[TMP54]]
+// CHECK9-NEXT:    [[TMP50:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK9-NEXT:    call void @llvm.stackrestore(i8* [[TMP50]])
+// CHECK9-NEXT:    [[TMP51:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK9-NEXT:    ret i32 [[TMP51]]
 //
 //
 // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l80
@@ -981,7 +975,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK9-LABEL: define {{[^@]+}}@_Z5tmainIiLi10ELi2EEiT_
-// CHECK9-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK9-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK9-NEXT:  entry:
 // CHECK9-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[A:%.*]] = alloca [10 x [2 x i32]], align 4
@@ -1002,7 +996,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 20)
-// CHECK9-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l67.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.2, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK9-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l67.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.2, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.3, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK9-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK9-NEXT:    br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK9:       omp_offload.failed:
@@ -1104,7 +1098,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK9-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK9-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK9-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK9-NEXT:  entry:
 // CHECK9-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK9-NEXT:    ret void
@@ -1157,14 +1151,14 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[TMP9:%.*]] = load i64, i64* [[M_CASTED]], align 8
 // CHECK10-NEXT:    [[TMP10:%.*]] = mul nuw i64 [[TMP1]], [[TMP3]]
 // CHECK10-NEXT:    [[TMP11:%.*]] = mul nuw i64 [[TMP10]], 4
-// CHECK10-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP13:%.*]] = bitcast i8** [[TMP12]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP7]], i64* [[TMP13]], align 8
-// CHECK10-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP7]], i64* [[TMP15]], align 8
-// CHECK10-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK10-NEXT:    store i64 4, i64* [[TMP16]], align 8
+// CHECK10-NEXT:    [[TMP12:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK10-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP12]], i8* align 8 bitcast ([5 x i64]* @.offload_sizes to i8*), i64 40, i1 false)
+// CHECK10-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP7]], i64* [[TMP14]], align 8
+// CHECK10-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP16:%.*]] = bitcast i8** [[TMP15]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP7]], i64* [[TMP16]], align 8
 // CHECK10-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
 // CHECK10-NEXT:    store i8* null, i8** [[TMP17]], align 8
 // CHECK10-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -1173,75 +1167,69 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK10-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i64*
 // CHECK10-NEXT:    store i64 [[TMP9]], i64* [[TMP21]], align 8
-// CHECK10-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK10-NEXT:    store i64 4, i64* [[TMP22]], align 8
-// CHECK10-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK10-NEXT:    store i8* null, i8** [[TMP23]], align 8
-// CHECK10-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK10-NEXT:    [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP25]], align 8
-// CHECK10-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK10-NEXT:    [[TMP27:%.*]] = bitcast i8** [[TMP26]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP27]], align 8
-// CHECK10-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK10-NEXT:    store i64 8, i64* [[TMP28]], align 8
-// CHECK10-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK10-NEXT:    store i8* null, i8** [[TMP29]], align 8
-// CHECK10-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK10-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK10-NEXT:    store i8* null, i8** [[TMP22]], align 8
+// CHECK10-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK10-NEXT:    [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP24]], align 8
+// CHECK10-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK10-NEXT:    [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP26]], align 8
+// CHECK10-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK10-NEXT:    store i8* null, i8** [[TMP27]], align 8
+// CHECK10-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK10-NEXT:    [[TMP29:%.*]] = bitcast i8** [[TMP28]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP3]], i64* [[TMP29]], align 8
+// CHECK10-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
 // CHECK10-NEXT:    [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i64*
 // CHECK10-NEXT:    store i64 [[TMP3]], i64* [[TMP31]], align 8
-// CHECK10-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK10-NEXT:    [[TMP33:%.*]] = bitcast i8** [[TMP32]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP3]], i64* [[TMP33]], align 8
-// CHECK10-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK10-NEXT:    store i64 8, i64* [[TMP34]], align 8
-// CHECK10-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
-// CHECK10-NEXT:    store i8* null, i8** [[TMP35]], align 8
-// CHECK10-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK10-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i32**
-// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP37]], align 8
-// CHECK10-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
-// CHECK10-NEXT:    [[TMP39:%.*]] = bitcast i8** [[TMP38]] to i32**
-// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP39]], align 8
-// CHECK10-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK10-NEXT:    store i64 [[TMP11]], i64* [[TMP40]], align 8
-// CHECK10-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
-// CHECK10-NEXT:    store i8* null, i8** [[TMP41]], align 8
-// CHECK10-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP45:%.*]] = load i32, i32* [[N]], align 4
-// CHECK10-NEXT:    store i32 [[TMP45]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK10-NEXT:    [[TMP46:%.*]] = load i32, i32* [[M]], align 4
-// CHECK10-NEXT:    store i32 [[TMP46]], i32* [[DOTCAPTURE_EXPR_3]], align 4
-// CHECK10-NEXT:    [[TMP47:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK10-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP47]], 0
+// CHECK10-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
+// CHECK10-NEXT:    store i8* null, i8** [[TMP32]], align 8
+// CHECK10-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK10-NEXT:    [[TMP34:%.*]] = bitcast i8** [[TMP33]] to i32**
+// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP34]], align 8
+// CHECK10-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK10-NEXT:    [[TMP36:%.*]] = bitcast i8** [[TMP35]] to i32**
+// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP36]], align 8
+// CHECK10-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
+// CHECK10-NEXT:    store i64 [[TMP11]], i64* [[TMP37]], align 8
+// CHECK10-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
+// CHECK10-NEXT:    store i8* null, i8** [[TMP38]], align 8
+// CHECK10-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP42:%.*]] = load i32, i32* [[N]], align 4
+// CHECK10-NEXT:    store i32 [[TMP42]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK10-NEXT:    [[TMP43:%.*]] = load i32, i32* [[M]], align 4
+// CHECK10-NEXT:    store i32 [[TMP43]], i32* [[DOTCAPTURE_EXPR_3]], align 4
+// CHECK10-NEXT:    [[TMP44:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK10-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP44]], 0
 // CHECK10-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK10-NEXT:    [[CONV5:%.*]] = sext i32 [[DIV]] to i64
-// CHECK10-NEXT:    [[TMP48:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4
-// CHECK10-NEXT:    [[SUB6:%.*]] = sub nsw i32 [[TMP48]], 0
+// CHECK10-NEXT:    [[TMP45:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4
+// CHECK10-NEXT:    [[SUB6:%.*]] = sub nsw i32 [[TMP45]], 0
 // CHECK10-NEXT:    [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1
 // CHECK10-NEXT:    [[CONV8:%.*]] = sext i32 [[DIV7]] to i64
 // CHECK10-NEXT:    [[MUL:%.*]] = mul nsw i64 [[CONV5]], [[CONV8]]
 // CHECK10-NEXT:    [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1
 // CHECK10-NEXT:    store i64 [[SUB9]], i64* [[DOTCAPTURE_EXPR_4]], align 8
-// CHECK10-NEXT:    [[TMP49:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_4]], align 8
-// CHECK10-NEXT:    [[ADD:%.*]] = add nsw i64 [[TMP49]], 1
+// CHECK10-NEXT:    [[TMP46:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_4]], align 8
+// CHECK10-NEXT:    [[ADD:%.*]] = add nsw i64 [[TMP46]], 1
 // CHECK10-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 [[ADD]])
-// CHECK10-NEXT:    [[TMP50:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l80.region_id, i32 5, i8** [[TMP42]], i8** [[TMP43]], i64* [[TMP44]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK10-NEXT:    [[TMP51:%.*]] = icmp ne i32 [[TMP50]], 0
-// CHECK10-NEXT:    br i1 [[TMP51]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK10-NEXT:    [[TMP47:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l80.region_id, i32 5, i8** [[TMP39]], i8** [[TMP40]], i64* [[TMP41]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK10-NEXT:    [[TMP48:%.*]] = icmp ne i32 [[TMP47]], 0
+// CHECK10-NEXT:    br i1 [[TMP48]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK10:       omp_offload.failed:
 // CHECK10-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l80(i64 [[TMP7]], i64 [[TMP9]], i64 [[TMP1]], i64 [[TMP3]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK10-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK10:       omp_offload.cont:
-// CHECK10-NEXT:    [[TMP52:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK10-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef signext [[TMP52]])
+// CHECK10-NEXT:    [[TMP49:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK10-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef signext [[TMP49]])
 // CHECK10-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK10-NEXT:    [[TMP53:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK10-NEXT:    call void @llvm.stackrestore(i8* [[TMP53]])
-// CHECK10-NEXT:    [[TMP54:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK10-NEXT:    ret i32 [[TMP54]]
+// CHECK10-NEXT:    [[TMP50:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK10-NEXT:    call void @llvm.stackrestore(i8* [[TMP50]])
+// CHECK10-NEXT:    [[TMP51:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK10-NEXT:    ret i32 [[TMP51]]
 //
 //
 // CHECK10-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l80
@@ -1425,7 +1413,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK10-LABEL: define {{[^@]+}}@_Z5tmainIiLi10ELi2EEiT_
-// CHECK10-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK10-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK10-NEXT:  entry:
 // CHECK10-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK10-NEXT:    [[A:%.*]] = alloca [10 x [2 x i32]], align 4
@@ -1446,7 +1434,7 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK10-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK10-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 20)
-// CHECK10-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l67.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.2, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK10-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l67.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.2, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.3, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK10-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK10-NEXT:    br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK10:       omp_offload.failed:
@@ -1548,7 +1536,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK10-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK10-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK10-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK10-NEXT:  entry:
 // CHECK10-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK10-NEXT:    ret void
@@ -1598,14 +1586,14 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP8:%.*]] = mul nuw i32 [[TMP0]], [[TMP1]]
 // CHECK11-NEXT:    [[TMP9:%.*]] = mul nuw i32 [[TMP8]], 4
 // CHECK11-NEXT:    [[TMP10:%.*]] = sext i32 [[TMP9]] to i64
-// CHECK11-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP5]], i32* [[TMP12]], align 4
-// CHECK11-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP5]], i32* [[TMP14]], align 4
-// CHECK11-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK11-NEXT:    store i64 4, i64* [[TMP15]], align 4
+// CHECK11-NEXT:    [[TMP11:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK11-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP11]], i8* align 4 bitcast ([5 x i64]* @.offload_sizes to i8*), i32 40, i1 false)
+// CHECK11-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP13:%.*]] = bitcast i8** [[TMP12]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP5]], i32* [[TMP13]], align 4
+// CHECK11-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP5]], i32* [[TMP15]], align 4
 // CHECK11-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
 // CHECK11-NEXT:    store i8* null, i8** [[TMP16]], align 4
 // CHECK11-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -1614,75 +1602,69 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK11-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32*
 // CHECK11-NEXT:    store i32 [[TMP7]], i32* [[TMP20]], align 4
-// CHECK11-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK11-NEXT:    store i64 4, i64* [[TMP21]], align 4
-// CHECK11-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK11-NEXT:    store i8* null, i8** [[TMP22]], align 4
-// CHECK11-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP24]], align 4
-// CHECK11-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP26]], align 4
-// CHECK11-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK11-NEXT:    store i64 4, i64* [[TMP27]], align 4
-// CHECK11-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK11-NEXT:    store i8* null, i8** [[TMP28]], align 4
-// CHECK11-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK11-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK11-NEXT:    store i8* null, i8** [[TMP21]], align 4
+// CHECK11-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP23]], align 4
+// CHECK11-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP25]], align 4
+// CHECK11-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK11-NEXT:    store i8* null, i8** [[TMP26]], align 4
+// CHECK11-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK11-NEXT:    [[TMP28:%.*]] = bitcast i8** [[TMP27]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP1]], i32* [[TMP28]], align 4
+// CHECK11-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
 // CHECK11-NEXT:    [[TMP30:%.*]] = bitcast i8** [[TMP29]] to i32*
 // CHECK11-NEXT:    store i32 [[TMP1]], i32* [[TMP30]], align 4
-// CHECK11-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK11-NEXT:    [[TMP32:%.*]] = bitcast i8** [[TMP31]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP1]], i32* [[TMP32]], align 4
-// CHECK11-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK11-NEXT:    store i64 4, i64* [[TMP33]], align 4
-// CHECK11-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
-// CHECK11-NEXT:    store i8* null, i8** [[TMP34]], align 4
-// CHECK11-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK11-NEXT:    [[TMP36:%.*]] = bitcast i8** [[TMP35]] to i32**
-// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP36]], align 4
-// CHECK11-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
-// CHECK11-NEXT:    [[TMP38:%.*]] = bitcast i8** [[TMP37]] to i32**
-// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP38]], align 4
-// CHECK11-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK11-NEXT:    store i64 [[TMP10]], i64* [[TMP39]], align 4
-// CHECK11-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
-// CHECK11-NEXT:    store i8* null, i8** [[TMP40]], align 4
-// CHECK11-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP44:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP44]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK11-NEXT:    [[TMP45:%.*]] = load i32, i32* [[M]], align 4
-// CHECK11-NEXT:    store i32 [[TMP45]], i32* [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK11-NEXT:    [[TMP46:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK11-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP46]], 0
+// CHECK11-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
+// CHECK11-NEXT:    store i8* null, i8** [[TMP31]], align 4
+// CHECK11-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK11-NEXT:    [[TMP33:%.*]] = bitcast i8** [[TMP32]] to i32**
+// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP33]], align 4
+// CHECK11-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK11-NEXT:    [[TMP35:%.*]] = bitcast i8** [[TMP34]] to i32**
+// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP35]], align 4
+// CHECK11-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
+// CHECK11-NEXT:    store i64 [[TMP10]], i64* [[TMP36]], align 4
+// CHECK11-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
+// CHECK11-NEXT:    store i8* null, i8** [[TMP37]], align 4
+// CHECK11-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP41:%.*]] = load i32, i32* [[N]], align 4
+// CHECK11-NEXT:    store i32 [[TMP41]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK11-NEXT:    [[TMP42:%.*]] = load i32, i32* [[M]], align 4
+// CHECK11-NEXT:    store i32 [[TMP42]], i32* [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK11-NEXT:    [[TMP43:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK11-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP43]], 0
 // CHECK11-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK11-NEXT:    [[CONV:%.*]] = sext i32 [[DIV]] to i64
-// CHECK11-NEXT:    [[TMP47:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK11-NEXT:    [[SUB4:%.*]] = sub nsw i32 [[TMP47]], 0
+// CHECK11-NEXT:    [[TMP44:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK11-NEXT:    [[SUB4:%.*]] = sub nsw i32 [[TMP44]], 0
 // CHECK11-NEXT:    [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1
 // CHECK11-NEXT:    [[CONV6:%.*]] = sext i32 [[DIV5]] to i64
 // CHECK11-NEXT:    [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]]
 // CHECK11-NEXT:    [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1
 // CHECK11-NEXT:    store i64 [[SUB7]], i64* [[DOTCAPTURE_EXPR_3]], align 8
-// CHECK11-NEXT:    [[TMP48:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8
-// CHECK11-NEXT:    [[ADD:%.*]] = add nsw i64 [[TMP48]], 1
+// CHECK11-NEXT:    [[TMP45:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8
+// CHECK11-NEXT:    [[ADD:%.*]] = add nsw i64 [[TMP45]], 1
 // CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 [[ADD]])
-// CHECK11-NEXT:    [[TMP49:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l80.region_id, i32 5, i8** [[TMP41]], i8** [[TMP42]], i64* [[TMP43]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK11-NEXT:    [[TMP50:%.*]] = icmp ne i32 [[TMP49]], 0
-// CHECK11-NEXT:    br i1 [[TMP50]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK11-NEXT:    [[TMP46:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l80.region_id, i32 5, i8** [[TMP38]], i8** [[TMP39]], i64* [[TMP40]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK11-NEXT:    [[TMP47:%.*]] = icmp ne i32 [[TMP46]], 0
+// CHECK11-NEXT:    br i1 [[TMP47]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK11:       omp_offload.failed:
 // CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l80(i32 [[TMP5]], i32 [[TMP7]], i32 [[TMP0]], i32 [[TMP1]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK11:       omp_offload.cont:
-// CHECK11-NEXT:    [[TMP51:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK11-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef [[TMP51]])
+// CHECK11-NEXT:    [[TMP48:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK11-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef [[TMP48]])
 // CHECK11-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK11-NEXT:    [[TMP52:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK11-NEXT:    call void @llvm.stackrestore(i8* [[TMP52]])
-// CHECK11-NEXT:    [[TMP53:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK11-NEXT:    ret i32 [[TMP53]]
+// CHECK11-NEXT:    [[TMP49:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK11-NEXT:    call void @llvm.stackrestore(i8* [[TMP49]])
+// CHECK11-NEXT:    [[TMP50:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK11-NEXT:    ret i32 [[TMP50]]
 //
 //
 // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l80
@@ -1858,7 +1840,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK11-LABEL: define {{[^@]+}}@_Z5tmainIiLi10ELi2EEiT_
-// CHECK11-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK11-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK11-NEXT:  entry:
 // CHECK11-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[A:%.*]] = alloca [10 x [2 x i32]], align 4
@@ -1879,7 +1861,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 20)
-// CHECK11-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l67.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.2, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK11-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l67.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.2, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.3, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK11-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK11-NEXT:    br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK11:       omp_offload.failed:
@@ -1979,7 +1961,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK11-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK11-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK11-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK11-NEXT:  entry:
 // CHECK11-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK11-NEXT:    ret void
@@ -2029,14 +2011,14 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[TMP8:%.*]] = mul nuw i32 [[TMP0]], [[TMP1]]
 // CHECK12-NEXT:    [[TMP9:%.*]] = mul nuw i32 [[TMP8]], 4
 // CHECK12-NEXT:    [[TMP10:%.*]] = sext i32 [[TMP9]] to i64
-// CHECK12-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP5]], i32* [[TMP12]], align 4
-// CHECK12-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP5]], i32* [[TMP14]], align 4
-// CHECK12-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK12-NEXT:    store i64 4, i64* [[TMP15]], align 4
+// CHECK12-NEXT:    [[TMP11:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK12-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP11]], i8* align 4 bitcast ([5 x i64]* @.offload_sizes to i8*), i32 40, i1 false)
+// CHECK12-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP13:%.*]] = bitcast i8** [[TMP12]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP5]], i32* [[TMP13]], align 4
+// CHECK12-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP5]], i32* [[TMP15]], align 4
 // CHECK12-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
 // CHECK12-NEXT:    store i8* null, i8** [[TMP16]], align 4
 // CHECK12-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -2045,75 +2027,69 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK12-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32*
 // CHECK12-NEXT:    store i32 [[TMP7]], i32* [[TMP20]], align 4
-// CHECK12-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK12-NEXT:    store i64 4, i64* [[TMP21]], align 4
-// CHECK12-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK12-NEXT:    store i8* null, i8** [[TMP22]], align 4
-// CHECK12-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK12-NEXT:    [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP24]], align 4
-// CHECK12-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK12-NEXT:    [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP26]], align 4
-// CHECK12-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK12-NEXT:    store i64 4, i64* [[TMP27]], align 4
-// CHECK12-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK12-NEXT:    store i8* null, i8** [[TMP28]], align 4
-// CHECK12-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK12-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK12-NEXT:    store i8* null, i8** [[TMP21]], align 4
+// CHECK12-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK12-NEXT:    [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP23]], align 4
+// CHECK12-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK12-NEXT:    [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP25]], align 4
+// CHECK12-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK12-NEXT:    store i8* null, i8** [[TMP26]], align 4
+// CHECK12-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK12-NEXT:    [[TMP28:%.*]] = bitcast i8** [[TMP27]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP1]], i32* [[TMP28]], align 4
+// CHECK12-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
 // CHECK12-NEXT:    [[TMP30:%.*]] = bitcast i8** [[TMP29]] to i32*
 // CHECK12-NEXT:    store i32 [[TMP1]], i32* [[TMP30]], align 4
-// CHECK12-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK12-NEXT:    [[TMP32:%.*]] = bitcast i8** [[TMP31]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP1]], i32* [[TMP32]], align 4
-// CHECK12-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK12-NEXT:    store i64 4, i64* [[TMP33]], align 4
-// CHECK12-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
-// CHECK12-NEXT:    store i8* null, i8** [[TMP34]], align 4
-// CHECK12-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK12-NEXT:    [[TMP36:%.*]] = bitcast i8** [[TMP35]] to i32**
-// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP36]], align 4
-// CHECK12-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
-// CHECK12-NEXT:    [[TMP38:%.*]] = bitcast i8** [[TMP37]] to i32**
-// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP38]], align 4
-// CHECK12-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK12-NEXT:    store i64 [[TMP10]], i64* [[TMP39]], align 4
-// CHECK12-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
-// CHECK12-NEXT:    store i8* null, i8** [[TMP40]], align 4
-// CHECK12-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP44:%.*]] = load i32, i32* [[N]], align 4
-// CHECK12-NEXT:    store i32 [[TMP44]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK12-NEXT:    [[TMP45:%.*]] = load i32, i32* [[M]], align 4
-// CHECK12-NEXT:    store i32 [[TMP45]], i32* [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK12-NEXT:    [[TMP46:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK12-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP46]], 0
+// CHECK12-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
+// CHECK12-NEXT:    store i8* null, i8** [[TMP31]], align 4
+// CHECK12-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK12-NEXT:    [[TMP33:%.*]] = bitcast i8** [[TMP32]] to i32**
+// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP33]], align 4
+// CHECK12-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK12-NEXT:    [[TMP35:%.*]] = bitcast i8** [[TMP34]] to i32**
+// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP35]], align 4
+// CHECK12-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
+// CHECK12-NEXT:    store i64 [[TMP10]], i64* [[TMP36]], align 4
+// CHECK12-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
+// CHECK12-NEXT:    store i8* null, i8** [[TMP37]], align 4
+// CHECK12-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP41:%.*]] = load i32, i32* [[N]], align 4
+// CHECK12-NEXT:    store i32 [[TMP41]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK12-NEXT:    [[TMP42:%.*]] = load i32, i32* [[M]], align 4
+// CHECK12-NEXT:    store i32 [[TMP42]], i32* [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK12-NEXT:    [[TMP43:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK12-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP43]], 0
 // CHECK12-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK12-NEXT:    [[CONV:%.*]] = sext i32 [[DIV]] to i64
-// CHECK12-NEXT:    [[TMP47:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK12-NEXT:    [[SUB4:%.*]] = sub nsw i32 [[TMP47]], 0
+// CHECK12-NEXT:    [[TMP44:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK12-NEXT:    [[SUB4:%.*]] = sub nsw i32 [[TMP44]], 0
 // CHECK12-NEXT:    [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1
 // CHECK12-NEXT:    [[CONV6:%.*]] = sext i32 [[DIV5]] to i64
 // CHECK12-NEXT:    [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]]
 // CHECK12-NEXT:    [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1
 // CHECK12-NEXT:    store i64 [[SUB7]], i64* [[DOTCAPTURE_EXPR_3]], align 8
-// CHECK12-NEXT:    [[TMP48:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8
-// CHECK12-NEXT:    [[ADD:%.*]] = add nsw i64 [[TMP48]], 1
+// CHECK12-NEXT:    [[TMP45:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8
+// CHECK12-NEXT:    [[ADD:%.*]] = add nsw i64 [[TMP45]], 1
 // CHECK12-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 [[ADD]])
-// CHECK12-NEXT:    [[TMP49:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l80.region_id, i32 5, i8** [[TMP41]], i8** [[TMP42]], i64* [[TMP43]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK12-NEXT:    [[TMP50:%.*]] = icmp ne i32 [[TMP49]], 0
-// CHECK12-NEXT:    br i1 [[TMP50]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK12-NEXT:    [[TMP46:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l80.region_id, i32 5, i8** [[TMP38]], i8** [[TMP39]], i64* [[TMP40]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK12-NEXT:    [[TMP47:%.*]] = icmp ne i32 [[TMP46]], 0
+// CHECK12-NEXT:    br i1 [[TMP47]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK12:       omp_offload.failed:
 // CHECK12-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l80(i32 [[TMP5]], i32 [[TMP7]], i32 [[TMP0]], i32 [[TMP1]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK12-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK12:       omp_offload.cont:
-// CHECK12-NEXT:    [[TMP51:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK12-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef [[TMP51]])
+// CHECK12-NEXT:    [[TMP48:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK12-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef [[TMP48]])
 // CHECK12-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK12-NEXT:    [[TMP52:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK12-NEXT:    call void @llvm.stackrestore(i8* [[TMP52]])
-// CHECK12-NEXT:    [[TMP53:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK12-NEXT:    ret i32 [[TMP53]]
+// CHECK12-NEXT:    [[TMP49:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK12-NEXT:    call void @llvm.stackrestore(i8* [[TMP49]])
+// CHECK12-NEXT:    [[TMP50:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK12-NEXT:    ret i32 [[TMP50]]
 //
 //
 // CHECK12-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l80
@@ -2289,7 +2265,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK12-LABEL: define {{[^@]+}}@_Z5tmainIiLi10ELi2EEiT_
-// CHECK12-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK12-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK12-NEXT:  entry:
 // CHECK12-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK12-NEXT:    [[A:%.*]] = alloca [10 x [2 x i32]], align 4
@@ -2310,7 +2286,7 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK12-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK12-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 20)
-// CHECK12-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l67.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.2, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK12-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l67.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.2, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.3, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK12-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK12-NEXT:    br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK12:       omp_offload.failed:
@@ -2410,7 +2386,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK12-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK12-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK12-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK12-NEXT:  entry:
 // CHECK12-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK12-NEXT:    ret void

diff  --git a/clang/test/OpenMP/target_teams_distribute_dist_schedule_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_dist_schedule_codegen.cpp
index ea1ea025bb2b2..59ffd63d282ac 100644
--- a/clang/test/OpenMP/target_teams_distribute_dist_schedule_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_dist_schedule_codegen.cpp
@@ -1554,14 +1554,14 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    store i32 [[TMP3]], i32* [[CONV]], align 4
 // CHECK9-NEXT:    [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8
 // CHECK9-NEXT:    [[TMP5:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK9-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP4]], i64* [[TMP7]], align 8
-// CHECK9-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP4]], i64* [[TMP9]], align 8
-// CHECK9-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK9-NEXT:    store i64 4, i64* [[TMP10]], align 8
+// CHECK9-NEXT:    [[TMP6:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK9-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP6]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes to i8*), i64 24, i1 false)
+// CHECK9-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP4]], i64* [[TMP8]], align 8
+// CHECK9-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP4]], i64* [[TMP10]], align 8
 // CHECK9-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
 // CHECK9-NEXT:    store i8* null, i8** [[TMP11]], align 8
 // CHECK9-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -1570,176 +1570,168 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK9-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i64*
 // CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP15]], align 8
-// CHECK9-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK9-NEXT:    store i64 8, i64* [[TMP16]], align 8
-// CHECK9-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK9-NEXT:    store i8* null, i8** [[TMP17]], align 8
-// CHECK9-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
-// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP19]], align 8
-// CHECK9-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32**
-// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP21]], align 8
-// CHECK9-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK9-NEXT:    store i64 [[TMP5]], i64* [[TMP22]], align 8
-// CHECK9-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK9-NEXT:    store i8* null, i8** [[TMP23]], align 8
-// CHECK9-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP27:%.*]] = load i32, i32* [[N]], align 4
-// CHECK9-NEXT:    store i32 [[TMP27]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK9-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK9-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP28]], 0
+// CHECK9-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK9-NEXT:    store i8* null, i8** [[TMP16]], align 8
+// CHECK9-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32**
+// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP18]], align 8
+// CHECK9-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32**
+// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP20]], align 8
+// CHECK9-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
+// CHECK9-NEXT:    store i64 [[TMP5]], i64* [[TMP21]], align 8
+// CHECK9-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK9-NEXT:    store i8* null, i8** [[TMP22]], align 8
+// CHECK9-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP26:%.*]] = load i32, i32* [[N]], align 4
+// CHECK9-NEXT:    store i32 [[TMP26]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK9-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK9-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP27]], 0
 // CHECK9-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK9-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK9-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK9-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK9-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP29]], 1
-// CHECK9-NEXT:    [[TMP30:%.*]] = zext i32 [[ADD]] to i64
-// CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 [[TMP30]])
-// CHECK9-NEXT:    [[TMP31:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l94.region_id, i32 3, i8** [[TMP24]], i8** [[TMP25]], i64* [[TMP26]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK9-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
-// CHECK9-NEXT:    br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK9-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK9-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP28]], 1
+// CHECK9-NEXT:    [[TMP29:%.*]] = zext i32 [[ADD]] to i64
+// CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 [[TMP29]])
+// CHECK9-NEXT:    [[TMP30:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l94.region_id, i32 3, i8** [[TMP23]], i8** [[TMP24]], i64* [[TMP25]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK9-NEXT:    [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
+// CHECK9-NEXT:    br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK9:       omp_offload.failed:
 // CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l94(i64 [[TMP4]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK9:       omp_offload.cont:
-// CHECK9-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N]], align 4
+// CHECK9-NEXT:    [[TMP32:%.*]] = load i32, i32* [[N]], align 4
 // CHECK9-NEXT:    [[CONV4:%.*]] = bitcast i64* [[N_CASTED3]] to i32*
-// CHECK9-NEXT:    store i32 [[TMP33]], i32* [[CONV4]], align 4
-// CHECK9-NEXT:    [[TMP34:%.*]] = load i64, i64* [[N_CASTED3]], align 8
-// CHECK9-NEXT:    [[TMP35:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK9-NEXT:    store i32 [[TMP32]], i32* [[CONV4]], align 4
+// CHECK9-NEXT:    [[TMP33:%.*]] = load i64, i64* [[N_CASTED3]], align 8
+// CHECK9-NEXT:    [[TMP34:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK9-NEXT:    [[TMP35:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES8]] to i8*
+// CHECK9-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP35]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes.2 to i8*), i64 24, i1 false)
 // CHECK9-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP34]], i64* [[TMP37]], align 8
+// CHECK9-NEXT:    store i64 [[TMP33]], i64* [[TMP37]], align 8
 // CHECK9-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP39:%.*]] = bitcast i8** [[TMP38]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP34]], i64* [[TMP39]], align 8
-// CHECK9-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 0
-// CHECK9-NEXT:    store i64 4, i64* [[TMP40]], align 8
-// CHECK9-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 0
-// CHECK9-NEXT:    store i8* null, i8** [[TMP41]], align 8
-// CHECK9-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 1
-// CHECK9-NEXT:    [[TMP43:%.*]] = bitcast i8** [[TMP42]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP43]], align 8
-// CHECK9-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 1
-// CHECK9-NEXT:    [[TMP45:%.*]] = bitcast i8** [[TMP44]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP45]], align 8
-// CHECK9-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 1
-// CHECK9-NEXT:    store i64 8, i64* [[TMP46]], align 8
-// CHECK9-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 1
-// CHECK9-NEXT:    store i8* null, i8** [[TMP47]], align 8
-// CHECK9-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 2
+// CHECK9-NEXT:    store i64 [[TMP33]], i64* [[TMP39]], align 8
+// CHECK9-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 0
+// CHECK9-NEXT:    store i8* null, i8** [[TMP40]], align 8
+// CHECK9-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 1
+// CHECK9-NEXT:    [[TMP42:%.*]] = bitcast i8** [[TMP41]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP42]], align 8
+// CHECK9-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 1
+// CHECK9-NEXT:    [[TMP44:%.*]] = bitcast i8** [[TMP43]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP44]], align 8
+// CHECK9-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 1
+// CHECK9-NEXT:    store i8* null, i8** [[TMP45]], align 8
+// CHECK9-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP47:%.*]] = bitcast i8** [[TMP46]] to i32**
+// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP47]], align 8
+// CHECK9-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 2
 // CHECK9-NEXT:    [[TMP49:%.*]] = bitcast i8** [[TMP48]] to i32**
 // CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP49]], align 8
-// CHECK9-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP51:%.*]] = bitcast i8** [[TMP50]] to i32**
-// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP51]], align 8
-// CHECK9-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 2
-// CHECK9-NEXT:    store i64 [[TMP35]], i64* [[TMP52]], align 8
-// CHECK9-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 2
-// CHECK9-NEXT:    store i8* null, i8** [[TMP53]], align 8
-// CHECK9-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP57:%.*]] = load i32, i32* [[N]], align 4
-// CHECK9-NEXT:    store i32 [[TMP57]], i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK9-NEXT:    [[TMP58:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK9-NEXT:    [[SUB12:%.*]] = sub nsw i32 [[TMP58]], 0
+// CHECK9-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 2
+// CHECK9-NEXT:    store i64 [[TMP34]], i64* [[TMP50]], align 8
+// CHECK9-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 2
+// CHECK9-NEXT:    store i8* null, i8** [[TMP51]], align 8
+// CHECK9-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP55:%.*]] = load i32, i32* [[N]], align 4
+// CHECK9-NEXT:    store i32 [[TMP55]], i32* [[DOTCAPTURE_EXPR_10]], align 4
+// CHECK9-NEXT:    [[TMP56:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
+// CHECK9-NEXT:    [[SUB12:%.*]] = sub nsw i32 [[TMP56]], 0
 // CHECK9-NEXT:    [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1
 // CHECK9-NEXT:    [[SUB14:%.*]] = sub nsw i32 [[DIV13]], 1
 // CHECK9-NEXT:    store i32 [[SUB14]], i32* [[DOTCAPTURE_EXPR_11]], align 4
-// CHECK9-NEXT:    [[TMP59:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_11]], align 4
-// CHECK9-NEXT:    [[ADD15:%.*]] = add nsw i32 [[TMP59]], 1
-// CHECK9-NEXT:    [[TMP60:%.*]] = zext i32 [[ADD15]] to i64
-// CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 [[TMP60]])
-// CHECK9-NEXT:    [[TMP61:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l98.region_id, i32 3, i8** [[TMP54]], i8** [[TMP55]], i64* [[TMP56]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.2, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK9-NEXT:    [[TMP62:%.*]] = icmp ne i32 [[TMP61]], 0
-// CHECK9-NEXT:    br i1 [[TMP62]], label [[OMP_OFFLOAD_FAILED16:%.*]], label [[OMP_OFFLOAD_CONT17:%.*]]
+// CHECK9-NEXT:    [[TMP57:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_11]], align 4
+// CHECK9-NEXT:    [[ADD15:%.*]] = add nsw i32 [[TMP57]], 1
+// CHECK9-NEXT:    [[TMP58:%.*]] = zext i32 [[ADD15]] to i64
+// CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 [[TMP58]])
+// CHECK9-NEXT:    [[TMP59:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l98.region_id, i32 3, i8** [[TMP52]], i8** [[TMP53]], i64* [[TMP54]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.3, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK9-NEXT:    [[TMP60:%.*]] = icmp ne i32 [[TMP59]], 0
+// CHECK9-NEXT:    br i1 [[TMP60]], label [[OMP_OFFLOAD_FAILED16:%.*]], label [[OMP_OFFLOAD_CONT17:%.*]]
 // CHECK9:       omp_offload.failed16:
-// CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l98(i64 [[TMP34]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l98(i64 [[TMP33]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT17]]
 // CHECK9:       omp_offload.cont17:
-// CHECK9-NEXT:    [[TMP63:%.*]] = load i32, i32* [[N]], align 4
-// CHECK9-NEXT:    store i32 [[TMP63]], i32* [[DOTCAPTURE_EXPR_18]], align 4
-// CHECK9-NEXT:    [[TMP64:%.*]] = load i32, i32* [[N]], align 4
+// CHECK9-NEXT:    [[TMP61:%.*]] = load i32, i32* [[N]], align 4
+// CHECK9-NEXT:    store i32 [[TMP61]], i32* [[DOTCAPTURE_EXPR_18]], align 4
+// CHECK9-NEXT:    [[TMP62:%.*]] = load i32, i32* [[N]], align 4
 // CHECK9-NEXT:    [[CONV20:%.*]] = bitcast i64* [[N_CASTED19]] to i32*
-// CHECK9-NEXT:    store i32 [[TMP64]], i32* [[CONV20]], align 4
-// CHECK9-NEXT:    [[TMP65:%.*]] = load i64, i64* [[N_CASTED19]], align 8
-// CHECK9-NEXT:    [[TMP66:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4
+// CHECK9-NEXT:    store i32 [[TMP62]], i32* [[CONV20]], align 4
+// CHECK9-NEXT:    [[TMP63:%.*]] = load i64, i64* [[N_CASTED19]], align 8
+// CHECK9-NEXT:    [[TMP64:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4
 // CHECK9-NEXT:    [[CONV21:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
-// CHECK9-NEXT:    store i32 [[TMP66]], i32* [[CONV21]], align 4
-// CHECK9-NEXT:    [[TMP67:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK9-NEXT:    [[TMP68:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK9-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP70:%.*]] = bitcast i8** [[TMP69]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP65]], i64* [[TMP70]], align 8
-// CHECK9-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP72:%.*]] = bitcast i8** [[TMP71]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP65]], i64* [[TMP72]], align 8
-// CHECK9-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 0
-// CHECK9-NEXT:    store i64 4, i64* [[TMP73]], align 8
-// CHECK9-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 0
-// CHECK9-NEXT:    store i8* null, i8** [[TMP74]], align 8
-// CHECK9-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 1
+// CHECK9-NEXT:    store i32 [[TMP64]], i32* [[CONV21]], align 4
+// CHECK9-NEXT:    [[TMP65:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
+// CHECK9-NEXT:    [[TMP66:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK9-NEXT:    [[TMP67:%.*]] = bitcast [4 x i64]* [[DOTOFFLOAD_SIZES25]] to i8*
+// CHECK9-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP67]], i8* align 8 bitcast ([4 x i64]* @.offload_sizes.5 to i8*), i64 32, i1 false)
+// CHECK9-NEXT:    [[TMP68:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP69:%.*]] = bitcast i8** [[TMP68]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP63]], i64* [[TMP69]], align 8
+// CHECK9-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP71:%.*]] = bitcast i8** [[TMP70]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP63]], i64* [[TMP71]], align 8
+// CHECK9-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 0
+// CHECK9-NEXT:    store i8* null, i8** [[TMP72]], align 8
+// CHECK9-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 1
+// CHECK9-NEXT:    [[TMP74:%.*]] = bitcast i8** [[TMP73]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP74]], align 8
+// CHECK9-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 1
 // CHECK9-NEXT:    [[TMP76:%.*]] = bitcast i8** [[TMP75]] to i64*
 // CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP76]], align 8
-// CHECK9-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 1
-// CHECK9-NEXT:    [[TMP78:%.*]] = bitcast i8** [[TMP77]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP78]], align 8
-// CHECK9-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 1
-// CHECK9-NEXT:    store i64 8, i64* [[TMP79]], align 8
-// CHECK9-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 1
-// CHECK9-NEXT:    store i8* null, i8** [[TMP80]], align 8
-// CHECK9-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP82:%.*]] = bitcast i8** [[TMP81]] to i32**
-// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP82]], align 8
-// CHECK9-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP84:%.*]] = bitcast i8** [[TMP83]] to i32**
-// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP84]], align 8
-// CHECK9-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 2
-// CHECK9-NEXT:    store i64 [[TMP68]], i64* [[TMP85]], align 8
-// CHECK9-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 2
-// CHECK9-NEXT:    store i8* null, i8** [[TMP86]], align 8
-// CHECK9-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 3
-// CHECK9-NEXT:    [[TMP88:%.*]] = bitcast i8** [[TMP87]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP67]], i64* [[TMP88]], align 8
-// CHECK9-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 3
-// CHECK9-NEXT:    [[TMP90:%.*]] = bitcast i8** [[TMP89]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP67]], i64* [[TMP90]], align 8
-// CHECK9-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 3
-// CHECK9-NEXT:    store i64 4, i64* [[TMP91]], align 8
-// CHECK9-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 3
-// CHECK9-NEXT:    store i8* null, i8** [[TMP92]], align 8
-// CHECK9-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP96:%.*]] = load i32, i32* [[N]], align 4
-// CHECK9-NEXT:    store i32 [[TMP96]], i32* [[DOTCAPTURE_EXPR_27]], align 4
-// CHECK9-NEXT:    [[TMP97:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_27]], align 4
-// CHECK9-NEXT:    [[SUB29:%.*]] = sub nsw i32 [[TMP97]], 0
+// CHECK9-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 1
+// CHECK9-NEXT:    store i8* null, i8** [[TMP77]], align 8
+// CHECK9-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP79:%.*]] = bitcast i8** [[TMP78]] to i32**
+// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP79]], align 8
+// CHECK9-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP81:%.*]] = bitcast i8** [[TMP80]] to i32**
+// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP81]], align 8
+// CHECK9-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 2
+// CHECK9-NEXT:    store i64 [[TMP66]], i64* [[TMP82]], align 8
+// CHECK9-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 2
+// CHECK9-NEXT:    store i8* null, i8** [[TMP83]], align 8
+// CHECK9-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 3
+// CHECK9-NEXT:    [[TMP85:%.*]] = bitcast i8** [[TMP84]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP65]], i64* [[TMP85]], align 8
+// CHECK9-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 3
+// CHECK9-NEXT:    [[TMP87:%.*]] = bitcast i8** [[TMP86]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP65]], i64* [[TMP87]], align 8
+// CHECK9-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 3
+// CHECK9-NEXT:    store i8* null, i8** [[TMP88]], align 8
+// CHECK9-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP92:%.*]] = load i32, i32* [[N]], align 4
+// CHECK9-NEXT:    store i32 [[TMP92]], i32* [[DOTCAPTURE_EXPR_27]], align 4
+// CHECK9-NEXT:    [[TMP93:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_27]], align 4
+// CHECK9-NEXT:    [[SUB29:%.*]] = sub nsw i32 [[TMP93]], 0
 // CHECK9-NEXT:    [[DIV30:%.*]] = sdiv i32 [[SUB29]], 1
 // CHECK9-NEXT:    [[SUB31:%.*]] = sub nsw i32 [[DIV30]], 1
 // CHECK9-NEXT:    store i32 [[SUB31]], i32* [[DOTCAPTURE_EXPR_28]], align 4
-// CHECK9-NEXT:    [[TMP98:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_28]], align 4
-// CHECK9-NEXT:    [[ADD32:%.*]] = add nsw i32 [[TMP98]], 1
-// CHECK9-NEXT:    [[TMP99:%.*]] = zext i32 [[ADD32]] to i64
-// CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 [[TMP99]])
-// CHECK9-NEXT:    [[TMP100:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l102.region_id, i32 4, i8** [[TMP93]], i8** [[TMP94]], i64* [[TMP95]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK9-NEXT:    [[TMP101:%.*]] = icmp ne i32 [[TMP100]], 0
-// CHECK9-NEXT:    br i1 [[TMP101]], label [[OMP_OFFLOAD_FAILED33:%.*]], label [[OMP_OFFLOAD_CONT34:%.*]]
+// CHECK9-NEXT:    [[TMP94:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_28]], align 4
+// CHECK9-NEXT:    [[ADD32:%.*]] = add nsw i32 [[TMP94]], 1
+// CHECK9-NEXT:    [[TMP95:%.*]] = zext i32 [[ADD32]] to i64
+// CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 [[TMP95]])
+// CHECK9-NEXT:    [[TMP96:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l102.region_id, i32 4, i8** [[TMP89]], i8** [[TMP90]], i64* [[TMP91]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.6, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK9-NEXT:    [[TMP97:%.*]] = icmp ne i32 [[TMP96]], 0
+// CHECK9-NEXT:    br i1 [[TMP97]], label [[OMP_OFFLOAD_FAILED33:%.*]], label [[OMP_OFFLOAD_CONT34:%.*]]
 // CHECK9:       omp_offload.failed33:
-// CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l102(i64 [[TMP65]], i64 [[TMP1]], i32* [[VLA]], i64 [[TMP67]]) #[[ATTR3]]
+// CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l102(i64 [[TMP63]], i64 [[TMP1]], i32* [[VLA]], i64 [[TMP65]]) #[[ATTR3]]
 // CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT34]]
 // CHECK9:       omp_offload.cont34:
-// CHECK9-NEXT:    [[TMP102:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK9-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10EEiT_(i32 noundef signext [[TMP102]])
+// CHECK9-NEXT:    [[TMP98:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK9-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10EEiT_(i32 noundef signext [[TMP98]])
 // CHECK9-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK9-NEXT:    [[TMP103:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK9-NEXT:    call void @llvm.stackrestore(i8* [[TMP103]])
-// CHECK9-NEXT:    [[TMP104:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK9-NEXT:    ret i32 [[TMP104]]
+// CHECK9-NEXT:    [[TMP99:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK9-NEXT:    call void @llvm.stackrestore(i8* [[TMP99]])
+// CHECK9-NEXT:    [[TMP100:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK9-NEXT:    ret i32 [[TMP100]]
 //
 //
 // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l94
@@ -1999,11 +1991,11 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK9-NEXT:    store i32 [[TMP4]], i32* [[CONV3]], align 4
 // CHECK9-NEXT:    [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP3]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP5]])
+// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i64 [[TMP3]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP5]])
 // CHECK9-NEXT:    ret void
 //
 //
-// CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3
+// CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..4
 // CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK9-NEXT:  entry:
 // CHECK9-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -2120,7 +2112,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK9-LABEL: define {{[^@]+}}@_Z5tmainIiLi10EEiT_
-// CHECK9-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK9-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK9-NEXT:  entry:
 // CHECK9-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[A:%.*]] = alloca [10 x i32], align 4
@@ -2148,7 +2140,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK9-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l76.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.6, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK9-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l76.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.8, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK9-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK9-NEXT:    br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK9:       omp_offload.failed:
@@ -2166,7 +2158,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
 // CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK9-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l80.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.8, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK9-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l80.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.11, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.12, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK9-NEXT:    [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
 // CHECK9-NEXT:    br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]]
 // CHECK9:       omp_offload.failed5:
@@ -2184,7 +2176,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS8]], i32 0, i32 0
 // CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK9-NEXT:    [[TMP25:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l84.region_id, i32 1, i8** [[TMP23]], i8** [[TMP24]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.11, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.12, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK9-NEXT:    [[TMP25:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l84.region_id, i32 1, i8** [[TMP23]], i8** [[TMP24]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.14, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.15, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK9-NEXT:    [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0
 // CHECK9-NEXT:    br i1 [[TMP26]], label [[OMP_OFFLOAD_FAILED11:%.*]], label [[OMP_OFFLOAD_CONT12:%.*]]
 // CHECK9:       omp_offload.failed11:
@@ -2200,11 +2192,11 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8
 // CHECK9-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8
 // CHECK9-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8
-// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK9-NEXT:    ret void
 //
 //
-// CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..5
+// CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..7
 // CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK9-NEXT:  entry:
 // CHECK9-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -2277,11 +2269,11 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8
 // CHECK9-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8
 // CHECK9-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8
-// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK9-NEXT:    ret void
 //
 //
-// CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..7
+// CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..10
 // CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK9-NEXT:  entry:
 // CHECK9-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -2354,11 +2346,11 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8
 // CHECK9-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8
 // CHECK9-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8
-// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK9-NEXT:    ret void
 //
 //
-// CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..10
+// CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..13
 // CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK9-NEXT:  entry:
 // CHECK9-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -2443,7 +2435,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK9-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK9-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK9-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK9-NEXT:  entry:
 // CHECK9-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK9-NEXT:    ret void
@@ -2499,14 +2491,14 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    store i32 [[TMP3]], i32* [[CONV]], align 4
 // CHECK10-NEXT:    [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8
 // CHECK10-NEXT:    [[TMP5:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK10-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP4]], i64* [[TMP7]], align 8
-// CHECK10-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP4]], i64* [[TMP9]], align 8
-// CHECK10-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK10-NEXT:    store i64 4, i64* [[TMP10]], align 8
+// CHECK10-NEXT:    [[TMP6:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK10-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP6]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes to i8*), i64 24, i1 false)
+// CHECK10-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP4]], i64* [[TMP8]], align 8
+// CHECK10-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP4]], i64* [[TMP10]], align 8
 // CHECK10-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
 // CHECK10-NEXT:    store i8* null, i8** [[TMP11]], align 8
 // CHECK10-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -2515,176 +2507,168 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK10-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i64*
 // CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP15]], align 8
-// CHECK10-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK10-NEXT:    store i64 8, i64* [[TMP16]], align 8
-// CHECK10-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK10-NEXT:    store i8* null, i8** [[TMP17]], align 8
-// CHECK10-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK10-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
-// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP19]], align 8
-// CHECK10-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK10-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32**
-// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP21]], align 8
-// CHECK10-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK10-NEXT:    store i64 [[TMP5]], i64* [[TMP22]], align 8
-// CHECK10-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK10-NEXT:    store i8* null, i8** [[TMP23]], align 8
-// CHECK10-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP27:%.*]] = load i32, i32* [[N]], align 4
-// CHECK10-NEXT:    store i32 [[TMP27]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK10-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK10-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP28]], 0
+// CHECK10-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK10-NEXT:    store i8* null, i8** [[TMP16]], align 8
+// CHECK10-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK10-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32**
+// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP18]], align 8
+// CHECK10-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK10-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32**
+// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP20]], align 8
+// CHECK10-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
+// CHECK10-NEXT:    store i64 [[TMP5]], i64* [[TMP21]], align 8
+// CHECK10-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK10-NEXT:    store i8* null, i8** [[TMP22]], align 8
+// CHECK10-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP26:%.*]] = load i32, i32* [[N]], align 4
+// CHECK10-NEXT:    store i32 [[TMP26]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK10-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK10-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP27]], 0
 // CHECK10-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK10-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK10-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK10-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK10-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP29]], 1
-// CHECK10-NEXT:    [[TMP30:%.*]] = zext i32 [[ADD]] to i64
-// CHECK10-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 [[TMP30]])
-// CHECK10-NEXT:    [[TMP31:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l94.region_id, i32 3, i8** [[TMP24]], i8** [[TMP25]], i64* [[TMP26]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK10-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
-// CHECK10-NEXT:    br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK10-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK10-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP28]], 1
+// CHECK10-NEXT:    [[TMP29:%.*]] = zext i32 [[ADD]] to i64
+// CHECK10-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 [[TMP29]])
+// CHECK10-NEXT:    [[TMP30:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l94.region_id, i32 3, i8** [[TMP23]], i8** [[TMP24]], i64* [[TMP25]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK10-NEXT:    [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
+// CHECK10-NEXT:    br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK10:       omp_offload.failed:
 // CHECK10-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l94(i64 [[TMP4]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK10-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK10:       omp_offload.cont:
-// CHECK10-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N]], align 4
+// CHECK10-NEXT:    [[TMP32:%.*]] = load i32, i32* [[N]], align 4
 // CHECK10-NEXT:    [[CONV4:%.*]] = bitcast i64* [[N_CASTED3]] to i32*
-// CHECK10-NEXT:    store i32 [[TMP33]], i32* [[CONV4]], align 4
-// CHECK10-NEXT:    [[TMP34:%.*]] = load i64, i64* [[N_CASTED3]], align 8
-// CHECK10-NEXT:    [[TMP35:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK10-NEXT:    store i32 [[TMP32]], i32* [[CONV4]], align 4
+// CHECK10-NEXT:    [[TMP33:%.*]] = load i64, i64* [[N_CASTED3]], align 8
+// CHECK10-NEXT:    [[TMP34:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK10-NEXT:    [[TMP35:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES8]] to i8*
+// CHECK10-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP35]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes.2 to i8*), i64 24, i1 false)
 // CHECK10-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
 // CHECK10-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP34]], i64* [[TMP37]], align 8
+// CHECK10-NEXT:    store i64 [[TMP33]], i64* [[TMP37]], align 8
 // CHECK10-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
 // CHECK10-NEXT:    [[TMP39:%.*]] = bitcast i8** [[TMP38]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP34]], i64* [[TMP39]], align 8
-// CHECK10-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 0
-// CHECK10-NEXT:    store i64 4, i64* [[TMP40]], align 8
-// CHECK10-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 0
-// CHECK10-NEXT:    store i8* null, i8** [[TMP41]], align 8
-// CHECK10-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 1
-// CHECK10-NEXT:    [[TMP43:%.*]] = bitcast i8** [[TMP42]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP43]], align 8
-// CHECK10-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 1
-// CHECK10-NEXT:    [[TMP45:%.*]] = bitcast i8** [[TMP44]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP45]], align 8
-// CHECK10-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 1
-// CHECK10-NEXT:    store i64 8, i64* [[TMP46]], align 8
-// CHECK10-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 1
-// CHECK10-NEXT:    store i8* null, i8** [[TMP47]], align 8
-// CHECK10-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 2
+// CHECK10-NEXT:    store i64 [[TMP33]], i64* [[TMP39]], align 8
+// CHECK10-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 0
+// CHECK10-NEXT:    store i8* null, i8** [[TMP40]], align 8
+// CHECK10-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 1
+// CHECK10-NEXT:    [[TMP42:%.*]] = bitcast i8** [[TMP41]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP42]], align 8
+// CHECK10-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 1
+// CHECK10-NEXT:    [[TMP44:%.*]] = bitcast i8** [[TMP43]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP44]], align 8
+// CHECK10-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 1
+// CHECK10-NEXT:    store i8* null, i8** [[TMP45]], align 8
+// CHECK10-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 2
+// CHECK10-NEXT:    [[TMP47:%.*]] = bitcast i8** [[TMP46]] to i32**
+// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP47]], align 8
+// CHECK10-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 2
 // CHECK10-NEXT:    [[TMP49:%.*]] = bitcast i8** [[TMP48]] to i32**
 // CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP49]], align 8
-// CHECK10-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 2
-// CHECK10-NEXT:    [[TMP51:%.*]] = bitcast i8** [[TMP50]] to i32**
-// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP51]], align 8
-// CHECK10-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 2
-// CHECK10-NEXT:    store i64 [[TMP35]], i64* [[TMP52]], align 8
-// CHECK10-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 2
-// CHECK10-NEXT:    store i8* null, i8** [[TMP53]], align 8
-// CHECK10-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP57:%.*]] = load i32, i32* [[N]], align 4
-// CHECK10-NEXT:    store i32 [[TMP57]], i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK10-NEXT:    [[TMP58:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK10-NEXT:    [[SUB12:%.*]] = sub nsw i32 [[TMP58]], 0
+// CHECK10-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 2
+// CHECK10-NEXT:    store i64 [[TMP34]], i64* [[TMP50]], align 8
+// CHECK10-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 2
+// CHECK10-NEXT:    store i8* null, i8** [[TMP51]], align 8
+// CHECK10-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP55:%.*]] = load i32, i32* [[N]], align 4
+// CHECK10-NEXT:    store i32 [[TMP55]], i32* [[DOTCAPTURE_EXPR_10]], align 4
+// CHECK10-NEXT:    [[TMP56:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
+// CHECK10-NEXT:    [[SUB12:%.*]] = sub nsw i32 [[TMP56]], 0
 // CHECK10-NEXT:    [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1
 // CHECK10-NEXT:    [[SUB14:%.*]] = sub nsw i32 [[DIV13]], 1
 // CHECK10-NEXT:    store i32 [[SUB14]], i32* [[DOTCAPTURE_EXPR_11]], align 4
-// CHECK10-NEXT:    [[TMP59:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_11]], align 4
-// CHECK10-NEXT:    [[ADD15:%.*]] = add nsw i32 [[TMP59]], 1
-// CHECK10-NEXT:    [[TMP60:%.*]] = zext i32 [[ADD15]] to i64
-// CHECK10-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 [[TMP60]])
-// CHECK10-NEXT:    [[TMP61:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l98.region_id, i32 3, i8** [[TMP54]], i8** [[TMP55]], i64* [[TMP56]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.2, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK10-NEXT:    [[TMP62:%.*]] = icmp ne i32 [[TMP61]], 0
-// CHECK10-NEXT:    br i1 [[TMP62]], label [[OMP_OFFLOAD_FAILED16:%.*]], label [[OMP_OFFLOAD_CONT17:%.*]]
+// CHECK10-NEXT:    [[TMP57:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_11]], align 4
+// CHECK10-NEXT:    [[ADD15:%.*]] = add nsw i32 [[TMP57]], 1
+// CHECK10-NEXT:    [[TMP58:%.*]] = zext i32 [[ADD15]] to i64
+// CHECK10-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 [[TMP58]])
+// CHECK10-NEXT:    [[TMP59:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l98.region_id, i32 3, i8** [[TMP52]], i8** [[TMP53]], i64* [[TMP54]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.3, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK10-NEXT:    [[TMP60:%.*]] = icmp ne i32 [[TMP59]], 0
+// CHECK10-NEXT:    br i1 [[TMP60]], label [[OMP_OFFLOAD_FAILED16:%.*]], label [[OMP_OFFLOAD_CONT17:%.*]]
 // CHECK10:       omp_offload.failed16:
-// CHECK10-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l98(i64 [[TMP34]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK10-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l98(i64 [[TMP33]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK10-NEXT:    br label [[OMP_OFFLOAD_CONT17]]
 // CHECK10:       omp_offload.cont17:
-// CHECK10-NEXT:    [[TMP63:%.*]] = load i32, i32* [[N]], align 4
-// CHECK10-NEXT:    store i32 [[TMP63]], i32* [[DOTCAPTURE_EXPR_18]], align 4
-// CHECK10-NEXT:    [[TMP64:%.*]] = load i32, i32* [[N]], align 4
+// CHECK10-NEXT:    [[TMP61:%.*]] = load i32, i32* [[N]], align 4
+// CHECK10-NEXT:    store i32 [[TMP61]], i32* [[DOTCAPTURE_EXPR_18]], align 4
+// CHECK10-NEXT:    [[TMP62:%.*]] = load i32, i32* [[N]], align 4
 // CHECK10-NEXT:    [[CONV20:%.*]] = bitcast i64* [[N_CASTED19]] to i32*
-// CHECK10-NEXT:    store i32 [[TMP64]], i32* [[CONV20]], align 4
-// CHECK10-NEXT:    [[TMP65:%.*]] = load i64, i64* [[N_CASTED19]], align 8
-// CHECK10-NEXT:    [[TMP66:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4
+// CHECK10-NEXT:    store i32 [[TMP62]], i32* [[CONV20]], align 4
+// CHECK10-NEXT:    [[TMP63:%.*]] = load i64, i64* [[N_CASTED19]], align 8
+// CHECK10-NEXT:    [[TMP64:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4
 // CHECK10-NEXT:    [[CONV21:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
-// CHECK10-NEXT:    store i32 [[TMP66]], i32* [[CONV21]], align 4
-// CHECK10-NEXT:    [[TMP67:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK10-NEXT:    [[TMP68:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK10-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP70:%.*]] = bitcast i8** [[TMP69]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP65]], i64* [[TMP70]], align 8
-// CHECK10-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP72:%.*]] = bitcast i8** [[TMP71]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP65]], i64* [[TMP72]], align 8
-// CHECK10-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 0
-// CHECK10-NEXT:    store i64 4, i64* [[TMP73]], align 8
-// CHECK10-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 0
-// CHECK10-NEXT:    store i8* null, i8** [[TMP74]], align 8
-// CHECK10-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 1
+// CHECK10-NEXT:    store i32 [[TMP64]], i32* [[CONV21]], align 4
+// CHECK10-NEXT:    [[TMP65:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
+// CHECK10-NEXT:    [[TMP66:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK10-NEXT:    [[TMP67:%.*]] = bitcast [4 x i64]* [[DOTOFFLOAD_SIZES25]] to i8*
+// CHECK10-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP67]], i8* align 8 bitcast ([4 x i64]* @.offload_sizes.5 to i8*), i64 32, i1 false)
+// CHECK10-NEXT:    [[TMP68:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP69:%.*]] = bitcast i8** [[TMP68]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP63]], i64* [[TMP69]], align 8
+// CHECK10-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP71:%.*]] = bitcast i8** [[TMP70]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP63]], i64* [[TMP71]], align 8
+// CHECK10-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 0
+// CHECK10-NEXT:    store i8* null, i8** [[TMP72]], align 8
+// CHECK10-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 1
+// CHECK10-NEXT:    [[TMP74:%.*]] = bitcast i8** [[TMP73]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP74]], align 8
+// CHECK10-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 1
 // CHECK10-NEXT:    [[TMP76:%.*]] = bitcast i8** [[TMP75]] to i64*
 // CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP76]], align 8
-// CHECK10-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 1
-// CHECK10-NEXT:    [[TMP78:%.*]] = bitcast i8** [[TMP77]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP78]], align 8
-// CHECK10-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 1
-// CHECK10-NEXT:    store i64 8, i64* [[TMP79]], align 8
-// CHECK10-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 1
-// CHECK10-NEXT:    store i8* null, i8** [[TMP80]], align 8
-// CHECK10-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 2
-// CHECK10-NEXT:    [[TMP82:%.*]] = bitcast i8** [[TMP81]] to i32**
-// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP82]], align 8
-// CHECK10-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 2
-// CHECK10-NEXT:    [[TMP84:%.*]] = bitcast i8** [[TMP83]] to i32**
-// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP84]], align 8
-// CHECK10-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 2
-// CHECK10-NEXT:    store i64 [[TMP68]], i64* [[TMP85]], align 8
-// CHECK10-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 2
-// CHECK10-NEXT:    store i8* null, i8** [[TMP86]], align 8
-// CHECK10-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 3
-// CHECK10-NEXT:    [[TMP88:%.*]] = bitcast i8** [[TMP87]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP67]], i64* [[TMP88]], align 8
-// CHECK10-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 3
-// CHECK10-NEXT:    [[TMP90:%.*]] = bitcast i8** [[TMP89]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP67]], i64* [[TMP90]], align 8
-// CHECK10-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 3
-// CHECK10-NEXT:    store i64 4, i64* [[TMP91]], align 8
-// CHECK10-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 3
-// CHECK10-NEXT:    store i8* null, i8** [[TMP92]], align 8
-// CHECK10-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP96:%.*]] = load i32, i32* [[N]], align 4
-// CHECK10-NEXT:    store i32 [[TMP96]], i32* [[DOTCAPTURE_EXPR_27]], align 4
-// CHECK10-NEXT:    [[TMP97:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_27]], align 4
-// CHECK10-NEXT:    [[SUB29:%.*]] = sub nsw i32 [[TMP97]], 0
+// CHECK10-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 1
+// CHECK10-NEXT:    store i8* null, i8** [[TMP77]], align 8
+// CHECK10-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 2
+// CHECK10-NEXT:    [[TMP79:%.*]] = bitcast i8** [[TMP78]] to i32**
+// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP79]], align 8
+// CHECK10-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 2
+// CHECK10-NEXT:    [[TMP81:%.*]] = bitcast i8** [[TMP80]] to i32**
+// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP81]], align 8
+// CHECK10-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 2
+// CHECK10-NEXT:    store i64 [[TMP66]], i64* [[TMP82]], align 8
+// CHECK10-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 2
+// CHECK10-NEXT:    store i8* null, i8** [[TMP83]], align 8
+// CHECK10-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 3
+// CHECK10-NEXT:    [[TMP85:%.*]] = bitcast i8** [[TMP84]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP65]], i64* [[TMP85]], align 8
+// CHECK10-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 3
+// CHECK10-NEXT:    [[TMP87:%.*]] = bitcast i8** [[TMP86]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP65]], i64* [[TMP87]], align 8
+// CHECK10-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 3
+// CHECK10-NEXT:    store i8* null, i8** [[TMP88]], align 8
+// CHECK10-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP92:%.*]] = load i32, i32* [[N]], align 4
+// CHECK10-NEXT:    store i32 [[TMP92]], i32* [[DOTCAPTURE_EXPR_27]], align 4
+// CHECK10-NEXT:    [[TMP93:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_27]], align 4
+// CHECK10-NEXT:    [[SUB29:%.*]] = sub nsw i32 [[TMP93]], 0
 // CHECK10-NEXT:    [[DIV30:%.*]] = sdiv i32 [[SUB29]], 1
 // CHECK10-NEXT:    [[SUB31:%.*]] = sub nsw i32 [[DIV30]], 1
 // CHECK10-NEXT:    store i32 [[SUB31]], i32* [[DOTCAPTURE_EXPR_28]], align 4
-// CHECK10-NEXT:    [[TMP98:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_28]], align 4
-// CHECK10-NEXT:    [[ADD32:%.*]] = add nsw i32 [[TMP98]], 1
-// CHECK10-NEXT:    [[TMP99:%.*]] = zext i32 [[ADD32]] to i64
-// CHECK10-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 [[TMP99]])
-// CHECK10-NEXT:    [[TMP100:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l102.region_id, i32 4, i8** [[TMP93]], i8** [[TMP94]], i64* [[TMP95]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK10-NEXT:    [[TMP101:%.*]] = icmp ne i32 [[TMP100]], 0
-// CHECK10-NEXT:    br i1 [[TMP101]], label [[OMP_OFFLOAD_FAILED33:%.*]], label [[OMP_OFFLOAD_CONT34:%.*]]
+// CHECK10-NEXT:    [[TMP94:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_28]], align 4
+// CHECK10-NEXT:    [[ADD32:%.*]] = add nsw i32 [[TMP94]], 1
+// CHECK10-NEXT:    [[TMP95:%.*]] = zext i32 [[ADD32]] to i64
+// CHECK10-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 [[TMP95]])
+// CHECK10-NEXT:    [[TMP96:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l102.region_id, i32 4, i8** [[TMP89]], i8** [[TMP90]], i64* [[TMP91]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.6, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK10-NEXT:    [[TMP97:%.*]] = icmp ne i32 [[TMP96]], 0
+// CHECK10-NEXT:    br i1 [[TMP97]], label [[OMP_OFFLOAD_FAILED33:%.*]], label [[OMP_OFFLOAD_CONT34:%.*]]
 // CHECK10:       omp_offload.failed33:
-// CHECK10-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l102(i64 [[TMP65]], i64 [[TMP1]], i32* [[VLA]], i64 [[TMP67]]) #[[ATTR3]]
+// CHECK10-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l102(i64 [[TMP63]], i64 [[TMP1]], i32* [[VLA]], i64 [[TMP65]]) #[[ATTR3]]
 // CHECK10-NEXT:    br label [[OMP_OFFLOAD_CONT34]]
 // CHECK10:       omp_offload.cont34:
-// CHECK10-NEXT:    [[TMP102:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK10-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10EEiT_(i32 noundef signext [[TMP102]])
+// CHECK10-NEXT:    [[TMP98:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK10-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10EEiT_(i32 noundef signext [[TMP98]])
 // CHECK10-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK10-NEXT:    [[TMP103:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK10-NEXT:    call void @llvm.stackrestore(i8* [[TMP103]])
-// CHECK10-NEXT:    [[TMP104:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK10-NEXT:    ret i32 [[TMP104]]
+// CHECK10-NEXT:    [[TMP99:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK10-NEXT:    call void @llvm.stackrestore(i8* [[TMP99]])
+// CHECK10-NEXT:    [[TMP100:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK10-NEXT:    ret i32 [[TMP100]]
 //
 //
 // CHECK10-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l94
@@ -2944,11 +2928,11 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK10-NEXT:    store i32 [[TMP4]], i32* [[CONV3]], align 4
 // CHECK10-NEXT:    [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP3]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP5]])
+// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i64 [[TMP3]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP5]])
 // CHECK10-NEXT:    ret void
 //
 //
-// CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..3
+// CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..4
 // CHECK10-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK10-NEXT:  entry:
 // CHECK10-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -3065,7 +3049,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK10-LABEL: define {{[^@]+}}@_Z5tmainIiLi10EEiT_
-// CHECK10-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK10-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK10-NEXT:  entry:
 // CHECK10-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK10-NEXT:    [[A:%.*]] = alloca [10 x i32], align 4
@@ -3093,7 +3077,7 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK10-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK10-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK10-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l76.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.6, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK10-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l76.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.8, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK10-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK10-NEXT:    br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK10:       omp_offload.failed:
@@ -3111,7 +3095,7 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK10-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
 // CHECK10-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK10-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l80.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.8, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK10-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l80.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.11, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.12, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK10-NEXT:    [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
 // CHECK10-NEXT:    br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]]
 // CHECK10:       omp_offload.failed5:
@@ -3129,7 +3113,7 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0
 // CHECK10-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS8]], i32 0, i32 0
 // CHECK10-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK10-NEXT:    [[TMP25:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l84.region_id, i32 1, i8** [[TMP23]], i8** [[TMP24]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.11, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.12, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK10-NEXT:    [[TMP25:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l84.region_id, i32 1, i8** [[TMP23]], i8** [[TMP24]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.14, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.15, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK10-NEXT:    [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0
 // CHECK10-NEXT:    br i1 [[TMP26]], label [[OMP_OFFLOAD_FAILED11:%.*]], label [[OMP_OFFLOAD_CONT12:%.*]]
 // CHECK10:       omp_offload.failed11:
@@ -3145,11 +3129,11 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8
 // CHECK10-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8
 // CHECK10-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8
-// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK10-NEXT:    ret void
 //
 //
-// CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..5
+// CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..7
 // CHECK10-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK10-NEXT:  entry:
 // CHECK10-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -3222,11 +3206,11 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8
 // CHECK10-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8
 // CHECK10-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8
-// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK10-NEXT:    ret void
 //
 //
-// CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..7
+// CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..10
 // CHECK10-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK10-NEXT:  entry:
 // CHECK10-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -3299,11 +3283,11 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8
 // CHECK10-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8
 // CHECK10-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8
-// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK10-NEXT:    ret void
 //
 //
-// CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..10
+// CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..13
 // CHECK10-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK10-NEXT:  entry:
 // CHECK10-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -3388,7 +3372,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK10-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK10-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK10-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK10-NEXT:  entry:
 // CHECK10-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK10-NEXT:    ret void
@@ -3443,14 +3427,14 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4
 // CHECK11-NEXT:    [[TMP4:%.*]] = mul nuw i32 [[TMP0]], 4
 // CHECK11-NEXT:    [[TMP5:%.*]] = sext i32 [[TMP4]] to i64
-// CHECK11-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP3]], i32* [[TMP7]], align 4
-// CHECK11-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP3]], i32* [[TMP9]], align 4
-// CHECK11-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK11-NEXT:    store i64 4, i64* [[TMP10]], align 4
+// CHECK11-NEXT:    [[TMP6:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK11-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP6]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes to i8*), i32 24, i1 false)
+// CHECK11-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP3]], i32* [[TMP8]], align 4
+// CHECK11-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP3]], i32* [[TMP10]], align 4
 // CHECK11-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
 // CHECK11-NEXT:    store i8* null, i8** [[TMP11]], align 4
 // CHECK11-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -3459,175 +3443,167 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK11-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i32*
 // CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP15]], align 4
-// CHECK11-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK11-NEXT:    store i64 4, i64* [[TMP16]], align 4
-// CHECK11-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK11-NEXT:    store i8* null, i8** [[TMP17]], align 4
-// CHECK11-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
-// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP19]], align 4
-// CHECK11-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32**
-// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP21]], align 4
-// CHECK11-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK11-NEXT:    store i64 [[TMP5]], i64* [[TMP22]], align 4
-// CHECK11-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK11-NEXT:    store i8* null, i8** [[TMP23]], align 4
-// CHECK11-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP27:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP27]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK11-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK11-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP28]], 0
+// CHECK11-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK11-NEXT:    store i8* null, i8** [[TMP16]], align 4
+// CHECK11-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32**
+// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP18]], align 4
+// CHECK11-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32**
+// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP20]], align 4
+// CHECK11-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
+// CHECK11-NEXT:    store i64 [[TMP5]], i64* [[TMP21]], align 4
+// CHECK11-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK11-NEXT:    store i8* null, i8** [[TMP22]], align 4
+// CHECK11-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP26:%.*]] = load i32, i32* [[N]], align 4
+// CHECK11-NEXT:    store i32 [[TMP26]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK11-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK11-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP27]], 0
 // CHECK11-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK11-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK11-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK11-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK11-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP29]], 1
-// CHECK11-NEXT:    [[TMP30:%.*]] = zext i32 [[ADD]] to i64
-// CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 [[TMP30]])
-// CHECK11-NEXT:    [[TMP31:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l94.region_id, i32 3, i8** [[TMP24]], i8** [[TMP25]], i64* [[TMP26]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK11-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
-// CHECK11-NEXT:    br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK11-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK11-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP28]], 1
+// CHECK11-NEXT:    [[TMP29:%.*]] = zext i32 [[ADD]] to i64
+// CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 [[TMP29]])
+// CHECK11-NEXT:    [[TMP30:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l94.region_id, i32 3, i8** [[TMP23]], i8** [[TMP24]], i64* [[TMP25]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK11-NEXT:    [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
+// CHECK11-NEXT:    br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK11:       omp_offload.failed:
 // CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l94(i32 [[TMP3]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK11:       omp_offload.cont:
-// CHECK11-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP33]], i32* [[N_CASTED3]], align 4
-// CHECK11-NEXT:    [[TMP34:%.*]] = load i32, i32* [[N_CASTED3]], align 4
-// CHECK11-NEXT:    [[TMP35:%.*]] = mul nuw i32 [[TMP0]], 4
-// CHECK11-NEXT:    [[TMP36:%.*]] = sext i32 [[TMP35]] to i64
+// CHECK11-NEXT:    [[TMP32:%.*]] = load i32, i32* [[N]], align 4
+// CHECK11-NEXT:    store i32 [[TMP32]], i32* [[N_CASTED3]], align 4
+// CHECK11-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N_CASTED3]], align 4
+// CHECK11-NEXT:    [[TMP34:%.*]] = mul nuw i32 [[TMP0]], 4
+// CHECK11-NEXT:    [[TMP35:%.*]] = sext i32 [[TMP34]] to i64
+// CHECK11-NEXT:    [[TMP36:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES7]] to i8*
+// CHECK11-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP36]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes.2 to i8*), i32 24, i1 false)
 // CHECK11-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP38:%.*]] = bitcast i8** [[TMP37]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP34]], i32* [[TMP38]], align 4
+// CHECK11-NEXT:    store i32 [[TMP33]], i32* [[TMP38]], align 4
 // CHECK11-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP40:%.*]] = bitcast i8** [[TMP39]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP34]], i32* [[TMP40]], align 4
-// CHECK11-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 0
-// CHECK11-NEXT:    store i64 4, i64* [[TMP41]], align 4
-// CHECK11-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 0
-// CHECK11-NEXT:    store i8* null, i8** [[TMP42]], align 4
-// CHECK11-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 1
-// CHECK11-NEXT:    [[TMP44:%.*]] = bitcast i8** [[TMP43]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP44]], align 4
-// CHECK11-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 1
-// CHECK11-NEXT:    [[TMP46:%.*]] = bitcast i8** [[TMP45]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP46]], align 4
-// CHECK11-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 1
-// CHECK11-NEXT:    store i64 4, i64* [[TMP47]], align 4
-// CHECK11-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 1
-// CHECK11-NEXT:    store i8* null, i8** [[TMP48]], align 4
-// CHECK11-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 2
+// CHECK11-NEXT:    store i32 [[TMP33]], i32* [[TMP40]], align 4
+// CHECK11-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 0
+// CHECK11-NEXT:    store i8* null, i8** [[TMP41]], align 4
+// CHECK11-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 1
+// CHECK11-NEXT:    [[TMP43:%.*]] = bitcast i8** [[TMP42]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP43]], align 4
+// CHECK11-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 1
+// CHECK11-NEXT:    [[TMP45:%.*]] = bitcast i8** [[TMP44]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP45]], align 4
+// CHECK11-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 1
+// CHECK11-NEXT:    store i8* null, i8** [[TMP46]], align 4
+// CHECK11-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP48:%.*]] = bitcast i8** [[TMP47]] to i32**
+// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP48]], align 4
+// CHECK11-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 2
 // CHECK11-NEXT:    [[TMP50:%.*]] = bitcast i8** [[TMP49]] to i32**
 // CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP50]], align 4
-// CHECK11-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP52:%.*]] = bitcast i8** [[TMP51]] to i32**
-// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP52]], align 4
-// CHECK11-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 2
-// CHECK11-NEXT:    store i64 [[TMP36]], i64* [[TMP53]], align 4
-// CHECK11-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 2
-// CHECK11-NEXT:    store i8* null, i8** [[TMP54]], align 4
-// CHECK11-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP58:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP58]], i32* [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK11-NEXT:    [[TMP59:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK11-NEXT:    [[SUB11:%.*]] = sub nsw i32 [[TMP59]], 0
+// CHECK11-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 2
+// CHECK11-NEXT:    store i64 [[TMP35]], i64* [[TMP51]], align 4
+// CHECK11-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 2
+// CHECK11-NEXT:    store i8* null, i8** [[TMP52]], align 4
+// CHECK11-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP56:%.*]] = load i32, i32* [[N]], align 4
+// CHECK11-NEXT:    store i32 [[TMP56]], i32* [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK11-NEXT:    [[TMP57:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK11-NEXT:    [[SUB11:%.*]] = sub nsw i32 [[TMP57]], 0
 // CHECK11-NEXT:    [[DIV12:%.*]] = sdiv i32 [[SUB11]], 1
 // CHECK11-NEXT:    [[SUB13:%.*]] = sub nsw i32 [[DIV12]], 1
 // CHECK11-NEXT:    store i32 [[SUB13]], i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK11-NEXT:    [[TMP60:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK11-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP60]], 1
-// CHECK11-NEXT:    [[TMP61:%.*]] = zext i32 [[ADD14]] to i64
-// CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 [[TMP61]])
-// CHECK11-NEXT:    [[TMP62:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l98.region_id, i32 3, i8** [[TMP55]], i8** [[TMP56]], i64* [[TMP57]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.2, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK11-NEXT:    [[TMP63:%.*]] = icmp ne i32 [[TMP62]], 0
-// CHECK11-NEXT:    br i1 [[TMP63]], label [[OMP_OFFLOAD_FAILED15:%.*]], label [[OMP_OFFLOAD_CONT16:%.*]]
+// CHECK11-NEXT:    [[TMP58:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
+// CHECK11-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP58]], 1
+// CHECK11-NEXT:    [[TMP59:%.*]] = zext i32 [[ADD14]] to i64
+// CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 [[TMP59]])
+// CHECK11-NEXT:    [[TMP60:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l98.region_id, i32 3, i8** [[TMP53]], i8** [[TMP54]], i64* [[TMP55]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.3, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK11-NEXT:    [[TMP61:%.*]] = icmp ne i32 [[TMP60]], 0
+// CHECK11-NEXT:    br i1 [[TMP61]], label [[OMP_OFFLOAD_FAILED15:%.*]], label [[OMP_OFFLOAD_CONT16:%.*]]
 // CHECK11:       omp_offload.failed15:
-// CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l98(i32 [[TMP34]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l98(i32 [[TMP33]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT16]]
 // CHECK11:       omp_offload.cont16:
-// CHECK11-NEXT:    [[TMP64:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP64]], i32* [[DOTCAPTURE_EXPR_17]], align 4
-// CHECK11-NEXT:    [[TMP65:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP65]], i32* [[N_CASTED18]], align 4
-// CHECK11-NEXT:    [[TMP66:%.*]] = load i32, i32* [[N_CASTED18]], align 4
-// CHECK11-NEXT:    [[TMP67:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_17]], align 4
-// CHECK11-NEXT:    store i32 [[TMP67]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK11-NEXT:    [[TMP68:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK11-NEXT:    [[TMP69:%.*]] = mul nuw i32 [[TMP0]], 4
-// CHECK11-NEXT:    [[TMP70:%.*]] = sext i32 [[TMP69]] to i64
-// CHECK11-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP72:%.*]] = bitcast i8** [[TMP71]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP66]], i32* [[TMP72]], align 4
-// CHECK11-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP74:%.*]] = bitcast i8** [[TMP73]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP66]], i32* [[TMP74]], align 4
-// CHECK11-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 0
-// CHECK11-NEXT:    store i64 4, i64* [[TMP75]], align 4
-// CHECK11-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 0
-// CHECK11-NEXT:    store i8* null, i8** [[TMP76]], align 4
-// CHECK11-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 1
+// CHECK11-NEXT:    [[TMP62:%.*]] = load i32, i32* [[N]], align 4
+// CHECK11-NEXT:    store i32 [[TMP62]], i32* [[DOTCAPTURE_EXPR_17]], align 4
+// CHECK11-NEXT:    [[TMP63:%.*]] = load i32, i32* [[N]], align 4
+// CHECK11-NEXT:    store i32 [[TMP63]], i32* [[N_CASTED18]], align 4
+// CHECK11-NEXT:    [[TMP64:%.*]] = load i32, i32* [[N_CASTED18]], align 4
+// CHECK11-NEXT:    [[TMP65:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_17]], align 4
+// CHECK11-NEXT:    store i32 [[TMP65]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
+// CHECK11-NEXT:    [[TMP66:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
+// CHECK11-NEXT:    [[TMP67:%.*]] = mul nuw i32 [[TMP0]], 4
+// CHECK11-NEXT:    [[TMP68:%.*]] = sext i32 [[TMP67]] to i64
+// CHECK11-NEXT:    [[TMP69:%.*]] = bitcast [4 x i64]* [[DOTOFFLOAD_SIZES22]] to i8*
+// CHECK11-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP69]], i8* align 4 bitcast ([4 x i64]* @.offload_sizes.5 to i8*), i32 32, i1 false)
+// CHECK11-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP71:%.*]] = bitcast i8** [[TMP70]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP64]], i32* [[TMP71]], align 4
+// CHECK11-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP73:%.*]] = bitcast i8** [[TMP72]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP64]], i32* [[TMP73]], align 4
+// CHECK11-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 0
+// CHECK11-NEXT:    store i8* null, i8** [[TMP74]], align 4
+// CHECK11-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 1
+// CHECK11-NEXT:    [[TMP76:%.*]] = bitcast i8** [[TMP75]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP76]], align 4
+// CHECK11-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 1
 // CHECK11-NEXT:    [[TMP78:%.*]] = bitcast i8** [[TMP77]] to i32*
 // CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP78]], align 4
-// CHECK11-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 1
-// CHECK11-NEXT:    [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP80]], align 4
-// CHECK11-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 1
-// CHECK11-NEXT:    store i64 4, i64* [[TMP81]], align 4
-// CHECK11-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 1
-// CHECK11-NEXT:    store i8* null, i8** [[TMP82]], align 4
-// CHECK11-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP84:%.*]] = bitcast i8** [[TMP83]] to i32**
-// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP84]], align 4
-// CHECK11-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP86:%.*]] = bitcast i8** [[TMP85]] to i32**
-// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP86]], align 4
-// CHECK11-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 2
-// CHECK11-NEXT:    store i64 [[TMP70]], i64* [[TMP87]], align 4
-// CHECK11-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 2
-// CHECK11-NEXT:    store i8* null, i8** [[TMP88]], align 4
-// CHECK11-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 3
-// CHECK11-NEXT:    [[TMP90:%.*]] = bitcast i8** [[TMP89]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP68]], i32* [[TMP90]], align 4
-// CHECK11-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 3
-// CHECK11-NEXT:    [[TMP92:%.*]] = bitcast i8** [[TMP91]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP68]], i32* [[TMP92]], align 4
-// CHECK11-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 3
-// CHECK11-NEXT:    store i64 4, i64* [[TMP93]], align 4
-// CHECK11-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 3
-// CHECK11-NEXT:    store i8* null, i8** [[TMP94]], align 4
-// CHECK11-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP98:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP98]], i32* [[DOTCAPTURE_EXPR_24]], align 4
-// CHECK11-NEXT:    [[TMP99:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_24]], align 4
-// CHECK11-NEXT:    [[SUB26:%.*]] = sub nsw i32 [[TMP99]], 0
+// CHECK11-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 1
+// CHECK11-NEXT:    store i8* null, i8** [[TMP79]], align 4
+// CHECK11-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP81:%.*]] = bitcast i8** [[TMP80]] to i32**
+// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP81]], align 4
+// CHECK11-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i32**
+// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP83]], align 4
+// CHECK11-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 2
+// CHECK11-NEXT:    store i64 [[TMP68]], i64* [[TMP84]], align 4
+// CHECK11-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 2
+// CHECK11-NEXT:    store i8* null, i8** [[TMP85]], align 4
+// CHECK11-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 3
+// CHECK11-NEXT:    [[TMP87:%.*]] = bitcast i8** [[TMP86]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP66]], i32* [[TMP87]], align 4
+// CHECK11-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 3
+// CHECK11-NEXT:    [[TMP89:%.*]] = bitcast i8** [[TMP88]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP66]], i32* [[TMP89]], align 4
+// CHECK11-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 3
+// CHECK11-NEXT:    store i8* null, i8** [[TMP90]], align 4
+// CHECK11-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP94:%.*]] = load i32, i32* [[N]], align 4
+// CHECK11-NEXT:    store i32 [[TMP94]], i32* [[DOTCAPTURE_EXPR_24]], align 4
+// CHECK11-NEXT:    [[TMP95:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_24]], align 4
+// CHECK11-NEXT:    [[SUB26:%.*]] = sub nsw i32 [[TMP95]], 0
 // CHECK11-NEXT:    [[DIV27:%.*]] = sdiv i32 [[SUB26]], 1
 // CHECK11-NEXT:    [[SUB28:%.*]] = sub nsw i32 [[DIV27]], 1
 // CHECK11-NEXT:    store i32 [[SUB28]], i32* [[DOTCAPTURE_EXPR_25]], align 4
-// CHECK11-NEXT:    [[TMP100:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_25]], align 4
-// CHECK11-NEXT:    [[ADD29:%.*]] = add nsw i32 [[TMP100]], 1
-// CHECK11-NEXT:    [[TMP101:%.*]] = zext i32 [[ADD29]] to i64
-// CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 [[TMP101]])
-// CHECK11-NEXT:    [[TMP102:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l102.region_id, i32 4, i8** [[TMP95]], i8** [[TMP96]], i64* [[TMP97]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK11-NEXT:    [[TMP103:%.*]] = icmp ne i32 [[TMP102]], 0
-// CHECK11-NEXT:    br i1 [[TMP103]], label [[OMP_OFFLOAD_FAILED30:%.*]], label [[OMP_OFFLOAD_CONT31:%.*]]
+// CHECK11-NEXT:    [[TMP96:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_25]], align 4
+// CHECK11-NEXT:    [[ADD29:%.*]] = add nsw i32 [[TMP96]], 1
+// CHECK11-NEXT:    [[TMP97:%.*]] = zext i32 [[ADD29]] to i64
+// CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 [[TMP97]])
+// CHECK11-NEXT:    [[TMP98:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l102.region_id, i32 4, i8** [[TMP91]], i8** [[TMP92]], i64* [[TMP93]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.6, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK11-NEXT:    [[TMP99:%.*]] = icmp ne i32 [[TMP98]], 0
+// CHECK11-NEXT:    br i1 [[TMP99]], label [[OMP_OFFLOAD_FAILED30:%.*]], label [[OMP_OFFLOAD_CONT31:%.*]]
 // CHECK11:       omp_offload.failed30:
-// CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l102(i32 [[TMP66]], i32 [[TMP0]], i32* [[VLA]], i32 [[TMP68]]) #[[ATTR3]]
+// CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l102(i32 [[TMP64]], i32 [[TMP0]], i32* [[VLA]], i32 [[TMP66]]) #[[ATTR3]]
 // CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT31]]
 // CHECK11:       omp_offload.cont31:
-// CHECK11-NEXT:    [[TMP104:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK11-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10EEiT_(i32 noundef [[TMP104]])
+// CHECK11-NEXT:    [[TMP100:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK11-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10EEiT_(i32 noundef [[TMP100]])
 // CHECK11-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK11-NEXT:    [[TMP105:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK11-NEXT:    call void @llvm.stackrestore(i8* [[TMP105]])
-// CHECK11-NEXT:    [[TMP106:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK11-NEXT:    ret i32 [[TMP106]]
+// CHECK11-NEXT:    [[TMP101:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK11-NEXT:    call void @llvm.stackrestore(i8* [[TMP101]])
+// CHECK11-NEXT:    [[TMP102:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK11-NEXT:    ret i32 [[TMP102]]
 //
 //
 // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l94
@@ -3875,11 +3851,11 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
 // CHECK11-NEXT:    store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK11-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK11-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP3]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP5]])
+// CHECK11-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32 [[TMP3]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP5]])
 // CHECK11-NEXT:    ret void
 //
 //
-// CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..3
+// CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..4
 // CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK11-NEXT:  entry:
 // CHECK11-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -3993,7 +3969,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK11-LABEL: define {{[^@]+}}@_Z5tmainIiLi10EEiT_
-// CHECK11-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK11-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK11-NEXT:  entry:
 // CHECK11-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[A:%.*]] = alloca [10 x i32], align 4
@@ -4021,7 +3997,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK11-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l76.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.6, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK11-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l76.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.8, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK11-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK11-NEXT:    br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK11:       omp_offload.failed:
@@ -4039,7 +4015,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
 // CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK11-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l80.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.8, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK11-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l80.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.11, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.12, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK11-NEXT:    [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
 // CHECK11-NEXT:    br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]]
 // CHECK11:       omp_offload.failed5:
@@ -4057,7 +4033,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS8]], i32 0, i32 0
 // CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK11-NEXT:    [[TMP25:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l84.region_id, i32 1, i8** [[TMP23]], i8** [[TMP24]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.11, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.12, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK11-NEXT:    [[TMP25:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l84.region_id, i32 1, i8** [[TMP23]], i8** [[TMP24]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.14, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.15, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK11-NEXT:    [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0
 // CHECK11-NEXT:    br i1 [[TMP26]], label [[OMP_OFFLOAD_FAILED11:%.*]], label [[OMP_OFFLOAD_CONT12:%.*]]
 // CHECK11:       omp_offload.failed11:
@@ -4073,11 +4049,11 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4
 // CHECK11-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4
 // CHECK11-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4
-// CHECK11-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK11-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK11-NEXT:    ret void
 //
 //
-// CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..5
+// CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..7
 // CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK11-NEXT:  entry:
 // CHECK11-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -4149,11 +4125,11 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4
 // CHECK11-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4
 // CHECK11-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4
-// CHECK11-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK11-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK11-NEXT:    ret void
 //
 //
-// CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..7
+// CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..10
 // CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK11-NEXT:  entry:
 // CHECK11-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -4225,11 +4201,11 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4
 // CHECK11-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4
 // CHECK11-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4
-// CHECK11-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK11-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK11-NEXT:    ret void
 //
 //
-// CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..10
+// CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..13
 // CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK11-NEXT:  entry:
 // CHECK11-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -4313,7 +4289,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK11-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK11-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK11-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK11-NEXT:  entry:
 // CHECK11-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK11-NEXT:    ret void
@@ -4368,14 +4344,14 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4
 // CHECK12-NEXT:    [[TMP4:%.*]] = mul nuw i32 [[TMP0]], 4
 // CHECK12-NEXT:    [[TMP5:%.*]] = sext i32 [[TMP4]] to i64
-// CHECK12-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP3]], i32* [[TMP7]], align 4
-// CHECK12-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP3]], i32* [[TMP9]], align 4
-// CHECK12-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK12-NEXT:    store i64 4, i64* [[TMP10]], align 4
+// CHECK12-NEXT:    [[TMP6:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK12-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP6]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes to i8*), i32 24, i1 false)
+// CHECK12-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP3]], i32* [[TMP8]], align 4
+// CHECK12-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP3]], i32* [[TMP10]], align 4
 // CHECK12-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
 // CHECK12-NEXT:    store i8* null, i8** [[TMP11]], align 4
 // CHECK12-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -4384,175 +4360,167 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK12-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i32*
 // CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP15]], align 4
-// CHECK12-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK12-NEXT:    store i64 4, i64* [[TMP16]], align 4
-// CHECK12-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK12-NEXT:    store i8* null, i8** [[TMP17]], align 4
-// CHECK12-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK12-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
-// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP19]], align 4
-// CHECK12-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK12-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32**
-// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP21]], align 4
-// CHECK12-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK12-NEXT:    store i64 [[TMP5]], i64* [[TMP22]], align 4
-// CHECK12-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK12-NEXT:    store i8* null, i8** [[TMP23]], align 4
-// CHECK12-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP27:%.*]] = load i32, i32* [[N]], align 4
-// CHECK12-NEXT:    store i32 [[TMP27]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK12-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK12-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP28]], 0
+// CHECK12-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK12-NEXT:    store i8* null, i8** [[TMP16]], align 4
+// CHECK12-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK12-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32**
+// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP18]], align 4
+// CHECK12-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK12-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32**
+// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP20]], align 4
+// CHECK12-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
+// CHECK12-NEXT:    store i64 [[TMP5]], i64* [[TMP21]], align 4
+// CHECK12-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK12-NEXT:    store i8* null, i8** [[TMP22]], align 4
+// CHECK12-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP26:%.*]] = load i32, i32* [[N]], align 4
+// CHECK12-NEXT:    store i32 [[TMP26]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK12-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK12-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP27]], 0
 // CHECK12-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK12-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK12-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK12-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK12-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP29]], 1
-// CHECK12-NEXT:    [[TMP30:%.*]] = zext i32 [[ADD]] to i64
-// CHECK12-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 [[TMP30]])
-// CHECK12-NEXT:    [[TMP31:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l94.region_id, i32 3, i8** [[TMP24]], i8** [[TMP25]], i64* [[TMP26]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK12-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
-// CHECK12-NEXT:    br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK12-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK12-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP28]], 1
+// CHECK12-NEXT:    [[TMP29:%.*]] = zext i32 [[ADD]] to i64
+// CHECK12-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 [[TMP29]])
+// CHECK12-NEXT:    [[TMP30:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l94.region_id, i32 3, i8** [[TMP23]], i8** [[TMP24]], i64* [[TMP25]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK12-NEXT:    [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
+// CHECK12-NEXT:    br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK12:       omp_offload.failed:
 // CHECK12-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l94(i32 [[TMP3]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK12-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK12:       omp_offload.cont:
-// CHECK12-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N]], align 4
-// CHECK12-NEXT:    store i32 [[TMP33]], i32* [[N_CASTED3]], align 4
-// CHECK12-NEXT:    [[TMP34:%.*]] = load i32, i32* [[N_CASTED3]], align 4
-// CHECK12-NEXT:    [[TMP35:%.*]] = mul nuw i32 [[TMP0]], 4
-// CHECK12-NEXT:    [[TMP36:%.*]] = sext i32 [[TMP35]] to i64
+// CHECK12-NEXT:    [[TMP32:%.*]] = load i32, i32* [[N]], align 4
+// CHECK12-NEXT:    store i32 [[TMP32]], i32* [[N_CASTED3]], align 4
+// CHECK12-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N_CASTED3]], align 4
+// CHECK12-NEXT:    [[TMP34:%.*]] = mul nuw i32 [[TMP0]], 4
+// CHECK12-NEXT:    [[TMP35:%.*]] = sext i32 [[TMP34]] to i64
+// CHECK12-NEXT:    [[TMP36:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES7]] to i8*
+// CHECK12-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP36]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes.2 to i8*), i32 24, i1 false)
 // CHECK12-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
 // CHECK12-NEXT:    [[TMP38:%.*]] = bitcast i8** [[TMP37]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP34]], i32* [[TMP38]], align 4
+// CHECK12-NEXT:    store i32 [[TMP33]], i32* [[TMP38]], align 4
 // CHECK12-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
 // CHECK12-NEXT:    [[TMP40:%.*]] = bitcast i8** [[TMP39]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP34]], i32* [[TMP40]], align 4
-// CHECK12-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 0
-// CHECK12-NEXT:    store i64 4, i64* [[TMP41]], align 4
-// CHECK12-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 0
-// CHECK12-NEXT:    store i8* null, i8** [[TMP42]], align 4
-// CHECK12-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 1
-// CHECK12-NEXT:    [[TMP44:%.*]] = bitcast i8** [[TMP43]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP44]], align 4
-// CHECK12-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 1
-// CHECK12-NEXT:    [[TMP46:%.*]] = bitcast i8** [[TMP45]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP46]], align 4
-// CHECK12-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 1
-// CHECK12-NEXT:    store i64 4, i64* [[TMP47]], align 4
-// CHECK12-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 1
-// CHECK12-NEXT:    store i8* null, i8** [[TMP48]], align 4
-// CHECK12-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 2
+// CHECK12-NEXT:    store i32 [[TMP33]], i32* [[TMP40]], align 4
+// CHECK12-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 0
+// CHECK12-NEXT:    store i8* null, i8** [[TMP41]], align 4
+// CHECK12-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 1
+// CHECK12-NEXT:    [[TMP43:%.*]] = bitcast i8** [[TMP42]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP43]], align 4
+// CHECK12-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 1
+// CHECK12-NEXT:    [[TMP45:%.*]] = bitcast i8** [[TMP44]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP45]], align 4
+// CHECK12-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 1
+// CHECK12-NEXT:    store i8* null, i8** [[TMP46]], align 4
+// CHECK12-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 2
+// CHECK12-NEXT:    [[TMP48:%.*]] = bitcast i8** [[TMP47]] to i32**
+// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP48]], align 4
+// CHECK12-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 2
 // CHECK12-NEXT:    [[TMP50:%.*]] = bitcast i8** [[TMP49]] to i32**
 // CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP50]], align 4
-// CHECK12-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 2
-// CHECK12-NEXT:    [[TMP52:%.*]] = bitcast i8** [[TMP51]] to i32**
-// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP52]], align 4
-// CHECK12-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 2
-// CHECK12-NEXT:    store i64 [[TMP36]], i64* [[TMP53]], align 4
-// CHECK12-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 2
-// CHECK12-NEXT:    store i8* null, i8** [[TMP54]], align 4
-// CHECK12-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP58:%.*]] = load i32, i32* [[N]], align 4
-// CHECK12-NEXT:    store i32 [[TMP58]], i32* [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK12-NEXT:    [[TMP59:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK12-NEXT:    [[SUB11:%.*]] = sub nsw i32 [[TMP59]], 0
+// CHECK12-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 2
+// CHECK12-NEXT:    store i64 [[TMP35]], i64* [[TMP51]], align 4
+// CHECK12-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 2
+// CHECK12-NEXT:    store i8* null, i8** [[TMP52]], align 4
+// CHECK12-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP56:%.*]] = load i32, i32* [[N]], align 4
+// CHECK12-NEXT:    store i32 [[TMP56]], i32* [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK12-NEXT:    [[TMP57:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK12-NEXT:    [[SUB11:%.*]] = sub nsw i32 [[TMP57]], 0
 // CHECK12-NEXT:    [[DIV12:%.*]] = sdiv i32 [[SUB11]], 1
 // CHECK12-NEXT:    [[SUB13:%.*]] = sub nsw i32 [[DIV12]], 1
 // CHECK12-NEXT:    store i32 [[SUB13]], i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK12-NEXT:    [[TMP60:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK12-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP60]], 1
-// CHECK12-NEXT:    [[TMP61:%.*]] = zext i32 [[ADD14]] to i64
-// CHECK12-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 [[TMP61]])
-// CHECK12-NEXT:    [[TMP62:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l98.region_id, i32 3, i8** [[TMP55]], i8** [[TMP56]], i64* [[TMP57]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.2, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK12-NEXT:    [[TMP63:%.*]] = icmp ne i32 [[TMP62]], 0
-// CHECK12-NEXT:    br i1 [[TMP63]], label [[OMP_OFFLOAD_FAILED15:%.*]], label [[OMP_OFFLOAD_CONT16:%.*]]
+// CHECK12-NEXT:    [[TMP58:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
+// CHECK12-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP58]], 1
+// CHECK12-NEXT:    [[TMP59:%.*]] = zext i32 [[ADD14]] to i64
+// CHECK12-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 [[TMP59]])
+// CHECK12-NEXT:    [[TMP60:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l98.region_id, i32 3, i8** [[TMP53]], i8** [[TMP54]], i64* [[TMP55]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.3, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK12-NEXT:    [[TMP61:%.*]] = icmp ne i32 [[TMP60]], 0
+// CHECK12-NEXT:    br i1 [[TMP61]], label [[OMP_OFFLOAD_FAILED15:%.*]], label [[OMP_OFFLOAD_CONT16:%.*]]
 // CHECK12:       omp_offload.failed15:
-// CHECK12-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l98(i32 [[TMP34]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK12-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l98(i32 [[TMP33]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK12-NEXT:    br label [[OMP_OFFLOAD_CONT16]]
 // CHECK12:       omp_offload.cont16:
-// CHECK12-NEXT:    [[TMP64:%.*]] = load i32, i32* [[N]], align 4
-// CHECK12-NEXT:    store i32 [[TMP64]], i32* [[DOTCAPTURE_EXPR_17]], align 4
-// CHECK12-NEXT:    [[TMP65:%.*]] = load i32, i32* [[N]], align 4
-// CHECK12-NEXT:    store i32 [[TMP65]], i32* [[N_CASTED18]], align 4
-// CHECK12-NEXT:    [[TMP66:%.*]] = load i32, i32* [[N_CASTED18]], align 4
-// CHECK12-NEXT:    [[TMP67:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_17]], align 4
-// CHECK12-NEXT:    store i32 [[TMP67]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK12-NEXT:    [[TMP68:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK12-NEXT:    [[TMP69:%.*]] = mul nuw i32 [[TMP0]], 4
-// CHECK12-NEXT:    [[TMP70:%.*]] = sext i32 [[TMP69]] to i64
-// CHECK12-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP72:%.*]] = bitcast i8** [[TMP71]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP66]], i32* [[TMP72]], align 4
-// CHECK12-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP74:%.*]] = bitcast i8** [[TMP73]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP66]], i32* [[TMP74]], align 4
-// CHECK12-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 0
-// CHECK12-NEXT:    store i64 4, i64* [[TMP75]], align 4
-// CHECK12-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 0
-// CHECK12-NEXT:    store i8* null, i8** [[TMP76]], align 4
-// CHECK12-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 1
+// CHECK12-NEXT:    [[TMP62:%.*]] = load i32, i32* [[N]], align 4
+// CHECK12-NEXT:    store i32 [[TMP62]], i32* [[DOTCAPTURE_EXPR_17]], align 4
+// CHECK12-NEXT:    [[TMP63:%.*]] = load i32, i32* [[N]], align 4
+// CHECK12-NEXT:    store i32 [[TMP63]], i32* [[N_CASTED18]], align 4
+// CHECK12-NEXT:    [[TMP64:%.*]] = load i32, i32* [[N_CASTED18]], align 4
+// CHECK12-NEXT:    [[TMP65:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_17]], align 4
+// CHECK12-NEXT:    store i32 [[TMP65]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
+// CHECK12-NEXT:    [[TMP66:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
+// CHECK12-NEXT:    [[TMP67:%.*]] = mul nuw i32 [[TMP0]], 4
+// CHECK12-NEXT:    [[TMP68:%.*]] = sext i32 [[TMP67]] to i64
+// CHECK12-NEXT:    [[TMP69:%.*]] = bitcast [4 x i64]* [[DOTOFFLOAD_SIZES22]] to i8*
+// CHECK12-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP69]], i8* align 4 bitcast ([4 x i64]* @.offload_sizes.5 to i8*), i32 32, i1 false)
+// CHECK12-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP71:%.*]] = bitcast i8** [[TMP70]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP64]], i32* [[TMP71]], align 4
+// CHECK12-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP73:%.*]] = bitcast i8** [[TMP72]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP64]], i32* [[TMP73]], align 4
+// CHECK12-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 0
+// CHECK12-NEXT:    store i8* null, i8** [[TMP74]], align 4
+// CHECK12-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 1
+// CHECK12-NEXT:    [[TMP76:%.*]] = bitcast i8** [[TMP75]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP76]], align 4
+// CHECK12-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 1
 // CHECK12-NEXT:    [[TMP78:%.*]] = bitcast i8** [[TMP77]] to i32*
 // CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP78]], align 4
-// CHECK12-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 1
-// CHECK12-NEXT:    [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP80]], align 4
-// CHECK12-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 1
-// CHECK12-NEXT:    store i64 4, i64* [[TMP81]], align 4
-// CHECK12-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 1
-// CHECK12-NEXT:    store i8* null, i8** [[TMP82]], align 4
-// CHECK12-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 2
-// CHECK12-NEXT:    [[TMP84:%.*]] = bitcast i8** [[TMP83]] to i32**
-// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP84]], align 4
-// CHECK12-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 2
-// CHECK12-NEXT:    [[TMP86:%.*]] = bitcast i8** [[TMP85]] to i32**
-// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP86]], align 4
-// CHECK12-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 2
-// CHECK12-NEXT:    store i64 [[TMP70]], i64* [[TMP87]], align 4
-// CHECK12-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 2
-// CHECK12-NEXT:    store i8* null, i8** [[TMP88]], align 4
-// CHECK12-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 3
-// CHECK12-NEXT:    [[TMP90:%.*]] = bitcast i8** [[TMP89]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP68]], i32* [[TMP90]], align 4
-// CHECK12-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 3
-// CHECK12-NEXT:    [[TMP92:%.*]] = bitcast i8** [[TMP91]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP68]], i32* [[TMP92]], align 4
-// CHECK12-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 3
-// CHECK12-NEXT:    store i64 4, i64* [[TMP93]], align 4
-// CHECK12-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 3
-// CHECK12-NEXT:    store i8* null, i8** [[TMP94]], align 4
-// CHECK12-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP98:%.*]] = load i32, i32* [[N]], align 4
-// CHECK12-NEXT:    store i32 [[TMP98]], i32* [[DOTCAPTURE_EXPR_24]], align 4
-// CHECK12-NEXT:    [[TMP99:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_24]], align 4
-// CHECK12-NEXT:    [[SUB26:%.*]] = sub nsw i32 [[TMP99]], 0
+// CHECK12-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 1
+// CHECK12-NEXT:    store i8* null, i8** [[TMP79]], align 4
+// CHECK12-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 2
+// CHECK12-NEXT:    [[TMP81:%.*]] = bitcast i8** [[TMP80]] to i32**
+// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP81]], align 4
+// CHECK12-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 2
+// CHECK12-NEXT:    [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i32**
+// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP83]], align 4
+// CHECK12-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 2
+// CHECK12-NEXT:    store i64 [[TMP68]], i64* [[TMP84]], align 4
+// CHECK12-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 2
+// CHECK12-NEXT:    store i8* null, i8** [[TMP85]], align 4
+// CHECK12-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 3
+// CHECK12-NEXT:    [[TMP87:%.*]] = bitcast i8** [[TMP86]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP66]], i32* [[TMP87]], align 4
+// CHECK12-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 3
+// CHECK12-NEXT:    [[TMP89:%.*]] = bitcast i8** [[TMP88]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP66]], i32* [[TMP89]], align 4
+// CHECK12-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 3
+// CHECK12-NEXT:    store i8* null, i8** [[TMP90]], align 4
+// CHECK12-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP94:%.*]] = load i32, i32* [[N]], align 4
+// CHECK12-NEXT:    store i32 [[TMP94]], i32* [[DOTCAPTURE_EXPR_24]], align 4
+// CHECK12-NEXT:    [[TMP95:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_24]], align 4
+// CHECK12-NEXT:    [[SUB26:%.*]] = sub nsw i32 [[TMP95]], 0
 // CHECK12-NEXT:    [[DIV27:%.*]] = sdiv i32 [[SUB26]], 1
 // CHECK12-NEXT:    [[SUB28:%.*]] = sub nsw i32 [[DIV27]], 1
 // CHECK12-NEXT:    store i32 [[SUB28]], i32* [[DOTCAPTURE_EXPR_25]], align 4
-// CHECK12-NEXT:    [[TMP100:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_25]], align 4
-// CHECK12-NEXT:    [[ADD29:%.*]] = add nsw i32 [[TMP100]], 1
-// CHECK12-NEXT:    [[TMP101:%.*]] = zext i32 [[ADD29]] to i64
-// CHECK12-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 [[TMP101]])
-// CHECK12-NEXT:    [[TMP102:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l102.region_id, i32 4, i8** [[TMP95]], i8** [[TMP96]], i64* [[TMP97]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK12-NEXT:    [[TMP103:%.*]] = icmp ne i32 [[TMP102]], 0
-// CHECK12-NEXT:    br i1 [[TMP103]], label [[OMP_OFFLOAD_FAILED30:%.*]], label [[OMP_OFFLOAD_CONT31:%.*]]
+// CHECK12-NEXT:    [[TMP96:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_25]], align 4
+// CHECK12-NEXT:    [[ADD29:%.*]] = add nsw i32 [[TMP96]], 1
+// CHECK12-NEXT:    [[TMP97:%.*]] = zext i32 [[ADD29]] to i64
+// CHECK12-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 [[TMP97]])
+// CHECK12-NEXT:    [[TMP98:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l102.region_id, i32 4, i8** [[TMP91]], i8** [[TMP92]], i64* [[TMP93]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.6, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK12-NEXT:    [[TMP99:%.*]] = icmp ne i32 [[TMP98]], 0
+// CHECK12-NEXT:    br i1 [[TMP99]], label [[OMP_OFFLOAD_FAILED30:%.*]], label [[OMP_OFFLOAD_CONT31:%.*]]
 // CHECK12:       omp_offload.failed30:
-// CHECK12-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l102(i32 [[TMP66]], i32 [[TMP0]], i32* [[VLA]], i32 [[TMP68]]) #[[ATTR3]]
+// CHECK12-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l102(i32 [[TMP64]], i32 [[TMP0]], i32* [[VLA]], i32 [[TMP66]]) #[[ATTR3]]
 // CHECK12-NEXT:    br label [[OMP_OFFLOAD_CONT31]]
 // CHECK12:       omp_offload.cont31:
-// CHECK12-NEXT:    [[TMP104:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK12-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10EEiT_(i32 noundef [[TMP104]])
+// CHECK12-NEXT:    [[TMP100:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK12-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10EEiT_(i32 noundef [[TMP100]])
 // CHECK12-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK12-NEXT:    [[TMP105:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK12-NEXT:    call void @llvm.stackrestore(i8* [[TMP105]])
-// CHECK12-NEXT:    [[TMP106:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK12-NEXT:    ret i32 [[TMP106]]
+// CHECK12-NEXT:    [[TMP101:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK12-NEXT:    call void @llvm.stackrestore(i8* [[TMP101]])
+// CHECK12-NEXT:    [[TMP102:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK12-NEXT:    ret i32 [[TMP102]]
 //
 //
 // CHECK12-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l94
@@ -4800,11 +4768,11 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
 // CHECK12-NEXT:    store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK12-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK12-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP3]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP5]])
+// CHECK12-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32 [[TMP3]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP5]])
 // CHECK12-NEXT:    ret void
 //
 //
-// CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..3
+// CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..4
 // CHECK12-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK12-NEXT:  entry:
 // CHECK12-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -4918,7 +4886,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK12-LABEL: define {{[^@]+}}@_Z5tmainIiLi10EEiT_
-// CHECK12-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK12-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK12-NEXT:  entry:
 // CHECK12-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK12-NEXT:    [[A:%.*]] = alloca [10 x i32], align 4
@@ -4946,7 +4914,7 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK12-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK12-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK12-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l76.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.6, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK12-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l76.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.8, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK12-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK12-NEXT:    br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK12:       omp_offload.failed:
@@ -4964,7 +4932,7 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK12-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
 // CHECK12-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK12-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l80.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.8, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK12-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l80.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.11, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.12, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK12-NEXT:    [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
 // CHECK12-NEXT:    br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]]
 // CHECK12:       omp_offload.failed5:
@@ -4982,7 +4950,7 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0
 // CHECK12-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS8]], i32 0, i32 0
 // CHECK12-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK12-NEXT:    [[TMP25:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l84.region_id, i32 1, i8** [[TMP23]], i8** [[TMP24]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.11, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.12, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK12-NEXT:    [[TMP25:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l84.region_id, i32 1, i8** [[TMP23]], i8** [[TMP24]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.14, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.15, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK12-NEXT:    [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0
 // CHECK12-NEXT:    br i1 [[TMP26]], label [[OMP_OFFLOAD_FAILED11:%.*]], label [[OMP_OFFLOAD_CONT12:%.*]]
 // CHECK12:       omp_offload.failed11:
@@ -4998,11 +4966,11 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4
 // CHECK12-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4
 // CHECK12-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4
-// CHECK12-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK12-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK12-NEXT:    ret void
 //
 //
-// CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..5
+// CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..7
 // CHECK12-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK12-NEXT:  entry:
 // CHECK12-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -5074,11 +5042,11 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4
 // CHECK12-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4
 // CHECK12-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4
-// CHECK12-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK12-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK12-NEXT:    ret void
 //
 //
-// CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..7
+// CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..10
 // CHECK12-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK12-NEXT:  entry:
 // CHECK12-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -5150,11 +5118,11 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4
 // CHECK12-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4
 // CHECK12-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4
-// CHECK12-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK12-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK12-NEXT:    ret void
 //
 //
-// CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..10
+// CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..13
 // CHECK12-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK12-NEXT:  entry:
 // CHECK12-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -5238,7 +5206,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK12-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK12-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK12-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK12-NEXT:  entry:
 // CHECK12-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK12-NEXT:    ret void

diff  --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_collapse_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_collapse_codegen.cpp
index ac6c57b41bbe2..e7867e8a8bf98 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_collapse_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_collapse_codegen.cpp
@@ -1010,14 +1010,14 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP9:%.*]] = load i64, i64* [[M_CASTED]], align 8
 // CHECK9-NEXT:    [[TMP10:%.*]] = mul nuw i64 [[TMP1]], [[TMP3]]
 // CHECK9-NEXT:    [[TMP11:%.*]] = mul nuw i64 [[TMP10]], 4
-// CHECK9-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP13:%.*]] = bitcast i8** [[TMP12]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP7]], i64* [[TMP13]], align 8
-// CHECK9-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP7]], i64* [[TMP15]], align 8
-// CHECK9-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK9-NEXT:    store i64 4, i64* [[TMP16]], align 8
+// CHECK9-NEXT:    [[TMP12:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK9-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP12]], i8* align 8 bitcast ([5 x i64]* @.offload_sizes to i8*), i64 40, i1 false)
+// CHECK9-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP7]], i64* [[TMP14]], align 8
+// CHECK9-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP16:%.*]] = bitcast i8** [[TMP15]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP7]], i64* [[TMP16]], align 8
 // CHECK9-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
 // CHECK9-NEXT:    store i8* null, i8** [[TMP17]], align 8
 // CHECK9-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -1026,75 +1026,69 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK9-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i64*
 // CHECK9-NEXT:    store i64 [[TMP9]], i64* [[TMP21]], align 8
-// CHECK9-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK9-NEXT:    store i64 4, i64* [[TMP22]], align 8
-// CHECK9-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK9-NEXT:    store i8* null, i8** [[TMP23]], align 8
-// CHECK9-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP25]], align 8
-// CHECK9-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP27:%.*]] = bitcast i8** [[TMP26]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP27]], align 8
-// CHECK9-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK9-NEXT:    store i64 8, i64* [[TMP28]], align 8
-// CHECK9-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK9-NEXT:    store i8* null, i8** [[TMP29]], align 8
-// CHECK9-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK9-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK9-NEXT:    store i8* null, i8** [[TMP22]], align 8
+// CHECK9-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP24]], align 8
+// CHECK9-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP26]], align 8
+// CHECK9-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK9-NEXT:    store i8* null, i8** [[TMP27]], align 8
+// CHECK9-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK9-NEXT:    [[TMP29:%.*]] = bitcast i8** [[TMP28]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP3]], i64* [[TMP29]], align 8
+// CHECK9-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
 // CHECK9-NEXT:    [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i64*
 // CHECK9-NEXT:    store i64 [[TMP3]], i64* [[TMP31]], align 8
-// CHECK9-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK9-NEXT:    [[TMP33:%.*]] = bitcast i8** [[TMP32]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP3]], i64* [[TMP33]], align 8
-// CHECK9-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK9-NEXT:    store i64 8, i64* [[TMP34]], align 8
-// CHECK9-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
-// CHECK9-NEXT:    store i8* null, i8** [[TMP35]], align 8
-// CHECK9-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK9-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i32**
-// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP37]], align 8
-// CHECK9-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
-// CHECK9-NEXT:    [[TMP39:%.*]] = bitcast i8** [[TMP38]] to i32**
-// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP39]], align 8
-// CHECK9-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK9-NEXT:    store i64 [[TMP11]], i64* [[TMP40]], align 8
-// CHECK9-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
-// CHECK9-NEXT:    store i8* null, i8** [[TMP41]], align 8
-// CHECK9-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP45:%.*]] = load i32, i32* [[N]], align 4
-// CHECK9-NEXT:    store i32 [[TMP45]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK9-NEXT:    [[TMP46:%.*]] = load i32, i32* [[M]], align 4
-// CHECK9-NEXT:    store i32 [[TMP46]], i32* [[DOTCAPTURE_EXPR_3]], align 4
-// CHECK9-NEXT:    [[TMP47:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK9-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP47]], 0
+// CHECK9-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
+// CHECK9-NEXT:    store i8* null, i8** [[TMP32]], align 8
+// CHECK9-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK9-NEXT:    [[TMP34:%.*]] = bitcast i8** [[TMP33]] to i32**
+// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP34]], align 8
+// CHECK9-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK9-NEXT:    [[TMP36:%.*]] = bitcast i8** [[TMP35]] to i32**
+// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP36]], align 8
+// CHECK9-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
+// CHECK9-NEXT:    store i64 [[TMP11]], i64* [[TMP37]], align 8
+// CHECK9-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
+// CHECK9-NEXT:    store i8* null, i8** [[TMP38]], align 8
+// CHECK9-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP42:%.*]] = load i32, i32* [[N]], align 4
+// CHECK9-NEXT:    store i32 [[TMP42]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK9-NEXT:    [[TMP43:%.*]] = load i32, i32* [[M]], align 4
+// CHECK9-NEXT:    store i32 [[TMP43]], i32* [[DOTCAPTURE_EXPR_3]], align 4
+// CHECK9-NEXT:    [[TMP44:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK9-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP44]], 0
 // CHECK9-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK9-NEXT:    [[CONV5:%.*]] = sext i32 [[DIV]] to i64
-// CHECK9-NEXT:    [[TMP48:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4
-// CHECK9-NEXT:    [[SUB6:%.*]] = sub nsw i32 [[TMP48]], 0
+// CHECK9-NEXT:    [[TMP45:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4
+// CHECK9-NEXT:    [[SUB6:%.*]] = sub nsw i32 [[TMP45]], 0
 // CHECK9-NEXT:    [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1
 // CHECK9-NEXT:    [[CONV8:%.*]] = sext i32 [[DIV7]] to i64
 // CHECK9-NEXT:    [[MUL:%.*]] = mul nsw i64 [[CONV5]], [[CONV8]]
 // CHECK9-NEXT:    [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1
 // CHECK9-NEXT:    store i64 [[SUB9]], i64* [[DOTCAPTURE_EXPR_4]], align 8
-// CHECK9-NEXT:    [[TMP49:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_4]], align 8
-// CHECK9-NEXT:    [[ADD:%.*]] = add nsw i64 [[TMP49]], 1
+// CHECK9-NEXT:    [[TMP46:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_4]], align 8
+// CHECK9-NEXT:    [[ADD:%.*]] = add nsw i64 [[TMP46]], 1
 // CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[ADD]])
-// CHECK9-NEXT:    [[TMP50:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81.region_id, i32 5, i8** [[TMP42]], i8** [[TMP43]], i64* [[TMP44]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK9-NEXT:    [[TMP51:%.*]] = icmp ne i32 [[TMP50]], 0
-// CHECK9-NEXT:    br i1 [[TMP51]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK9-NEXT:    [[TMP47:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81.region_id, i32 5, i8** [[TMP39]], i8** [[TMP40]], i64* [[TMP41]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK9-NEXT:    [[TMP48:%.*]] = icmp ne i32 [[TMP47]], 0
+// CHECK9-NEXT:    br i1 [[TMP48]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK9:       omp_offload.failed:
 // CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81(i64 [[TMP7]], i64 [[TMP9]], i64 [[TMP1]], i64 [[TMP3]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK9:       omp_offload.cont:
-// CHECK9-NEXT:    [[TMP52:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK9-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef signext [[TMP52]])
+// CHECK9-NEXT:    [[TMP49:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK9-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef signext [[TMP49]])
 // CHECK9-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK9-NEXT:    [[TMP53:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK9-NEXT:    call void @llvm.stackrestore(i8* [[TMP53]])
-// CHECK9-NEXT:    [[TMP54:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK9-NEXT:    ret i32 [[TMP54]]
+// CHECK9-NEXT:    [[TMP50:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK9-NEXT:    call void @llvm.stackrestore(i8* [[TMP50]])
+// CHECK9-NEXT:    [[TMP51:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK9-NEXT:    ret i32 [[TMP51]]
 //
 //
 // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81
@@ -1408,7 +1402,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK9-LABEL: define {{[^@]+}}@_Z5tmainIiLi10ELi2EEiT_
-// CHECK9-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK9-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK9-NEXT:  entry:
 // CHECK9-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[A:%.*]] = alloca [10 x [2 x i32]], align 4
@@ -1429,7 +1423,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 20)
-// CHECK9-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l68.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK9-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l68.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.4, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.5, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK9-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK9-NEXT:    br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK9:       omp_offload.failed:
@@ -1606,7 +1600,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK9-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK9-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK9-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK9-NEXT:  entry:
 // CHECK9-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK9-NEXT:    ret void
@@ -1659,14 +1653,14 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[TMP9:%.*]] = load i64, i64* [[M_CASTED]], align 8
 // CHECK10-NEXT:    [[TMP10:%.*]] = mul nuw i64 [[TMP1]], [[TMP3]]
 // CHECK10-NEXT:    [[TMP11:%.*]] = mul nuw i64 [[TMP10]], 4
-// CHECK10-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP13:%.*]] = bitcast i8** [[TMP12]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP7]], i64* [[TMP13]], align 8
-// CHECK10-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP7]], i64* [[TMP15]], align 8
-// CHECK10-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK10-NEXT:    store i64 4, i64* [[TMP16]], align 8
+// CHECK10-NEXT:    [[TMP12:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK10-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP12]], i8* align 8 bitcast ([5 x i64]* @.offload_sizes to i8*), i64 40, i1 false)
+// CHECK10-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP7]], i64* [[TMP14]], align 8
+// CHECK10-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP16:%.*]] = bitcast i8** [[TMP15]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP7]], i64* [[TMP16]], align 8
 // CHECK10-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
 // CHECK10-NEXT:    store i8* null, i8** [[TMP17]], align 8
 // CHECK10-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -1675,75 +1669,69 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK10-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i64*
 // CHECK10-NEXT:    store i64 [[TMP9]], i64* [[TMP21]], align 8
-// CHECK10-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK10-NEXT:    store i64 4, i64* [[TMP22]], align 8
-// CHECK10-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK10-NEXT:    store i8* null, i8** [[TMP23]], align 8
-// CHECK10-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK10-NEXT:    [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP25]], align 8
-// CHECK10-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK10-NEXT:    [[TMP27:%.*]] = bitcast i8** [[TMP26]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP27]], align 8
-// CHECK10-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK10-NEXT:    store i64 8, i64* [[TMP28]], align 8
-// CHECK10-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK10-NEXT:    store i8* null, i8** [[TMP29]], align 8
-// CHECK10-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK10-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK10-NEXT:    store i8* null, i8** [[TMP22]], align 8
+// CHECK10-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK10-NEXT:    [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP24]], align 8
+// CHECK10-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK10-NEXT:    [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP26]], align 8
+// CHECK10-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK10-NEXT:    store i8* null, i8** [[TMP27]], align 8
+// CHECK10-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK10-NEXT:    [[TMP29:%.*]] = bitcast i8** [[TMP28]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP3]], i64* [[TMP29]], align 8
+// CHECK10-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
 // CHECK10-NEXT:    [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i64*
 // CHECK10-NEXT:    store i64 [[TMP3]], i64* [[TMP31]], align 8
-// CHECK10-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK10-NEXT:    [[TMP33:%.*]] = bitcast i8** [[TMP32]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP3]], i64* [[TMP33]], align 8
-// CHECK10-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK10-NEXT:    store i64 8, i64* [[TMP34]], align 8
-// CHECK10-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
-// CHECK10-NEXT:    store i8* null, i8** [[TMP35]], align 8
-// CHECK10-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK10-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i32**
-// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP37]], align 8
-// CHECK10-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
-// CHECK10-NEXT:    [[TMP39:%.*]] = bitcast i8** [[TMP38]] to i32**
-// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP39]], align 8
-// CHECK10-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK10-NEXT:    store i64 [[TMP11]], i64* [[TMP40]], align 8
-// CHECK10-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
-// CHECK10-NEXT:    store i8* null, i8** [[TMP41]], align 8
-// CHECK10-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP45:%.*]] = load i32, i32* [[N]], align 4
-// CHECK10-NEXT:    store i32 [[TMP45]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK10-NEXT:    [[TMP46:%.*]] = load i32, i32* [[M]], align 4
-// CHECK10-NEXT:    store i32 [[TMP46]], i32* [[DOTCAPTURE_EXPR_3]], align 4
-// CHECK10-NEXT:    [[TMP47:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK10-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP47]], 0
+// CHECK10-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
+// CHECK10-NEXT:    store i8* null, i8** [[TMP32]], align 8
+// CHECK10-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK10-NEXT:    [[TMP34:%.*]] = bitcast i8** [[TMP33]] to i32**
+// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP34]], align 8
+// CHECK10-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK10-NEXT:    [[TMP36:%.*]] = bitcast i8** [[TMP35]] to i32**
+// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP36]], align 8
+// CHECK10-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
+// CHECK10-NEXT:    store i64 [[TMP11]], i64* [[TMP37]], align 8
+// CHECK10-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
+// CHECK10-NEXT:    store i8* null, i8** [[TMP38]], align 8
+// CHECK10-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP42:%.*]] = load i32, i32* [[N]], align 4
+// CHECK10-NEXT:    store i32 [[TMP42]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK10-NEXT:    [[TMP43:%.*]] = load i32, i32* [[M]], align 4
+// CHECK10-NEXT:    store i32 [[TMP43]], i32* [[DOTCAPTURE_EXPR_3]], align 4
+// CHECK10-NEXT:    [[TMP44:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK10-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP44]], 0
 // CHECK10-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK10-NEXT:    [[CONV5:%.*]] = sext i32 [[DIV]] to i64
-// CHECK10-NEXT:    [[TMP48:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4
-// CHECK10-NEXT:    [[SUB6:%.*]] = sub nsw i32 [[TMP48]], 0
+// CHECK10-NEXT:    [[TMP45:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4
+// CHECK10-NEXT:    [[SUB6:%.*]] = sub nsw i32 [[TMP45]], 0
 // CHECK10-NEXT:    [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1
 // CHECK10-NEXT:    [[CONV8:%.*]] = sext i32 [[DIV7]] to i64
 // CHECK10-NEXT:    [[MUL:%.*]] = mul nsw i64 [[CONV5]], [[CONV8]]
 // CHECK10-NEXT:    [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1
 // CHECK10-NEXT:    store i64 [[SUB9]], i64* [[DOTCAPTURE_EXPR_4]], align 8
-// CHECK10-NEXT:    [[TMP49:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_4]], align 8
-// CHECK10-NEXT:    [[ADD:%.*]] = add nsw i64 [[TMP49]], 1
+// CHECK10-NEXT:    [[TMP46:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_4]], align 8
+// CHECK10-NEXT:    [[ADD:%.*]] = add nsw i64 [[TMP46]], 1
 // CHECK10-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[ADD]])
-// CHECK10-NEXT:    [[TMP50:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81.region_id, i32 5, i8** [[TMP42]], i8** [[TMP43]], i64* [[TMP44]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK10-NEXT:    [[TMP51:%.*]] = icmp ne i32 [[TMP50]], 0
-// CHECK10-NEXT:    br i1 [[TMP51]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK10-NEXT:    [[TMP47:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81.region_id, i32 5, i8** [[TMP39]], i8** [[TMP40]], i64* [[TMP41]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK10-NEXT:    [[TMP48:%.*]] = icmp ne i32 [[TMP47]], 0
+// CHECK10-NEXT:    br i1 [[TMP48]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK10:       omp_offload.failed:
 // CHECK10-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81(i64 [[TMP7]], i64 [[TMP9]], i64 [[TMP1]], i64 [[TMP3]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK10-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK10:       omp_offload.cont:
-// CHECK10-NEXT:    [[TMP52:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK10-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef signext [[TMP52]])
+// CHECK10-NEXT:    [[TMP49:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK10-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef signext [[TMP49]])
 // CHECK10-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK10-NEXT:    [[TMP53:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK10-NEXT:    call void @llvm.stackrestore(i8* [[TMP53]])
-// CHECK10-NEXT:    [[TMP54:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK10-NEXT:    ret i32 [[TMP54]]
+// CHECK10-NEXT:    [[TMP50:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK10-NEXT:    call void @llvm.stackrestore(i8* [[TMP50]])
+// CHECK10-NEXT:    [[TMP51:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK10-NEXT:    ret i32 [[TMP51]]
 //
 //
 // CHECK10-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81
@@ -2057,7 +2045,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK10-LABEL: define {{[^@]+}}@_Z5tmainIiLi10ELi2EEiT_
-// CHECK10-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK10-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK10-NEXT:  entry:
 // CHECK10-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK10-NEXT:    [[A:%.*]] = alloca [10 x [2 x i32]], align 4
@@ -2078,7 +2066,7 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK10-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK10-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 20)
-// CHECK10-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l68.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK10-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l68.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.4, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.5, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK10-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK10-NEXT:    br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK10:       omp_offload.failed:
@@ -2255,7 +2243,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK10-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK10-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK10-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK10-NEXT:  entry:
 // CHECK10-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK10-NEXT:    ret void
@@ -2305,14 +2293,14 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP8:%.*]] = mul nuw i32 [[TMP0]], [[TMP1]]
 // CHECK11-NEXT:    [[TMP9:%.*]] = mul nuw i32 [[TMP8]], 4
 // CHECK11-NEXT:    [[TMP10:%.*]] = sext i32 [[TMP9]] to i64
-// CHECK11-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP5]], i32* [[TMP12]], align 4
-// CHECK11-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP5]], i32* [[TMP14]], align 4
-// CHECK11-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK11-NEXT:    store i64 4, i64* [[TMP15]], align 4
+// CHECK11-NEXT:    [[TMP11:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK11-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP11]], i8* align 4 bitcast ([5 x i64]* @.offload_sizes to i8*), i32 40, i1 false)
+// CHECK11-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP13:%.*]] = bitcast i8** [[TMP12]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP5]], i32* [[TMP13]], align 4
+// CHECK11-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP5]], i32* [[TMP15]], align 4
 // CHECK11-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
 // CHECK11-NEXT:    store i8* null, i8** [[TMP16]], align 4
 // CHECK11-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -2321,75 +2309,69 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK11-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32*
 // CHECK11-NEXT:    store i32 [[TMP7]], i32* [[TMP20]], align 4
-// CHECK11-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK11-NEXT:    store i64 4, i64* [[TMP21]], align 4
-// CHECK11-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK11-NEXT:    store i8* null, i8** [[TMP22]], align 4
-// CHECK11-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP24]], align 4
-// CHECK11-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP26]], align 4
-// CHECK11-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK11-NEXT:    store i64 4, i64* [[TMP27]], align 4
-// CHECK11-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK11-NEXT:    store i8* null, i8** [[TMP28]], align 4
-// CHECK11-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK11-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK11-NEXT:    store i8* null, i8** [[TMP21]], align 4
+// CHECK11-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP23]], align 4
+// CHECK11-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP25]], align 4
+// CHECK11-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK11-NEXT:    store i8* null, i8** [[TMP26]], align 4
+// CHECK11-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK11-NEXT:    [[TMP28:%.*]] = bitcast i8** [[TMP27]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP1]], i32* [[TMP28]], align 4
+// CHECK11-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
 // CHECK11-NEXT:    [[TMP30:%.*]] = bitcast i8** [[TMP29]] to i32*
 // CHECK11-NEXT:    store i32 [[TMP1]], i32* [[TMP30]], align 4
-// CHECK11-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK11-NEXT:    [[TMP32:%.*]] = bitcast i8** [[TMP31]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP1]], i32* [[TMP32]], align 4
-// CHECK11-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK11-NEXT:    store i64 4, i64* [[TMP33]], align 4
-// CHECK11-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
-// CHECK11-NEXT:    store i8* null, i8** [[TMP34]], align 4
-// CHECK11-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK11-NEXT:    [[TMP36:%.*]] = bitcast i8** [[TMP35]] to i32**
-// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP36]], align 4
-// CHECK11-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
-// CHECK11-NEXT:    [[TMP38:%.*]] = bitcast i8** [[TMP37]] to i32**
-// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP38]], align 4
-// CHECK11-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK11-NEXT:    store i64 [[TMP10]], i64* [[TMP39]], align 4
-// CHECK11-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
-// CHECK11-NEXT:    store i8* null, i8** [[TMP40]], align 4
-// CHECK11-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP44:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP44]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK11-NEXT:    [[TMP45:%.*]] = load i32, i32* [[M]], align 4
-// CHECK11-NEXT:    store i32 [[TMP45]], i32* [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK11-NEXT:    [[TMP46:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK11-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP46]], 0
+// CHECK11-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
+// CHECK11-NEXT:    store i8* null, i8** [[TMP31]], align 4
+// CHECK11-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK11-NEXT:    [[TMP33:%.*]] = bitcast i8** [[TMP32]] to i32**
+// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP33]], align 4
+// CHECK11-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK11-NEXT:    [[TMP35:%.*]] = bitcast i8** [[TMP34]] to i32**
+// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP35]], align 4
+// CHECK11-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
+// CHECK11-NEXT:    store i64 [[TMP10]], i64* [[TMP36]], align 4
+// CHECK11-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
+// CHECK11-NEXT:    store i8* null, i8** [[TMP37]], align 4
+// CHECK11-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP41:%.*]] = load i32, i32* [[N]], align 4
+// CHECK11-NEXT:    store i32 [[TMP41]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK11-NEXT:    [[TMP42:%.*]] = load i32, i32* [[M]], align 4
+// CHECK11-NEXT:    store i32 [[TMP42]], i32* [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK11-NEXT:    [[TMP43:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK11-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP43]], 0
 // CHECK11-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK11-NEXT:    [[CONV:%.*]] = sext i32 [[DIV]] to i64
-// CHECK11-NEXT:    [[TMP47:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK11-NEXT:    [[SUB4:%.*]] = sub nsw i32 [[TMP47]], 0
+// CHECK11-NEXT:    [[TMP44:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK11-NEXT:    [[SUB4:%.*]] = sub nsw i32 [[TMP44]], 0
 // CHECK11-NEXT:    [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1
 // CHECK11-NEXT:    [[CONV6:%.*]] = sext i32 [[DIV5]] to i64
 // CHECK11-NEXT:    [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]]
 // CHECK11-NEXT:    [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1
 // CHECK11-NEXT:    store i64 [[SUB7]], i64* [[DOTCAPTURE_EXPR_3]], align 8
-// CHECK11-NEXT:    [[TMP48:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8
-// CHECK11-NEXT:    [[ADD:%.*]] = add nsw i64 [[TMP48]], 1
+// CHECK11-NEXT:    [[TMP45:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8
+// CHECK11-NEXT:    [[ADD:%.*]] = add nsw i64 [[TMP45]], 1
 // CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[ADD]])
-// CHECK11-NEXT:    [[TMP49:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81.region_id, i32 5, i8** [[TMP41]], i8** [[TMP42]], i64* [[TMP43]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK11-NEXT:    [[TMP50:%.*]] = icmp ne i32 [[TMP49]], 0
-// CHECK11-NEXT:    br i1 [[TMP50]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK11-NEXT:    [[TMP46:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81.region_id, i32 5, i8** [[TMP38]], i8** [[TMP39]], i64* [[TMP40]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK11-NEXT:    [[TMP47:%.*]] = icmp ne i32 [[TMP46]], 0
+// CHECK11-NEXT:    br i1 [[TMP47]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK11:       omp_offload.failed:
 // CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81(i32 [[TMP5]], i32 [[TMP7]], i32 [[TMP0]], i32 [[TMP1]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK11:       omp_offload.cont:
-// CHECK11-NEXT:    [[TMP51:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK11-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef [[TMP51]])
+// CHECK11-NEXT:    [[TMP48:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK11-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef [[TMP48]])
 // CHECK11-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK11-NEXT:    [[TMP52:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK11-NEXT:    call void @llvm.stackrestore(i8* [[TMP52]])
-// CHECK11-NEXT:    [[TMP53:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK11-NEXT:    ret i32 [[TMP53]]
+// CHECK11-NEXT:    [[TMP49:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK11-NEXT:    call void @llvm.stackrestore(i8* [[TMP49]])
+// CHECK11-NEXT:    [[TMP50:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK11-NEXT:    ret i32 [[TMP50]]
 //
 //
 // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81
@@ -2695,7 +2677,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK11-LABEL: define {{[^@]+}}@_Z5tmainIiLi10ELi2EEiT_
-// CHECK11-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK11-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK11-NEXT:  entry:
 // CHECK11-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[A:%.*]] = alloca [10 x [2 x i32]], align 4
@@ -2716,7 +2698,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 20)
-// CHECK11-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l68.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK11-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l68.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.4, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.5, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK11-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK11-NEXT:    br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK11:       omp_offload.failed:
@@ -2887,7 +2869,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK11-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK11-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK11-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK11-NEXT:  entry:
 // CHECK11-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK11-NEXT:    ret void
@@ -2937,14 +2919,14 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[TMP8:%.*]] = mul nuw i32 [[TMP0]], [[TMP1]]
 // CHECK12-NEXT:    [[TMP9:%.*]] = mul nuw i32 [[TMP8]], 4
 // CHECK12-NEXT:    [[TMP10:%.*]] = sext i32 [[TMP9]] to i64
-// CHECK12-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP5]], i32* [[TMP12]], align 4
-// CHECK12-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP5]], i32* [[TMP14]], align 4
-// CHECK12-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK12-NEXT:    store i64 4, i64* [[TMP15]], align 4
+// CHECK12-NEXT:    [[TMP11:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK12-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP11]], i8* align 4 bitcast ([5 x i64]* @.offload_sizes to i8*), i32 40, i1 false)
+// CHECK12-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP13:%.*]] = bitcast i8** [[TMP12]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP5]], i32* [[TMP13]], align 4
+// CHECK12-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP5]], i32* [[TMP15]], align 4
 // CHECK12-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
 // CHECK12-NEXT:    store i8* null, i8** [[TMP16]], align 4
 // CHECK12-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -2953,75 +2935,69 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK12-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32*
 // CHECK12-NEXT:    store i32 [[TMP7]], i32* [[TMP20]], align 4
-// CHECK12-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK12-NEXT:    store i64 4, i64* [[TMP21]], align 4
-// CHECK12-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK12-NEXT:    store i8* null, i8** [[TMP22]], align 4
-// CHECK12-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK12-NEXT:    [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP24]], align 4
-// CHECK12-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK12-NEXT:    [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP26]], align 4
-// CHECK12-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK12-NEXT:    store i64 4, i64* [[TMP27]], align 4
-// CHECK12-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK12-NEXT:    store i8* null, i8** [[TMP28]], align 4
-// CHECK12-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK12-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK12-NEXT:    store i8* null, i8** [[TMP21]], align 4
+// CHECK12-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK12-NEXT:    [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP23]], align 4
+// CHECK12-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK12-NEXT:    [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP25]], align 4
+// CHECK12-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK12-NEXT:    store i8* null, i8** [[TMP26]], align 4
+// CHECK12-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK12-NEXT:    [[TMP28:%.*]] = bitcast i8** [[TMP27]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP1]], i32* [[TMP28]], align 4
+// CHECK12-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
 // CHECK12-NEXT:    [[TMP30:%.*]] = bitcast i8** [[TMP29]] to i32*
 // CHECK12-NEXT:    store i32 [[TMP1]], i32* [[TMP30]], align 4
-// CHECK12-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK12-NEXT:    [[TMP32:%.*]] = bitcast i8** [[TMP31]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP1]], i32* [[TMP32]], align 4
-// CHECK12-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK12-NEXT:    store i64 4, i64* [[TMP33]], align 4
-// CHECK12-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
-// CHECK12-NEXT:    store i8* null, i8** [[TMP34]], align 4
-// CHECK12-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK12-NEXT:    [[TMP36:%.*]] = bitcast i8** [[TMP35]] to i32**
-// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP36]], align 4
-// CHECK12-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
-// CHECK12-NEXT:    [[TMP38:%.*]] = bitcast i8** [[TMP37]] to i32**
-// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP38]], align 4
-// CHECK12-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK12-NEXT:    store i64 [[TMP10]], i64* [[TMP39]], align 4
-// CHECK12-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
-// CHECK12-NEXT:    store i8* null, i8** [[TMP40]], align 4
-// CHECK12-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP44:%.*]] = load i32, i32* [[N]], align 4
-// CHECK12-NEXT:    store i32 [[TMP44]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK12-NEXT:    [[TMP45:%.*]] = load i32, i32* [[M]], align 4
-// CHECK12-NEXT:    store i32 [[TMP45]], i32* [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK12-NEXT:    [[TMP46:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK12-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP46]], 0
+// CHECK12-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
+// CHECK12-NEXT:    store i8* null, i8** [[TMP31]], align 4
+// CHECK12-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK12-NEXT:    [[TMP33:%.*]] = bitcast i8** [[TMP32]] to i32**
+// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP33]], align 4
+// CHECK12-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK12-NEXT:    [[TMP35:%.*]] = bitcast i8** [[TMP34]] to i32**
+// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP35]], align 4
+// CHECK12-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
+// CHECK12-NEXT:    store i64 [[TMP10]], i64* [[TMP36]], align 4
+// CHECK12-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
+// CHECK12-NEXT:    store i8* null, i8** [[TMP37]], align 4
+// CHECK12-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP41:%.*]] = load i32, i32* [[N]], align 4
+// CHECK12-NEXT:    store i32 [[TMP41]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK12-NEXT:    [[TMP42:%.*]] = load i32, i32* [[M]], align 4
+// CHECK12-NEXT:    store i32 [[TMP42]], i32* [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK12-NEXT:    [[TMP43:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK12-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP43]], 0
 // CHECK12-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK12-NEXT:    [[CONV:%.*]] = sext i32 [[DIV]] to i64
-// CHECK12-NEXT:    [[TMP47:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK12-NEXT:    [[SUB4:%.*]] = sub nsw i32 [[TMP47]], 0
+// CHECK12-NEXT:    [[TMP44:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK12-NEXT:    [[SUB4:%.*]] = sub nsw i32 [[TMP44]], 0
 // CHECK12-NEXT:    [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1
 // CHECK12-NEXT:    [[CONV6:%.*]] = sext i32 [[DIV5]] to i64
 // CHECK12-NEXT:    [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]]
 // CHECK12-NEXT:    [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1
 // CHECK12-NEXT:    store i64 [[SUB7]], i64* [[DOTCAPTURE_EXPR_3]], align 8
-// CHECK12-NEXT:    [[TMP48:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8
-// CHECK12-NEXT:    [[ADD:%.*]] = add nsw i64 [[TMP48]], 1
+// CHECK12-NEXT:    [[TMP45:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8
+// CHECK12-NEXT:    [[ADD:%.*]] = add nsw i64 [[TMP45]], 1
 // CHECK12-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[ADD]])
-// CHECK12-NEXT:    [[TMP49:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81.region_id, i32 5, i8** [[TMP41]], i8** [[TMP42]], i64* [[TMP43]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK12-NEXT:    [[TMP50:%.*]] = icmp ne i32 [[TMP49]], 0
-// CHECK12-NEXT:    br i1 [[TMP50]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK12-NEXT:    [[TMP46:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81.region_id, i32 5, i8** [[TMP38]], i8** [[TMP39]], i64* [[TMP40]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK12-NEXT:    [[TMP47:%.*]] = icmp ne i32 [[TMP46]], 0
+// CHECK12-NEXT:    br i1 [[TMP47]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK12:       omp_offload.failed:
 // CHECK12-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81(i32 [[TMP5]], i32 [[TMP7]], i32 [[TMP0]], i32 [[TMP1]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK12-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK12:       omp_offload.cont:
-// CHECK12-NEXT:    [[TMP51:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK12-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef [[TMP51]])
+// CHECK12-NEXT:    [[TMP48:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK12-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef [[TMP48]])
 // CHECK12-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK12-NEXT:    [[TMP52:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK12-NEXT:    call void @llvm.stackrestore(i8* [[TMP52]])
-// CHECK12-NEXT:    [[TMP53:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK12-NEXT:    ret i32 [[TMP53]]
+// CHECK12-NEXT:    [[TMP49:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK12-NEXT:    call void @llvm.stackrestore(i8* [[TMP49]])
+// CHECK12-NEXT:    [[TMP50:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK12-NEXT:    ret i32 [[TMP50]]
 //
 //
 // CHECK12-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81
@@ -3327,7 +3303,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK12-LABEL: define {{[^@]+}}@_Z5tmainIiLi10ELi2EEiT_
-// CHECK12-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK12-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK12-NEXT:  entry:
 // CHECK12-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK12-NEXT:    [[A:%.*]] = alloca [10 x [2 x i32]], align 4
@@ -3348,7 +3324,7 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK12-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK12-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 20)
-// CHECK12-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l68.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK12-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l68.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.4, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.5, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK12-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK12-NEXT:    br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK12:       omp_offload.failed:
@@ -3519,7 +3495,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK12-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK12-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK12-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK12-NEXT:  entry:
 // CHECK12-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK12-NEXT:    ret void

diff  --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_dist_schedule_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_dist_schedule_codegen.cpp
index ae835d3b4cc26..edbaa78398bc4 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_dist_schedule_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_dist_schedule_codegen.cpp
@@ -2432,14 +2432,14 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    store i32 [[TMP3]], i32* [[CONV]], align 4
 // CHECK9-NEXT:    [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8
 // CHECK9-NEXT:    [[TMP5:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK9-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP4]], i64* [[TMP7]], align 8
-// CHECK9-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP4]], i64* [[TMP9]], align 8
-// CHECK9-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK9-NEXT:    store i64 4, i64* [[TMP10]], align 8
+// CHECK9-NEXT:    [[TMP6:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK9-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP6]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes to i8*), i64 24, i1 false)
+// CHECK9-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP4]], i64* [[TMP8]], align 8
+// CHECK9-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP4]], i64* [[TMP10]], align 8
 // CHECK9-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
 // CHECK9-NEXT:    store i8* null, i8** [[TMP11]], align 8
 // CHECK9-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -2448,176 +2448,168 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK9-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i64*
 // CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP15]], align 8
-// CHECK9-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK9-NEXT:    store i64 8, i64* [[TMP16]], align 8
-// CHECK9-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK9-NEXT:    store i8* null, i8** [[TMP17]], align 8
-// CHECK9-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
-// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP19]], align 8
-// CHECK9-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32**
-// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP21]], align 8
-// CHECK9-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK9-NEXT:    store i64 [[TMP5]], i64* [[TMP22]], align 8
-// CHECK9-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK9-NEXT:    store i8* null, i8** [[TMP23]], align 8
-// CHECK9-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP27:%.*]] = load i32, i32* [[N]], align 4
-// CHECK9-NEXT:    store i32 [[TMP27]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK9-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK9-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP28]], 0
+// CHECK9-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK9-NEXT:    store i8* null, i8** [[TMP16]], align 8
+// CHECK9-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32**
+// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP18]], align 8
+// CHECK9-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32**
+// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP20]], align 8
+// CHECK9-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
+// CHECK9-NEXT:    store i64 [[TMP5]], i64* [[TMP21]], align 8
+// CHECK9-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK9-NEXT:    store i8* null, i8** [[TMP22]], align 8
+// CHECK9-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP26:%.*]] = load i32, i32* [[N]], align 4
+// CHECK9-NEXT:    store i32 [[TMP26]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK9-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK9-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP27]], 0
 // CHECK9-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK9-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK9-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK9-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK9-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP29]], 1
-// CHECK9-NEXT:    [[TMP30:%.*]] = zext i32 [[ADD]] to i64
-// CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP30]])
-// CHECK9-NEXT:    [[TMP31:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l100.region_id, i32 3, i8** [[TMP24]], i8** [[TMP25]], i64* [[TMP26]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK9-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
-// CHECK9-NEXT:    br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK9-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK9-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP28]], 1
+// CHECK9-NEXT:    [[TMP29:%.*]] = zext i32 [[ADD]] to i64
+// CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP29]])
+// CHECK9-NEXT:    [[TMP30:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l100.region_id, i32 3, i8** [[TMP23]], i8** [[TMP24]], i64* [[TMP25]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK9-NEXT:    [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
+// CHECK9-NEXT:    br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK9:       omp_offload.failed:
 // CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l100(i64 [[TMP4]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK9:       omp_offload.cont:
-// CHECK9-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N]], align 4
+// CHECK9-NEXT:    [[TMP32:%.*]] = load i32, i32* [[N]], align 4
 // CHECK9-NEXT:    [[CONV4:%.*]] = bitcast i64* [[N_CASTED3]] to i32*
-// CHECK9-NEXT:    store i32 [[TMP33]], i32* [[CONV4]], align 4
-// CHECK9-NEXT:    [[TMP34:%.*]] = load i64, i64* [[N_CASTED3]], align 8
-// CHECK9-NEXT:    [[TMP35:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK9-NEXT:    store i32 [[TMP32]], i32* [[CONV4]], align 4
+// CHECK9-NEXT:    [[TMP33:%.*]] = load i64, i64* [[N_CASTED3]], align 8
+// CHECK9-NEXT:    [[TMP34:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK9-NEXT:    [[TMP35:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES8]] to i8*
+// CHECK9-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP35]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes.4 to i8*), i64 24, i1 false)
 // CHECK9-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP34]], i64* [[TMP37]], align 8
+// CHECK9-NEXT:    store i64 [[TMP33]], i64* [[TMP37]], align 8
 // CHECK9-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP39:%.*]] = bitcast i8** [[TMP38]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP34]], i64* [[TMP39]], align 8
-// CHECK9-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 0
-// CHECK9-NEXT:    store i64 4, i64* [[TMP40]], align 8
-// CHECK9-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 0
-// CHECK9-NEXT:    store i8* null, i8** [[TMP41]], align 8
-// CHECK9-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 1
-// CHECK9-NEXT:    [[TMP43:%.*]] = bitcast i8** [[TMP42]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP43]], align 8
-// CHECK9-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 1
-// CHECK9-NEXT:    [[TMP45:%.*]] = bitcast i8** [[TMP44]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP45]], align 8
-// CHECK9-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 1
-// CHECK9-NEXT:    store i64 8, i64* [[TMP46]], align 8
-// CHECK9-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 1
-// CHECK9-NEXT:    store i8* null, i8** [[TMP47]], align 8
-// CHECK9-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 2
+// CHECK9-NEXT:    store i64 [[TMP33]], i64* [[TMP39]], align 8
+// CHECK9-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 0
+// CHECK9-NEXT:    store i8* null, i8** [[TMP40]], align 8
+// CHECK9-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 1
+// CHECK9-NEXT:    [[TMP42:%.*]] = bitcast i8** [[TMP41]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP42]], align 8
+// CHECK9-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 1
+// CHECK9-NEXT:    [[TMP44:%.*]] = bitcast i8** [[TMP43]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP44]], align 8
+// CHECK9-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 1
+// CHECK9-NEXT:    store i8* null, i8** [[TMP45]], align 8
+// CHECK9-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP47:%.*]] = bitcast i8** [[TMP46]] to i32**
+// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP47]], align 8
+// CHECK9-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 2
 // CHECK9-NEXT:    [[TMP49:%.*]] = bitcast i8** [[TMP48]] to i32**
 // CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP49]], align 8
-// CHECK9-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP51:%.*]] = bitcast i8** [[TMP50]] to i32**
-// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP51]], align 8
-// CHECK9-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 2
-// CHECK9-NEXT:    store i64 [[TMP35]], i64* [[TMP52]], align 8
-// CHECK9-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 2
-// CHECK9-NEXT:    store i8* null, i8** [[TMP53]], align 8
-// CHECK9-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP57:%.*]] = load i32, i32* [[N]], align 4
-// CHECK9-NEXT:    store i32 [[TMP57]], i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK9-NEXT:    [[TMP58:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK9-NEXT:    [[SUB12:%.*]] = sub nsw i32 [[TMP58]], 0
+// CHECK9-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 2
+// CHECK9-NEXT:    store i64 [[TMP34]], i64* [[TMP50]], align 8
+// CHECK9-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 2
+// CHECK9-NEXT:    store i8* null, i8** [[TMP51]], align 8
+// CHECK9-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP55:%.*]] = load i32, i32* [[N]], align 4
+// CHECK9-NEXT:    store i32 [[TMP55]], i32* [[DOTCAPTURE_EXPR_10]], align 4
+// CHECK9-NEXT:    [[TMP56:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
+// CHECK9-NEXT:    [[SUB12:%.*]] = sub nsw i32 [[TMP56]], 0
 // CHECK9-NEXT:    [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1
 // CHECK9-NEXT:    [[SUB14:%.*]] = sub nsw i32 [[DIV13]], 1
 // CHECK9-NEXT:    store i32 [[SUB14]], i32* [[DOTCAPTURE_EXPR_11]], align 4
-// CHECK9-NEXT:    [[TMP59:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_11]], align 4
-// CHECK9-NEXT:    [[ADD15:%.*]] = add nsw i32 [[TMP59]], 1
-// CHECK9-NEXT:    [[TMP60:%.*]] = zext i32 [[ADD15]] to i64
-// CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP60]])
-// CHECK9-NEXT:    [[TMP61:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l104.region_id, i32 3, i8** [[TMP54]], i8** [[TMP55]], i64* [[TMP56]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK9-NEXT:    [[TMP62:%.*]] = icmp ne i32 [[TMP61]], 0
-// CHECK9-NEXT:    br i1 [[TMP62]], label [[OMP_OFFLOAD_FAILED16:%.*]], label [[OMP_OFFLOAD_CONT17:%.*]]
+// CHECK9-NEXT:    [[TMP57:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_11]], align 4
+// CHECK9-NEXT:    [[ADD15:%.*]] = add nsw i32 [[TMP57]], 1
+// CHECK9-NEXT:    [[TMP58:%.*]] = zext i32 [[ADD15]] to i64
+// CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP58]])
+// CHECK9-NEXT:    [[TMP59:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l104.region_id, i32 3, i8** [[TMP52]], i8** [[TMP53]], i64* [[TMP54]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.5, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK9-NEXT:    [[TMP60:%.*]] = icmp ne i32 [[TMP59]], 0
+// CHECK9-NEXT:    br i1 [[TMP60]], label [[OMP_OFFLOAD_FAILED16:%.*]], label [[OMP_OFFLOAD_CONT17:%.*]]
 // CHECK9:       omp_offload.failed16:
-// CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l104(i64 [[TMP34]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l104(i64 [[TMP33]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT17]]
 // CHECK9:       omp_offload.cont17:
-// CHECK9-NEXT:    [[TMP63:%.*]] = load i32, i32* [[M]], align 4
-// CHECK9-NEXT:    store i32 [[TMP63]], i32* [[DOTCAPTURE_EXPR_18]], align 4
-// CHECK9-NEXT:    [[TMP64:%.*]] = load i32, i32* [[N]], align 4
+// CHECK9-NEXT:    [[TMP61:%.*]] = load i32, i32* [[M]], align 4
+// CHECK9-NEXT:    store i32 [[TMP61]], i32* [[DOTCAPTURE_EXPR_18]], align 4
+// CHECK9-NEXT:    [[TMP62:%.*]] = load i32, i32* [[N]], align 4
 // CHECK9-NEXT:    [[CONV20:%.*]] = bitcast i64* [[N_CASTED19]] to i32*
-// CHECK9-NEXT:    store i32 [[TMP64]], i32* [[CONV20]], align 4
-// CHECK9-NEXT:    [[TMP65:%.*]] = load i64, i64* [[N_CASTED19]], align 8
-// CHECK9-NEXT:    [[TMP66:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4
+// CHECK9-NEXT:    store i32 [[TMP62]], i32* [[CONV20]], align 4
+// CHECK9-NEXT:    [[TMP63:%.*]] = load i64, i64* [[N_CASTED19]], align 8
+// CHECK9-NEXT:    [[TMP64:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4
 // CHECK9-NEXT:    [[CONV21:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
-// CHECK9-NEXT:    store i32 [[TMP66]], i32* [[CONV21]], align 4
-// CHECK9-NEXT:    [[TMP67:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK9-NEXT:    [[TMP68:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK9-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP70:%.*]] = bitcast i8** [[TMP69]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP65]], i64* [[TMP70]], align 8
-// CHECK9-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP72:%.*]] = bitcast i8** [[TMP71]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP65]], i64* [[TMP72]], align 8
-// CHECK9-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 0
-// CHECK9-NEXT:    store i64 4, i64* [[TMP73]], align 8
-// CHECK9-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 0
-// CHECK9-NEXT:    store i8* null, i8** [[TMP74]], align 8
-// CHECK9-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 1
+// CHECK9-NEXT:    store i32 [[TMP64]], i32* [[CONV21]], align 4
+// CHECK9-NEXT:    [[TMP65:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
+// CHECK9-NEXT:    [[TMP66:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK9-NEXT:    [[TMP67:%.*]] = bitcast [4 x i64]* [[DOTOFFLOAD_SIZES25]] to i8*
+// CHECK9-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP67]], i8* align 8 bitcast ([4 x i64]* @.offload_sizes.8 to i8*), i64 32, i1 false)
+// CHECK9-NEXT:    [[TMP68:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP69:%.*]] = bitcast i8** [[TMP68]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP63]], i64* [[TMP69]], align 8
+// CHECK9-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP71:%.*]] = bitcast i8** [[TMP70]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP63]], i64* [[TMP71]], align 8
+// CHECK9-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 0
+// CHECK9-NEXT:    store i8* null, i8** [[TMP72]], align 8
+// CHECK9-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 1
+// CHECK9-NEXT:    [[TMP74:%.*]] = bitcast i8** [[TMP73]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP74]], align 8
+// CHECK9-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 1
 // CHECK9-NEXT:    [[TMP76:%.*]] = bitcast i8** [[TMP75]] to i64*
 // CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP76]], align 8
-// CHECK9-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 1
-// CHECK9-NEXT:    [[TMP78:%.*]] = bitcast i8** [[TMP77]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP78]], align 8
-// CHECK9-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 1
-// CHECK9-NEXT:    store i64 8, i64* [[TMP79]], align 8
-// CHECK9-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 1
-// CHECK9-NEXT:    store i8* null, i8** [[TMP80]], align 8
-// CHECK9-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP82:%.*]] = bitcast i8** [[TMP81]] to i32**
-// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP82]], align 8
-// CHECK9-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP84:%.*]] = bitcast i8** [[TMP83]] to i32**
-// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP84]], align 8
-// CHECK9-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 2
-// CHECK9-NEXT:    store i64 [[TMP68]], i64* [[TMP85]], align 8
-// CHECK9-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 2
-// CHECK9-NEXT:    store i8* null, i8** [[TMP86]], align 8
-// CHECK9-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 3
-// CHECK9-NEXT:    [[TMP88:%.*]] = bitcast i8** [[TMP87]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP67]], i64* [[TMP88]], align 8
-// CHECK9-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 3
-// CHECK9-NEXT:    [[TMP90:%.*]] = bitcast i8** [[TMP89]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP67]], i64* [[TMP90]], align 8
-// CHECK9-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 3
-// CHECK9-NEXT:    store i64 4, i64* [[TMP91]], align 8
-// CHECK9-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 3
-// CHECK9-NEXT:    store i8* null, i8** [[TMP92]], align 8
-// CHECK9-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP96:%.*]] = load i32, i32* [[N]], align 4
-// CHECK9-NEXT:    store i32 [[TMP96]], i32* [[DOTCAPTURE_EXPR_27]], align 4
-// CHECK9-NEXT:    [[TMP97:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_27]], align 4
-// CHECK9-NEXT:    [[SUB29:%.*]] = sub nsw i32 [[TMP97]], 0
+// CHECK9-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 1
+// CHECK9-NEXT:    store i8* null, i8** [[TMP77]], align 8
+// CHECK9-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP79:%.*]] = bitcast i8** [[TMP78]] to i32**
+// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP79]], align 8
+// CHECK9-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP81:%.*]] = bitcast i8** [[TMP80]] to i32**
+// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP81]], align 8
+// CHECK9-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 2
+// CHECK9-NEXT:    store i64 [[TMP66]], i64* [[TMP82]], align 8
+// CHECK9-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 2
+// CHECK9-NEXT:    store i8* null, i8** [[TMP83]], align 8
+// CHECK9-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 3
+// CHECK9-NEXT:    [[TMP85:%.*]] = bitcast i8** [[TMP84]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP65]], i64* [[TMP85]], align 8
+// CHECK9-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 3
+// CHECK9-NEXT:    [[TMP87:%.*]] = bitcast i8** [[TMP86]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP65]], i64* [[TMP87]], align 8
+// CHECK9-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 3
+// CHECK9-NEXT:    store i8* null, i8** [[TMP88]], align 8
+// CHECK9-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP92:%.*]] = load i32, i32* [[N]], align 4
+// CHECK9-NEXT:    store i32 [[TMP92]], i32* [[DOTCAPTURE_EXPR_27]], align 4
+// CHECK9-NEXT:    [[TMP93:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_27]], align 4
+// CHECK9-NEXT:    [[SUB29:%.*]] = sub nsw i32 [[TMP93]], 0
 // CHECK9-NEXT:    [[DIV30:%.*]] = sdiv i32 [[SUB29]], 1
 // CHECK9-NEXT:    [[SUB31:%.*]] = sub nsw i32 [[DIV30]], 1
 // CHECK9-NEXT:    store i32 [[SUB31]], i32* [[DOTCAPTURE_EXPR_28]], align 4
-// CHECK9-NEXT:    [[TMP98:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_28]], align 4
-// CHECK9-NEXT:    [[ADD32:%.*]] = add nsw i32 [[TMP98]], 1
-// CHECK9-NEXT:    [[TMP99:%.*]] = zext i32 [[ADD32]] to i64
-// CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP99]])
-// CHECK9-NEXT:    [[TMP100:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l108.region_id, i32 4, i8** [[TMP93]], i8** [[TMP94]], i64* [[TMP95]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.7, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK9-NEXT:    [[TMP101:%.*]] = icmp ne i32 [[TMP100]], 0
-// CHECK9-NEXT:    br i1 [[TMP101]], label [[OMP_OFFLOAD_FAILED33:%.*]], label [[OMP_OFFLOAD_CONT34:%.*]]
+// CHECK9-NEXT:    [[TMP94:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_28]], align 4
+// CHECK9-NEXT:    [[ADD32:%.*]] = add nsw i32 [[TMP94]], 1
+// CHECK9-NEXT:    [[TMP95:%.*]] = zext i32 [[ADD32]] to i64
+// CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP95]])
+// CHECK9-NEXT:    [[TMP96:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l108.region_id, i32 4, i8** [[TMP89]], i8** [[TMP90]], i64* [[TMP91]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK9-NEXT:    [[TMP97:%.*]] = icmp ne i32 [[TMP96]], 0
+// CHECK9-NEXT:    br i1 [[TMP97]], label [[OMP_OFFLOAD_FAILED33:%.*]], label [[OMP_OFFLOAD_CONT34:%.*]]
 // CHECK9:       omp_offload.failed33:
-// CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l108(i64 [[TMP65]], i64 [[TMP1]], i32* [[VLA]], i64 [[TMP67]]) #[[ATTR3]]
+// CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l108(i64 [[TMP63]], i64 [[TMP1]], i32* [[VLA]], i64 [[TMP65]]) #[[ATTR3]]
 // CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT34]]
 // CHECK9:       omp_offload.cont34:
-// CHECK9-NEXT:    [[TMP102:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK9-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10EEiT_(i32 noundef signext [[TMP102]])
+// CHECK9-NEXT:    [[TMP98:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK9-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10EEiT_(i32 noundef signext [[TMP98]])
 // CHECK9-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK9-NEXT:    [[TMP103:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK9-NEXT:    call void @llvm.stackrestore(i8* [[TMP103]])
-// CHECK9-NEXT:    [[TMP104:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK9-NEXT:    ret i32 [[TMP104]]
+// CHECK9-NEXT:    [[TMP99:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK9-NEXT:    call void @llvm.stackrestore(i8* [[TMP99]])
+// CHECK9-NEXT:    [[TMP100:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK9-NEXT:    ret i32 [[TMP100]]
 //
 //
 // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l100
@@ -3089,11 +3081,11 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK9-NEXT:    store i32 [[TMP4]], i32* [[CONV3]], align 4
 // CHECK9-NEXT:    [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP3]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP5]])
+// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i64 [[TMP3]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP5]])
 // CHECK9-NEXT:    ret void
 //
 //
-// CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..5
+// CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..6
 // CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK9-NEXT:  entry:
 // CHECK9-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -3180,7 +3172,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[CONV9:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK9-NEXT:    store i32 [[TMP22]], i32* [[CONV9]], align 4
 // CHECK9-NEXT:    [[TMP23:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i64 [[TMP21]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP23]])
+// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*, i64)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i64 [[TMP21]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP23]])
 // CHECK9-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK9:       omp.inner.for.inc:
 // CHECK9-NEXT:    [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -3222,7 +3214,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    ret void
 //
 //
-// CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..6
+// CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..7
 // CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK9-NEXT:  entry:
 // CHECK9-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -3331,7 +3323,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK9-LABEL: define {{[^@]+}}@_Z5tmainIiLi10EEiT_
-// CHECK9-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK9-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK9-NEXT:  entry:
 // CHECK9-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[A:%.*]] = alloca [10 x i32], align 4
@@ -3363,7 +3355,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK9-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l81.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.10, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK9-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l81.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.12, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK9-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK9-NEXT:    br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK9:       omp_offload.failed:
@@ -3381,7 +3373,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
 // CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK9-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l85.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.13, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.14, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK9-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l85.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.16, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.17, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK9-NEXT:    [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
 // CHECK9-NEXT:    br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]]
 // CHECK9:       omp_offload.failed5:
@@ -3413,7 +3405,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS8]], i32 0, i32 0
 // CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK9-NEXT:    [[TMP33:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l89.region_id, i32 2, i8** [[TMP31]], i8** [[TMP32]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.17, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.18, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK9-NEXT:    [[TMP33:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l89.region_id, i32 2, i8** [[TMP31]], i8** [[TMP32]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.20, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.21, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK9-NEXT:    [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0
 // CHECK9-NEXT:    br i1 [[TMP34]], label [[OMP_OFFLOAD_FAILED11:%.*]], label [[OMP_OFFLOAD_CONT12:%.*]]
 // CHECK9:       omp_offload.failed11:
@@ -3429,11 +3421,11 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8
 // CHECK9-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8
 // CHECK9-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8
-// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK9-NEXT:    ret void
 //
 //
-// CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..8
+// CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..10
 // CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK9-NEXT:  entry:
 // CHECK9-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -3481,7 +3473,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
 // CHECK9-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
 // CHECK9-NEXT:    [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
-// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]])
+// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]])
 // CHECK9-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK9:       omp.inner.for.inc:
 // CHECK9-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -3496,7 +3488,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    ret void
 //
 //
-// CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..9
+// CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..11
 // CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK9-NEXT:  entry:
 // CHECK9-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -3579,11 +3571,11 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8
 // CHECK9-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8
 // CHECK9-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8
-// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK9-NEXT:    ret void
 //
 //
-// CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..11
+// CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..14
 // CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK9-NEXT:  entry:
 // CHECK9-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -3631,7 +3623,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
 // CHECK9-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
 // CHECK9-NEXT:    [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
-// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]])
+// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]])
 // CHECK9-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK9:       omp.inner.for.inc:
 // CHECK9-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -3646,7 +3638,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    ret void
 //
 //
-// CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..12
+// CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..15
 // CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK9-NEXT:  entry:
 // CHECK9-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -3737,11 +3729,11 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK9-NEXT:    store i32 [[TMP1]], i32* [[CONV1]], align 4
 // CHECK9-NEXT:    [[TMP2:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i64 [[TMP2]])
+// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i64)* @.omp_outlined..18 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i64 [[TMP2]])
 // CHECK9-NEXT:    ret void
 //
 //
-// CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..15
+// CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..18
 // CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK9-NEXT:  entry:
 // CHECK9-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -3797,7 +3789,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK9-NEXT:    store i32 [[TMP12]], i32* [[CONV2]], align 4
 // CHECK9-NEXT:    [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]])
+// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]])
 // CHECK9-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK9:       omp.inner.for.inc:
 // CHECK9-NEXT:    [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -3833,7 +3825,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    ret void
 //
 //
-// CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..16
+// CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..19
 // CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK9-NEXT:  entry:
 // CHECK9-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -3914,7 +3906,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK9-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK9-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK9-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK9-NEXT:  entry:
 // CHECK9-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK9-NEXT:    ret void
@@ -3972,14 +3964,14 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    store i32 [[TMP3]], i32* [[CONV]], align 4
 // CHECK10-NEXT:    [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8
 // CHECK10-NEXT:    [[TMP5:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK10-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP4]], i64* [[TMP7]], align 8
-// CHECK10-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP4]], i64* [[TMP9]], align 8
-// CHECK10-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK10-NEXT:    store i64 4, i64* [[TMP10]], align 8
+// CHECK10-NEXT:    [[TMP6:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK10-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP6]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes to i8*), i64 24, i1 false)
+// CHECK10-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP4]], i64* [[TMP8]], align 8
+// CHECK10-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP4]], i64* [[TMP10]], align 8
 // CHECK10-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
 // CHECK10-NEXT:    store i8* null, i8** [[TMP11]], align 8
 // CHECK10-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -3988,176 +3980,168 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK10-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i64*
 // CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP15]], align 8
-// CHECK10-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK10-NEXT:    store i64 8, i64* [[TMP16]], align 8
-// CHECK10-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK10-NEXT:    store i8* null, i8** [[TMP17]], align 8
-// CHECK10-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK10-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
-// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP19]], align 8
-// CHECK10-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK10-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32**
-// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP21]], align 8
-// CHECK10-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK10-NEXT:    store i64 [[TMP5]], i64* [[TMP22]], align 8
-// CHECK10-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK10-NEXT:    store i8* null, i8** [[TMP23]], align 8
-// CHECK10-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP27:%.*]] = load i32, i32* [[N]], align 4
-// CHECK10-NEXT:    store i32 [[TMP27]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK10-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK10-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP28]], 0
+// CHECK10-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK10-NEXT:    store i8* null, i8** [[TMP16]], align 8
+// CHECK10-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK10-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32**
+// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP18]], align 8
+// CHECK10-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK10-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32**
+// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP20]], align 8
+// CHECK10-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
+// CHECK10-NEXT:    store i64 [[TMP5]], i64* [[TMP21]], align 8
+// CHECK10-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK10-NEXT:    store i8* null, i8** [[TMP22]], align 8
+// CHECK10-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP26:%.*]] = load i32, i32* [[N]], align 4
+// CHECK10-NEXT:    store i32 [[TMP26]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK10-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK10-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP27]], 0
 // CHECK10-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK10-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK10-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK10-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK10-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP29]], 1
-// CHECK10-NEXT:    [[TMP30:%.*]] = zext i32 [[ADD]] to i64
-// CHECK10-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP30]])
-// CHECK10-NEXT:    [[TMP31:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l100.region_id, i32 3, i8** [[TMP24]], i8** [[TMP25]], i64* [[TMP26]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK10-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
-// CHECK10-NEXT:    br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK10-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK10-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP28]], 1
+// CHECK10-NEXT:    [[TMP29:%.*]] = zext i32 [[ADD]] to i64
+// CHECK10-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP29]])
+// CHECK10-NEXT:    [[TMP30:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l100.region_id, i32 3, i8** [[TMP23]], i8** [[TMP24]], i64* [[TMP25]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK10-NEXT:    [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
+// CHECK10-NEXT:    br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK10:       omp_offload.failed:
 // CHECK10-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l100(i64 [[TMP4]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK10-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK10:       omp_offload.cont:
-// CHECK10-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N]], align 4
+// CHECK10-NEXT:    [[TMP32:%.*]] = load i32, i32* [[N]], align 4
 // CHECK10-NEXT:    [[CONV4:%.*]] = bitcast i64* [[N_CASTED3]] to i32*
-// CHECK10-NEXT:    store i32 [[TMP33]], i32* [[CONV4]], align 4
-// CHECK10-NEXT:    [[TMP34:%.*]] = load i64, i64* [[N_CASTED3]], align 8
-// CHECK10-NEXT:    [[TMP35:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK10-NEXT:    store i32 [[TMP32]], i32* [[CONV4]], align 4
+// CHECK10-NEXT:    [[TMP33:%.*]] = load i64, i64* [[N_CASTED3]], align 8
+// CHECK10-NEXT:    [[TMP34:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK10-NEXT:    [[TMP35:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES8]] to i8*
+// CHECK10-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP35]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes.4 to i8*), i64 24, i1 false)
 // CHECK10-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
 // CHECK10-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP34]], i64* [[TMP37]], align 8
+// CHECK10-NEXT:    store i64 [[TMP33]], i64* [[TMP37]], align 8
 // CHECK10-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
 // CHECK10-NEXT:    [[TMP39:%.*]] = bitcast i8** [[TMP38]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP34]], i64* [[TMP39]], align 8
-// CHECK10-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 0
-// CHECK10-NEXT:    store i64 4, i64* [[TMP40]], align 8
-// CHECK10-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 0
-// CHECK10-NEXT:    store i8* null, i8** [[TMP41]], align 8
-// CHECK10-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 1
-// CHECK10-NEXT:    [[TMP43:%.*]] = bitcast i8** [[TMP42]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP43]], align 8
-// CHECK10-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 1
-// CHECK10-NEXT:    [[TMP45:%.*]] = bitcast i8** [[TMP44]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP45]], align 8
-// CHECK10-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 1
-// CHECK10-NEXT:    store i64 8, i64* [[TMP46]], align 8
-// CHECK10-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 1
-// CHECK10-NEXT:    store i8* null, i8** [[TMP47]], align 8
-// CHECK10-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 2
+// CHECK10-NEXT:    store i64 [[TMP33]], i64* [[TMP39]], align 8
+// CHECK10-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 0
+// CHECK10-NEXT:    store i8* null, i8** [[TMP40]], align 8
+// CHECK10-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 1
+// CHECK10-NEXT:    [[TMP42:%.*]] = bitcast i8** [[TMP41]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP42]], align 8
+// CHECK10-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 1
+// CHECK10-NEXT:    [[TMP44:%.*]] = bitcast i8** [[TMP43]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP44]], align 8
+// CHECK10-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 1
+// CHECK10-NEXT:    store i8* null, i8** [[TMP45]], align 8
+// CHECK10-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 2
+// CHECK10-NEXT:    [[TMP47:%.*]] = bitcast i8** [[TMP46]] to i32**
+// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP47]], align 8
+// CHECK10-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 2
 // CHECK10-NEXT:    [[TMP49:%.*]] = bitcast i8** [[TMP48]] to i32**
 // CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP49]], align 8
-// CHECK10-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 2
-// CHECK10-NEXT:    [[TMP51:%.*]] = bitcast i8** [[TMP50]] to i32**
-// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP51]], align 8
-// CHECK10-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 2
-// CHECK10-NEXT:    store i64 [[TMP35]], i64* [[TMP52]], align 8
-// CHECK10-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 2
-// CHECK10-NEXT:    store i8* null, i8** [[TMP53]], align 8
-// CHECK10-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP57:%.*]] = load i32, i32* [[N]], align 4
-// CHECK10-NEXT:    store i32 [[TMP57]], i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK10-NEXT:    [[TMP58:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK10-NEXT:    [[SUB12:%.*]] = sub nsw i32 [[TMP58]], 0
+// CHECK10-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 2
+// CHECK10-NEXT:    store i64 [[TMP34]], i64* [[TMP50]], align 8
+// CHECK10-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 2
+// CHECK10-NEXT:    store i8* null, i8** [[TMP51]], align 8
+// CHECK10-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP55:%.*]] = load i32, i32* [[N]], align 4
+// CHECK10-NEXT:    store i32 [[TMP55]], i32* [[DOTCAPTURE_EXPR_10]], align 4
+// CHECK10-NEXT:    [[TMP56:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
+// CHECK10-NEXT:    [[SUB12:%.*]] = sub nsw i32 [[TMP56]], 0
 // CHECK10-NEXT:    [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1
 // CHECK10-NEXT:    [[SUB14:%.*]] = sub nsw i32 [[DIV13]], 1
 // CHECK10-NEXT:    store i32 [[SUB14]], i32* [[DOTCAPTURE_EXPR_11]], align 4
-// CHECK10-NEXT:    [[TMP59:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_11]], align 4
-// CHECK10-NEXT:    [[ADD15:%.*]] = add nsw i32 [[TMP59]], 1
-// CHECK10-NEXT:    [[TMP60:%.*]] = zext i32 [[ADD15]] to i64
-// CHECK10-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP60]])
-// CHECK10-NEXT:    [[TMP61:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l104.region_id, i32 3, i8** [[TMP54]], i8** [[TMP55]], i64* [[TMP56]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK10-NEXT:    [[TMP62:%.*]] = icmp ne i32 [[TMP61]], 0
-// CHECK10-NEXT:    br i1 [[TMP62]], label [[OMP_OFFLOAD_FAILED16:%.*]], label [[OMP_OFFLOAD_CONT17:%.*]]
+// CHECK10-NEXT:    [[TMP57:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_11]], align 4
+// CHECK10-NEXT:    [[ADD15:%.*]] = add nsw i32 [[TMP57]], 1
+// CHECK10-NEXT:    [[TMP58:%.*]] = zext i32 [[ADD15]] to i64
+// CHECK10-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP58]])
+// CHECK10-NEXT:    [[TMP59:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l104.region_id, i32 3, i8** [[TMP52]], i8** [[TMP53]], i64* [[TMP54]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.5, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK10-NEXT:    [[TMP60:%.*]] = icmp ne i32 [[TMP59]], 0
+// CHECK10-NEXT:    br i1 [[TMP60]], label [[OMP_OFFLOAD_FAILED16:%.*]], label [[OMP_OFFLOAD_CONT17:%.*]]
 // CHECK10:       omp_offload.failed16:
-// CHECK10-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l104(i64 [[TMP34]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK10-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l104(i64 [[TMP33]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK10-NEXT:    br label [[OMP_OFFLOAD_CONT17]]
 // CHECK10:       omp_offload.cont17:
-// CHECK10-NEXT:    [[TMP63:%.*]] = load i32, i32* [[M]], align 4
-// CHECK10-NEXT:    store i32 [[TMP63]], i32* [[DOTCAPTURE_EXPR_18]], align 4
-// CHECK10-NEXT:    [[TMP64:%.*]] = load i32, i32* [[N]], align 4
+// CHECK10-NEXT:    [[TMP61:%.*]] = load i32, i32* [[M]], align 4
+// CHECK10-NEXT:    store i32 [[TMP61]], i32* [[DOTCAPTURE_EXPR_18]], align 4
+// CHECK10-NEXT:    [[TMP62:%.*]] = load i32, i32* [[N]], align 4
 // CHECK10-NEXT:    [[CONV20:%.*]] = bitcast i64* [[N_CASTED19]] to i32*
-// CHECK10-NEXT:    store i32 [[TMP64]], i32* [[CONV20]], align 4
-// CHECK10-NEXT:    [[TMP65:%.*]] = load i64, i64* [[N_CASTED19]], align 8
-// CHECK10-NEXT:    [[TMP66:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4
+// CHECK10-NEXT:    store i32 [[TMP62]], i32* [[CONV20]], align 4
+// CHECK10-NEXT:    [[TMP63:%.*]] = load i64, i64* [[N_CASTED19]], align 8
+// CHECK10-NEXT:    [[TMP64:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4
 // CHECK10-NEXT:    [[CONV21:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
-// CHECK10-NEXT:    store i32 [[TMP66]], i32* [[CONV21]], align 4
-// CHECK10-NEXT:    [[TMP67:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK10-NEXT:    [[TMP68:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK10-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP70:%.*]] = bitcast i8** [[TMP69]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP65]], i64* [[TMP70]], align 8
-// CHECK10-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP72:%.*]] = bitcast i8** [[TMP71]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP65]], i64* [[TMP72]], align 8
-// CHECK10-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 0
-// CHECK10-NEXT:    store i64 4, i64* [[TMP73]], align 8
-// CHECK10-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 0
-// CHECK10-NEXT:    store i8* null, i8** [[TMP74]], align 8
-// CHECK10-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 1
+// CHECK10-NEXT:    store i32 [[TMP64]], i32* [[CONV21]], align 4
+// CHECK10-NEXT:    [[TMP65:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
+// CHECK10-NEXT:    [[TMP66:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK10-NEXT:    [[TMP67:%.*]] = bitcast [4 x i64]* [[DOTOFFLOAD_SIZES25]] to i8*
+// CHECK10-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP67]], i8* align 8 bitcast ([4 x i64]* @.offload_sizes.8 to i8*), i64 32, i1 false)
+// CHECK10-NEXT:    [[TMP68:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP69:%.*]] = bitcast i8** [[TMP68]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP63]], i64* [[TMP69]], align 8
+// CHECK10-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP71:%.*]] = bitcast i8** [[TMP70]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP63]], i64* [[TMP71]], align 8
+// CHECK10-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 0
+// CHECK10-NEXT:    store i8* null, i8** [[TMP72]], align 8
+// CHECK10-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 1
+// CHECK10-NEXT:    [[TMP74:%.*]] = bitcast i8** [[TMP73]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP74]], align 8
+// CHECK10-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 1
 // CHECK10-NEXT:    [[TMP76:%.*]] = bitcast i8** [[TMP75]] to i64*
 // CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP76]], align 8
-// CHECK10-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 1
-// CHECK10-NEXT:    [[TMP78:%.*]] = bitcast i8** [[TMP77]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP78]], align 8
-// CHECK10-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 1
-// CHECK10-NEXT:    store i64 8, i64* [[TMP79]], align 8
-// CHECK10-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 1
-// CHECK10-NEXT:    store i8* null, i8** [[TMP80]], align 8
-// CHECK10-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 2
-// CHECK10-NEXT:    [[TMP82:%.*]] = bitcast i8** [[TMP81]] to i32**
-// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP82]], align 8
-// CHECK10-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 2
-// CHECK10-NEXT:    [[TMP84:%.*]] = bitcast i8** [[TMP83]] to i32**
-// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP84]], align 8
-// CHECK10-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 2
-// CHECK10-NEXT:    store i64 [[TMP68]], i64* [[TMP85]], align 8
-// CHECK10-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 2
-// CHECK10-NEXT:    store i8* null, i8** [[TMP86]], align 8
-// CHECK10-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 3
-// CHECK10-NEXT:    [[TMP88:%.*]] = bitcast i8** [[TMP87]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP67]], i64* [[TMP88]], align 8
-// CHECK10-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 3
-// CHECK10-NEXT:    [[TMP90:%.*]] = bitcast i8** [[TMP89]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP67]], i64* [[TMP90]], align 8
-// CHECK10-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 3
-// CHECK10-NEXT:    store i64 4, i64* [[TMP91]], align 8
-// CHECK10-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 3
-// CHECK10-NEXT:    store i8* null, i8** [[TMP92]], align 8
-// CHECK10-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP96:%.*]] = load i32, i32* [[N]], align 4
-// CHECK10-NEXT:    store i32 [[TMP96]], i32* [[DOTCAPTURE_EXPR_27]], align 4
-// CHECK10-NEXT:    [[TMP97:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_27]], align 4
-// CHECK10-NEXT:    [[SUB29:%.*]] = sub nsw i32 [[TMP97]], 0
+// CHECK10-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 1
+// CHECK10-NEXT:    store i8* null, i8** [[TMP77]], align 8
+// CHECK10-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 2
+// CHECK10-NEXT:    [[TMP79:%.*]] = bitcast i8** [[TMP78]] to i32**
+// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP79]], align 8
+// CHECK10-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 2
+// CHECK10-NEXT:    [[TMP81:%.*]] = bitcast i8** [[TMP80]] to i32**
+// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP81]], align 8
+// CHECK10-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 2
+// CHECK10-NEXT:    store i64 [[TMP66]], i64* [[TMP82]], align 8
+// CHECK10-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 2
+// CHECK10-NEXT:    store i8* null, i8** [[TMP83]], align 8
+// CHECK10-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 3
+// CHECK10-NEXT:    [[TMP85:%.*]] = bitcast i8** [[TMP84]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP65]], i64* [[TMP85]], align 8
+// CHECK10-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 3
+// CHECK10-NEXT:    [[TMP87:%.*]] = bitcast i8** [[TMP86]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP65]], i64* [[TMP87]], align 8
+// CHECK10-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 3
+// CHECK10-NEXT:    store i8* null, i8** [[TMP88]], align 8
+// CHECK10-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP92:%.*]] = load i32, i32* [[N]], align 4
+// CHECK10-NEXT:    store i32 [[TMP92]], i32* [[DOTCAPTURE_EXPR_27]], align 4
+// CHECK10-NEXT:    [[TMP93:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_27]], align 4
+// CHECK10-NEXT:    [[SUB29:%.*]] = sub nsw i32 [[TMP93]], 0
 // CHECK10-NEXT:    [[DIV30:%.*]] = sdiv i32 [[SUB29]], 1
 // CHECK10-NEXT:    [[SUB31:%.*]] = sub nsw i32 [[DIV30]], 1
 // CHECK10-NEXT:    store i32 [[SUB31]], i32* [[DOTCAPTURE_EXPR_28]], align 4
-// CHECK10-NEXT:    [[TMP98:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_28]], align 4
-// CHECK10-NEXT:    [[ADD32:%.*]] = add nsw i32 [[TMP98]], 1
-// CHECK10-NEXT:    [[TMP99:%.*]] = zext i32 [[ADD32]] to i64
-// CHECK10-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP99]])
-// CHECK10-NEXT:    [[TMP100:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l108.region_id, i32 4, i8** [[TMP93]], i8** [[TMP94]], i64* [[TMP95]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.7, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK10-NEXT:    [[TMP101:%.*]] = icmp ne i32 [[TMP100]], 0
-// CHECK10-NEXT:    br i1 [[TMP101]], label [[OMP_OFFLOAD_FAILED33:%.*]], label [[OMP_OFFLOAD_CONT34:%.*]]
+// CHECK10-NEXT:    [[TMP94:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_28]], align 4
+// CHECK10-NEXT:    [[ADD32:%.*]] = add nsw i32 [[TMP94]], 1
+// CHECK10-NEXT:    [[TMP95:%.*]] = zext i32 [[ADD32]] to i64
+// CHECK10-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP95]])
+// CHECK10-NEXT:    [[TMP96:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l108.region_id, i32 4, i8** [[TMP89]], i8** [[TMP90]], i64* [[TMP91]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK10-NEXT:    [[TMP97:%.*]] = icmp ne i32 [[TMP96]], 0
+// CHECK10-NEXT:    br i1 [[TMP97]], label [[OMP_OFFLOAD_FAILED33:%.*]], label [[OMP_OFFLOAD_CONT34:%.*]]
 // CHECK10:       omp_offload.failed33:
-// CHECK10-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l108(i64 [[TMP65]], i64 [[TMP1]], i32* [[VLA]], i64 [[TMP67]]) #[[ATTR3]]
+// CHECK10-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l108(i64 [[TMP63]], i64 [[TMP1]], i32* [[VLA]], i64 [[TMP65]]) #[[ATTR3]]
 // CHECK10-NEXT:    br label [[OMP_OFFLOAD_CONT34]]
 // CHECK10:       omp_offload.cont34:
-// CHECK10-NEXT:    [[TMP102:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK10-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10EEiT_(i32 noundef signext [[TMP102]])
+// CHECK10-NEXT:    [[TMP98:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK10-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10EEiT_(i32 noundef signext [[TMP98]])
 // CHECK10-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK10-NEXT:    [[TMP103:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK10-NEXT:    call void @llvm.stackrestore(i8* [[TMP103]])
-// CHECK10-NEXT:    [[TMP104:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK10-NEXT:    ret i32 [[TMP104]]
+// CHECK10-NEXT:    [[TMP99:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK10-NEXT:    call void @llvm.stackrestore(i8* [[TMP99]])
+// CHECK10-NEXT:    [[TMP100:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK10-NEXT:    ret i32 [[TMP100]]
 //
 //
 // CHECK10-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l100
@@ -4629,11 +4613,11 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK10-NEXT:    store i32 [[TMP4]], i32* [[CONV3]], align 4
 // CHECK10-NEXT:    [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP3]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP5]])
+// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i64 [[TMP3]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP5]])
 // CHECK10-NEXT:    ret void
 //
 //
-// CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..5
+// CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..6
 // CHECK10-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK10-NEXT:  entry:
 // CHECK10-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -4720,7 +4704,7 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[CONV9:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK10-NEXT:    store i32 [[TMP22]], i32* [[CONV9]], align 4
 // CHECK10-NEXT:    [[TMP23:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i64 [[TMP21]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP23]])
+// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*, i64)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i64 [[TMP21]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP23]])
 // CHECK10-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK10:       omp.inner.for.inc:
 // CHECK10-NEXT:    [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -4762,7 +4746,7 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    ret void
 //
 //
-// CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..6
+// CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..7
 // CHECK10-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK10-NEXT:  entry:
 // CHECK10-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -4871,7 +4855,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK10-LABEL: define {{[^@]+}}@_Z5tmainIiLi10EEiT_
-// CHECK10-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK10-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK10-NEXT:  entry:
 // CHECK10-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK10-NEXT:    [[A:%.*]] = alloca [10 x i32], align 4
@@ -4903,7 +4887,7 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK10-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK10-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK10-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l81.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.10, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK10-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l81.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.12, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK10-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK10-NEXT:    br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK10:       omp_offload.failed:
@@ -4921,7 +4905,7 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK10-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
 // CHECK10-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK10-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l85.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.13, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.14, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK10-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l85.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.16, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.17, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK10-NEXT:    [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
 // CHECK10-NEXT:    br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]]
 // CHECK10:       omp_offload.failed5:
@@ -4953,7 +4937,7 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0
 // CHECK10-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS8]], i32 0, i32 0
 // CHECK10-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK10-NEXT:    [[TMP33:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l89.region_id, i32 2, i8** [[TMP31]], i8** [[TMP32]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.17, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.18, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK10-NEXT:    [[TMP33:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l89.region_id, i32 2, i8** [[TMP31]], i8** [[TMP32]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.20, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.21, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK10-NEXT:    [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0
 // CHECK10-NEXT:    br i1 [[TMP34]], label [[OMP_OFFLOAD_FAILED11:%.*]], label [[OMP_OFFLOAD_CONT12:%.*]]
 // CHECK10:       omp_offload.failed11:
@@ -4969,11 +4953,11 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8
 // CHECK10-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8
 // CHECK10-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8
-// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK10-NEXT:    ret void
 //
 //
-// CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..8
+// CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..10
 // CHECK10-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK10-NEXT:  entry:
 // CHECK10-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -5021,7 +5005,7 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
 // CHECK10-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
 // CHECK10-NEXT:    [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
-// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]])
+// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]])
 // CHECK10-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK10:       omp.inner.for.inc:
 // CHECK10-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -5036,7 +5020,7 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    ret void
 //
 //
-// CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..9
+// CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..11
 // CHECK10-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK10-NEXT:  entry:
 // CHECK10-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -5119,11 +5103,11 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8
 // CHECK10-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8
 // CHECK10-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8
-// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK10-NEXT:    ret void
 //
 //
-// CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..11
+// CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..14
 // CHECK10-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK10-NEXT:  entry:
 // CHECK10-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -5171,7 +5155,7 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
 // CHECK10-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
 // CHECK10-NEXT:    [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
-// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]])
+// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]])
 // CHECK10-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK10:       omp.inner.for.inc:
 // CHECK10-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -5186,7 +5170,7 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    ret void
 //
 //
-// CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..12
+// CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..15
 // CHECK10-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK10-NEXT:  entry:
 // CHECK10-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -5277,11 +5261,11 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK10-NEXT:    store i32 [[TMP1]], i32* [[CONV1]], align 4
 // CHECK10-NEXT:    [[TMP2:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i64 [[TMP2]])
+// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i64)* @.omp_outlined..18 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i64 [[TMP2]])
 // CHECK10-NEXT:    ret void
 //
 //
-// CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..15
+// CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..18
 // CHECK10-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK10-NEXT:  entry:
 // CHECK10-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -5337,7 +5321,7 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK10-NEXT:    store i32 [[TMP12]], i32* [[CONV2]], align 4
 // CHECK10-NEXT:    [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]])
+// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]])
 // CHECK10-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK10:       omp.inner.for.inc:
 // CHECK10-NEXT:    [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -5373,7 +5357,7 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    ret void
 //
 //
-// CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..16
+// CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..19
 // CHECK10-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK10-NEXT:  entry:
 // CHECK10-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -5454,7 +5438,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK10-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK10-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK10-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK10-NEXT:  entry:
 // CHECK10-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK10-NEXT:    ret void
@@ -5511,14 +5495,14 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4
 // CHECK11-NEXT:    [[TMP4:%.*]] = mul nuw i32 [[TMP0]], 4
 // CHECK11-NEXT:    [[TMP5:%.*]] = sext i32 [[TMP4]] to i64
-// CHECK11-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP3]], i32* [[TMP7]], align 4
-// CHECK11-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP3]], i32* [[TMP9]], align 4
-// CHECK11-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK11-NEXT:    store i64 4, i64* [[TMP10]], align 4
+// CHECK11-NEXT:    [[TMP6:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK11-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP6]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes to i8*), i32 24, i1 false)
+// CHECK11-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP3]], i32* [[TMP8]], align 4
+// CHECK11-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP3]], i32* [[TMP10]], align 4
 // CHECK11-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
 // CHECK11-NEXT:    store i8* null, i8** [[TMP11]], align 4
 // CHECK11-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -5527,175 +5511,167 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK11-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i32*
 // CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP15]], align 4
-// CHECK11-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK11-NEXT:    store i64 4, i64* [[TMP16]], align 4
-// CHECK11-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK11-NEXT:    store i8* null, i8** [[TMP17]], align 4
-// CHECK11-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
-// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP19]], align 4
-// CHECK11-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32**
-// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP21]], align 4
-// CHECK11-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK11-NEXT:    store i64 [[TMP5]], i64* [[TMP22]], align 4
-// CHECK11-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK11-NEXT:    store i8* null, i8** [[TMP23]], align 4
-// CHECK11-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP27:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP27]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK11-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK11-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP28]], 0
+// CHECK11-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK11-NEXT:    store i8* null, i8** [[TMP16]], align 4
+// CHECK11-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32**
+// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP18]], align 4
+// CHECK11-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32**
+// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP20]], align 4
+// CHECK11-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
+// CHECK11-NEXT:    store i64 [[TMP5]], i64* [[TMP21]], align 4
+// CHECK11-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK11-NEXT:    store i8* null, i8** [[TMP22]], align 4
+// CHECK11-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP26:%.*]] = load i32, i32* [[N]], align 4
+// CHECK11-NEXT:    store i32 [[TMP26]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK11-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK11-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP27]], 0
 // CHECK11-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK11-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK11-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK11-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK11-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP29]], 1
-// CHECK11-NEXT:    [[TMP30:%.*]] = zext i32 [[ADD]] to i64
-// CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP30]])
-// CHECK11-NEXT:    [[TMP31:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l100.region_id, i32 3, i8** [[TMP24]], i8** [[TMP25]], i64* [[TMP26]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK11-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
-// CHECK11-NEXT:    br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK11-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK11-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP28]], 1
+// CHECK11-NEXT:    [[TMP29:%.*]] = zext i32 [[ADD]] to i64
+// CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP29]])
+// CHECK11-NEXT:    [[TMP30:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l100.region_id, i32 3, i8** [[TMP23]], i8** [[TMP24]], i64* [[TMP25]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK11-NEXT:    [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
+// CHECK11-NEXT:    br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK11:       omp_offload.failed:
 // CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l100(i32 [[TMP3]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK11:       omp_offload.cont:
-// CHECK11-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP33]], i32* [[N_CASTED3]], align 4
-// CHECK11-NEXT:    [[TMP34:%.*]] = load i32, i32* [[N_CASTED3]], align 4
-// CHECK11-NEXT:    [[TMP35:%.*]] = mul nuw i32 [[TMP0]], 4
-// CHECK11-NEXT:    [[TMP36:%.*]] = sext i32 [[TMP35]] to i64
+// CHECK11-NEXT:    [[TMP32:%.*]] = load i32, i32* [[N]], align 4
+// CHECK11-NEXT:    store i32 [[TMP32]], i32* [[N_CASTED3]], align 4
+// CHECK11-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N_CASTED3]], align 4
+// CHECK11-NEXT:    [[TMP34:%.*]] = mul nuw i32 [[TMP0]], 4
+// CHECK11-NEXT:    [[TMP35:%.*]] = sext i32 [[TMP34]] to i64
+// CHECK11-NEXT:    [[TMP36:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES7]] to i8*
+// CHECK11-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP36]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes.4 to i8*), i32 24, i1 false)
 // CHECK11-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP38:%.*]] = bitcast i8** [[TMP37]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP34]], i32* [[TMP38]], align 4
+// CHECK11-NEXT:    store i32 [[TMP33]], i32* [[TMP38]], align 4
 // CHECK11-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP40:%.*]] = bitcast i8** [[TMP39]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP34]], i32* [[TMP40]], align 4
-// CHECK11-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 0
-// CHECK11-NEXT:    store i64 4, i64* [[TMP41]], align 4
-// CHECK11-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 0
-// CHECK11-NEXT:    store i8* null, i8** [[TMP42]], align 4
-// CHECK11-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 1
-// CHECK11-NEXT:    [[TMP44:%.*]] = bitcast i8** [[TMP43]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP44]], align 4
-// CHECK11-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 1
-// CHECK11-NEXT:    [[TMP46:%.*]] = bitcast i8** [[TMP45]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP46]], align 4
-// CHECK11-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 1
-// CHECK11-NEXT:    store i64 4, i64* [[TMP47]], align 4
-// CHECK11-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 1
-// CHECK11-NEXT:    store i8* null, i8** [[TMP48]], align 4
-// CHECK11-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 2
+// CHECK11-NEXT:    store i32 [[TMP33]], i32* [[TMP40]], align 4
+// CHECK11-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 0
+// CHECK11-NEXT:    store i8* null, i8** [[TMP41]], align 4
+// CHECK11-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 1
+// CHECK11-NEXT:    [[TMP43:%.*]] = bitcast i8** [[TMP42]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP43]], align 4
+// CHECK11-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 1
+// CHECK11-NEXT:    [[TMP45:%.*]] = bitcast i8** [[TMP44]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP45]], align 4
+// CHECK11-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 1
+// CHECK11-NEXT:    store i8* null, i8** [[TMP46]], align 4
+// CHECK11-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP48:%.*]] = bitcast i8** [[TMP47]] to i32**
+// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP48]], align 4
+// CHECK11-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 2
 // CHECK11-NEXT:    [[TMP50:%.*]] = bitcast i8** [[TMP49]] to i32**
 // CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP50]], align 4
-// CHECK11-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP52:%.*]] = bitcast i8** [[TMP51]] to i32**
-// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP52]], align 4
-// CHECK11-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 2
-// CHECK11-NEXT:    store i64 [[TMP36]], i64* [[TMP53]], align 4
-// CHECK11-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 2
-// CHECK11-NEXT:    store i8* null, i8** [[TMP54]], align 4
-// CHECK11-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP58:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP58]], i32* [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK11-NEXT:    [[TMP59:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK11-NEXT:    [[SUB11:%.*]] = sub nsw i32 [[TMP59]], 0
+// CHECK11-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 2
+// CHECK11-NEXT:    store i64 [[TMP35]], i64* [[TMP51]], align 4
+// CHECK11-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 2
+// CHECK11-NEXT:    store i8* null, i8** [[TMP52]], align 4
+// CHECK11-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP56:%.*]] = load i32, i32* [[N]], align 4
+// CHECK11-NEXT:    store i32 [[TMP56]], i32* [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK11-NEXT:    [[TMP57:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK11-NEXT:    [[SUB11:%.*]] = sub nsw i32 [[TMP57]], 0
 // CHECK11-NEXT:    [[DIV12:%.*]] = sdiv i32 [[SUB11]], 1
 // CHECK11-NEXT:    [[SUB13:%.*]] = sub nsw i32 [[DIV12]], 1
 // CHECK11-NEXT:    store i32 [[SUB13]], i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK11-NEXT:    [[TMP60:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK11-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP60]], 1
-// CHECK11-NEXT:    [[TMP61:%.*]] = zext i32 [[ADD14]] to i64
-// CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP61]])
-// CHECK11-NEXT:    [[TMP62:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l104.region_id, i32 3, i8** [[TMP55]], i8** [[TMP56]], i64* [[TMP57]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK11-NEXT:    [[TMP63:%.*]] = icmp ne i32 [[TMP62]], 0
-// CHECK11-NEXT:    br i1 [[TMP63]], label [[OMP_OFFLOAD_FAILED15:%.*]], label [[OMP_OFFLOAD_CONT16:%.*]]
+// CHECK11-NEXT:    [[TMP58:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
+// CHECK11-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP58]], 1
+// CHECK11-NEXT:    [[TMP59:%.*]] = zext i32 [[ADD14]] to i64
+// CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP59]])
+// CHECK11-NEXT:    [[TMP60:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l104.region_id, i32 3, i8** [[TMP53]], i8** [[TMP54]], i64* [[TMP55]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.5, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK11-NEXT:    [[TMP61:%.*]] = icmp ne i32 [[TMP60]], 0
+// CHECK11-NEXT:    br i1 [[TMP61]], label [[OMP_OFFLOAD_FAILED15:%.*]], label [[OMP_OFFLOAD_CONT16:%.*]]
 // CHECK11:       omp_offload.failed15:
-// CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l104(i32 [[TMP34]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l104(i32 [[TMP33]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT16]]
 // CHECK11:       omp_offload.cont16:
-// CHECK11-NEXT:    [[TMP64:%.*]] = load i32, i32* [[M]], align 4
-// CHECK11-NEXT:    store i32 [[TMP64]], i32* [[DOTCAPTURE_EXPR_17]], align 4
-// CHECK11-NEXT:    [[TMP65:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP65]], i32* [[N_CASTED18]], align 4
-// CHECK11-NEXT:    [[TMP66:%.*]] = load i32, i32* [[N_CASTED18]], align 4
-// CHECK11-NEXT:    [[TMP67:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_17]], align 4
-// CHECK11-NEXT:    store i32 [[TMP67]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK11-NEXT:    [[TMP68:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK11-NEXT:    [[TMP69:%.*]] = mul nuw i32 [[TMP0]], 4
-// CHECK11-NEXT:    [[TMP70:%.*]] = sext i32 [[TMP69]] to i64
-// CHECK11-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP72:%.*]] = bitcast i8** [[TMP71]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP66]], i32* [[TMP72]], align 4
-// CHECK11-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP74:%.*]] = bitcast i8** [[TMP73]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP66]], i32* [[TMP74]], align 4
-// CHECK11-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 0
-// CHECK11-NEXT:    store i64 4, i64* [[TMP75]], align 4
-// CHECK11-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 0
-// CHECK11-NEXT:    store i8* null, i8** [[TMP76]], align 4
-// CHECK11-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 1
+// CHECK11-NEXT:    [[TMP62:%.*]] = load i32, i32* [[M]], align 4
+// CHECK11-NEXT:    store i32 [[TMP62]], i32* [[DOTCAPTURE_EXPR_17]], align 4
+// CHECK11-NEXT:    [[TMP63:%.*]] = load i32, i32* [[N]], align 4
+// CHECK11-NEXT:    store i32 [[TMP63]], i32* [[N_CASTED18]], align 4
+// CHECK11-NEXT:    [[TMP64:%.*]] = load i32, i32* [[N_CASTED18]], align 4
+// CHECK11-NEXT:    [[TMP65:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_17]], align 4
+// CHECK11-NEXT:    store i32 [[TMP65]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
+// CHECK11-NEXT:    [[TMP66:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
+// CHECK11-NEXT:    [[TMP67:%.*]] = mul nuw i32 [[TMP0]], 4
+// CHECK11-NEXT:    [[TMP68:%.*]] = sext i32 [[TMP67]] to i64
+// CHECK11-NEXT:    [[TMP69:%.*]] = bitcast [4 x i64]* [[DOTOFFLOAD_SIZES22]] to i8*
+// CHECK11-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP69]], i8* align 4 bitcast ([4 x i64]* @.offload_sizes.8 to i8*), i32 32, i1 false)
+// CHECK11-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP71:%.*]] = bitcast i8** [[TMP70]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP64]], i32* [[TMP71]], align 4
+// CHECK11-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP73:%.*]] = bitcast i8** [[TMP72]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP64]], i32* [[TMP73]], align 4
+// CHECK11-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 0
+// CHECK11-NEXT:    store i8* null, i8** [[TMP74]], align 4
+// CHECK11-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 1
+// CHECK11-NEXT:    [[TMP76:%.*]] = bitcast i8** [[TMP75]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP76]], align 4
+// CHECK11-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 1
 // CHECK11-NEXT:    [[TMP78:%.*]] = bitcast i8** [[TMP77]] to i32*
 // CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP78]], align 4
-// CHECK11-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 1
-// CHECK11-NEXT:    [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP80]], align 4
-// CHECK11-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 1
-// CHECK11-NEXT:    store i64 4, i64* [[TMP81]], align 4
-// CHECK11-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 1
-// CHECK11-NEXT:    store i8* null, i8** [[TMP82]], align 4
-// CHECK11-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP84:%.*]] = bitcast i8** [[TMP83]] to i32**
-// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP84]], align 4
-// CHECK11-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP86:%.*]] = bitcast i8** [[TMP85]] to i32**
-// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP86]], align 4
-// CHECK11-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 2
-// CHECK11-NEXT:    store i64 [[TMP70]], i64* [[TMP87]], align 4
-// CHECK11-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 2
-// CHECK11-NEXT:    store i8* null, i8** [[TMP88]], align 4
-// CHECK11-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 3
-// CHECK11-NEXT:    [[TMP90:%.*]] = bitcast i8** [[TMP89]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP68]], i32* [[TMP90]], align 4
-// CHECK11-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 3
-// CHECK11-NEXT:    [[TMP92:%.*]] = bitcast i8** [[TMP91]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP68]], i32* [[TMP92]], align 4
-// CHECK11-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 3
-// CHECK11-NEXT:    store i64 4, i64* [[TMP93]], align 4
-// CHECK11-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 3
-// CHECK11-NEXT:    store i8* null, i8** [[TMP94]], align 4
-// CHECK11-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP98:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP98]], i32* [[DOTCAPTURE_EXPR_24]], align 4
-// CHECK11-NEXT:    [[TMP99:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_24]], align 4
-// CHECK11-NEXT:    [[SUB26:%.*]] = sub nsw i32 [[TMP99]], 0
+// CHECK11-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 1
+// CHECK11-NEXT:    store i8* null, i8** [[TMP79]], align 4
+// CHECK11-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP81:%.*]] = bitcast i8** [[TMP80]] to i32**
+// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP81]], align 4
+// CHECK11-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i32**
+// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP83]], align 4
+// CHECK11-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 2
+// CHECK11-NEXT:    store i64 [[TMP68]], i64* [[TMP84]], align 4
+// CHECK11-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 2
+// CHECK11-NEXT:    store i8* null, i8** [[TMP85]], align 4
+// CHECK11-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 3
+// CHECK11-NEXT:    [[TMP87:%.*]] = bitcast i8** [[TMP86]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP66]], i32* [[TMP87]], align 4
+// CHECK11-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 3
+// CHECK11-NEXT:    [[TMP89:%.*]] = bitcast i8** [[TMP88]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP66]], i32* [[TMP89]], align 4
+// CHECK11-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 3
+// CHECK11-NEXT:    store i8* null, i8** [[TMP90]], align 4
+// CHECK11-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP94:%.*]] = load i32, i32* [[N]], align 4
+// CHECK11-NEXT:    store i32 [[TMP94]], i32* [[DOTCAPTURE_EXPR_24]], align 4
+// CHECK11-NEXT:    [[TMP95:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_24]], align 4
+// CHECK11-NEXT:    [[SUB26:%.*]] = sub nsw i32 [[TMP95]], 0
 // CHECK11-NEXT:    [[DIV27:%.*]] = sdiv i32 [[SUB26]], 1
 // CHECK11-NEXT:    [[SUB28:%.*]] = sub nsw i32 [[DIV27]], 1
 // CHECK11-NEXT:    store i32 [[SUB28]], i32* [[DOTCAPTURE_EXPR_25]], align 4
-// CHECK11-NEXT:    [[TMP100:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_25]], align 4
-// CHECK11-NEXT:    [[ADD29:%.*]] = add nsw i32 [[TMP100]], 1
-// CHECK11-NEXT:    [[TMP101:%.*]] = zext i32 [[ADD29]] to i64
-// CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP101]])
-// CHECK11-NEXT:    [[TMP102:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l108.region_id, i32 4, i8** [[TMP95]], i8** [[TMP96]], i64* [[TMP97]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.7, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK11-NEXT:    [[TMP103:%.*]] = icmp ne i32 [[TMP102]], 0
-// CHECK11-NEXT:    br i1 [[TMP103]], label [[OMP_OFFLOAD_FAILED30:%.*]], label [[OMP_OFFLOAD_CONT31:%.*]]
+// CHECK11-NEXT:    [[TMP96:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_25]], align 4
+// CHECK11-NEXT:    [[ADD29:%.*]] = add nsw i32 [[TMP96]], 1
+// CHECK11-NEXT:    [[TMP97:%.*]] = zext i32 [[ADD29]] to i64
+// CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP97]])
+// CHECK11-NEXT:    [[TMP98:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l108.region_id, i32 4, i8** [[TMP91]], i8** [[TMP92]], i64* [[TMP93]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK11-NEXT:    [[TMP99:%.*]] = icmp ne i32 [[TMP98]], 0
+// CHECK11-NEXT:    br i1 [[TMP99]], label [[OMP_OFFLOAD_FAILED30:%.*]], label [[OMP_OFFLOAD_CONT31:%.*]]
 // CHECK11:       omp_offload.failed30:
-// CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l108(i32 [[TMP66]], i32 [[TMP0]], i32* [[VLA]], i32 [[TMP68]]) #[[ATTR3]]
+// CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l108(i32 [[TMP64]], i32 [[TMP0]], i32* [[VLA]], i32 [[TMP66]]) #[[ATTR3]]
 // CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT31]]
 // CHECK11:       omp_offload.cont31:
-// CHECK11-NEXT:    [[TMP104:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK11-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10EEiT_(i32 noundef [[TMP104]])
+// CHECK11-NEXT:    [[TMP100:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK11-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10EEiT_(i32 noundef [[TMP100]])
 // CHECK11-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK11-NEXT:    [[TMP105:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK11-NEXT:    call void @llvm.stackrestore(i8* [[TMP105]])
-// CHECK11-NEXT:    [[TMP106:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK11-NEXT:    ret i32 [[TMP106]]
+// CHECK11-NEXT:    [[TMP101:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK11-NEXT:    call void @llvm.stackrestore(i8* [[TMP101]])
+// CHECK11-NEXT:    [[TMP102:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK11-NEXT:    ret i32 [[TMP102]]
 //
 //
 // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l100
@@ -6143,11 +6119,11 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
 // CHECK11-NEXT:    store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK11-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK11-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32 [[TMP3]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP5]])
+// CHECK11-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32 [[TMP3]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP5]])
 // CHECK11-NEXT:    ret void
 //
 //
-// CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..5
+// CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..6
 // CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK11-NEXT:  entry:
 // CHECK11-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -6228,7 +6204,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
 // CHECK11-NEXT:    store i32 [[TMP20]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK11-NEXT:    [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK11-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*, i32)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32 [[TMP19]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP21]])
+// CHECK11-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*, i32)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32 [[TMP19]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP21]])
 // CHECK11-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK11:       omp.inner.for.inc:
 // CHECK11-NEXT:    [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -6270,7 +6246,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    ret void
 //
 //
-// CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..6
+// CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..7
 // CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK11-NEXT:  entry:
 // CHECK11-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -6374,7 +6350,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK11-LABEL: define {{[^@]+}}@_Z5tmainIiLi10EEiT_
-// CHECK11-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK11-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK11-NEXT:  entry:
 // CHECK11-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[A:%.*]] = alloca [10 x i32], align 4
@@ -6406,7 +6382,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK11-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l81.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.10, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK11-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l81.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.12, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK11-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK11-NEXT:    br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK11:       omp_offload.failed:
@@ -6424,7 +6400,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
 // CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK11-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l85.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.13, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.14, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK11-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l85.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.16, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.17, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK11-NEXT:    [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
 // CHECK11-NEXT:    br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]]
 // CHECK11:       omp_offload.failed5:
@@ -6455,7 +6431,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS8]], i32 0, i32 0
 // CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK11-NEXT:    [[TMP33:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l89.region_id, i32 2, i8** [[TMP31]], i8** [[TMP32]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.17, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.18, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK11-NEXT:    [[TMP33:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l89.region_id, i32 2, i8** [[TMP31]], i8** [[TMP32]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.20, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.21, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK11-NEXT:    [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0
 // CHECK11-NEXT:    br i1 [[TMP34]], label [[OMP_OFFLOAD_FAILED11:%.*]], label [[OMP_OFFLOAD_CONT12:%.*]]
 // CHECK11:       omp_offload.failed11:
@@ -6471,11 +6447,11 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4
 // CHECK11-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4
 // CHECK11-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4
-// CHECK11-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK11-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK11-NEXT:    ret void
 //
 //
-// CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..8
+// CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..10
 // CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK11-NEXT:  entry:
 // CHECK11-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -6521,7 +6497,7 @@ int main (int argc, char **argv) {
 // CHECK11:       omp.inner.for.body:
 // CHECK11-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
 // CHECK11-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
-// CHECK11-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]])
+// CHECK11-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]])
 // CHECK11-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK11:       omp.inner.for.inc:
 // CHECK11-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -6536,7 +6512,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    ret void
 //
 //
-// CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..9
+// CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..11
 // CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK11-NEXT:  entry:
 // CHECK11-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -6616,11 +6592,11 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4
 // CHECK11-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4
 // CHECK11-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4
-// CHECK11-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK11-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK11-NEXT:    ret void
 //
 //
-// CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..11
+// CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..14
 // CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK11-NEXT:  entry:
 // CHECK11-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -6666,7 +6642,7 @@ int main (int argc, char **argv) {
 // CHECK11:       omp.inner.for.body:
 // CHECK11-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
 // CHECK11-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
-// CHECK11-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]])
+// CHECK11-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]])
 // CHECK11-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK11:       omp.inner.for.inc:
 // CHECK11-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -6681,7 +6657,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    ret void
 //
 //
-// CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..12
+// CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..15
 // CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK11-NEXT:  entry:
 // CHECK11-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -6767,11 +6743,11 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
 // CHECK11-NEXT:    store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK11-NEXT:    [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK11-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i32)* @.omp_outlined..15 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i32 [[TMP2]])
+// CHECK11-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i32)* @.omp_outlined..18 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i32 [[TMP2]])
 // CHECK11-NEXT:    ret void
 //
 //
-// CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..15
+// CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..18
 // CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK11-NEXT:  entry:
 // CHECK11-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -6823,7 +6799,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
 // CHECK11-NEXT:    store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK11-NEXT:    [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK11-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]])
+// CHECK11-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]])
 // CHECK11-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK11:       omp.inner.for.inc:
 // CHECK11-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -6859,7 +6835,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    ret void
 //
 //
-// CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..16
+// CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..19
 // CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK11-NEXT:  entry:
 // CHECK11-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -6936,7 +6912,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK11-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK11-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK11-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK11-NEXT:  entry:
 // CHECK11-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK11-NEXT:    ret void
@@ -6993,14 +6969,14 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4
 // CHECK12-NEXT:    [[TMP4:%.*]] = mul nuw i32 [[TMP0]], 4
 // CHECK12-NEXT:    [[TMP5:%.*]] = sext i32 [[TMP4]] to i64
-// CHECK12-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP3]], i32* [[TMP7]], align 4
-// CHECK12-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP3]], i32* [[TMP9]], align 4
-// CHECK12-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK12-NEXT:    store i64 4, i64* [[TMP10]], align 4
+// CHECK12-NEXT:    [[TMP6:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK12-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP6]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes to i8*), i32 24, i1 false)
+// CHECK12-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP3]], i32* [[TMP8]], align 4
+// CHECK12-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP3]], i32* [[TMP10]], align 4
 // CHECK12-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
 // CHECK12-NEXT:    store i8* null, i8** [[TMP11]], align 4
 // CHECK12-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -7009,175 +6985,167 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK12-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i32*
 // CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP15]], align 4
-// CHECK12-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK12-NEXT:    store i64 4, i64* [[TMP16]], align 4
-// CHECK12-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK12-NEXT:    store i8* null, i8** [[TMP17]], align 4
-// CHECK12-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK12-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
-// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP19]], align 4
-// CHECK12-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK12-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32**
-// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP21]], align 4
-// CHECK12-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK12-NEXT:    store i64 [[TMP5]], i64* [[TMP22]], align 4
-// CHECK12-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK12-NEXT:    store i8* null, i8** [[TMP23]], align 4
-// CHECK12-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP27:%.*]] = load i32, i32* [[N]], align 4
-// CHECK12-NEXT:    store i32 [[TMP27]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK12-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK12-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP28]], 0
+// CHECK12-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK12-NEXT:    store i8* null, i8** [[TMP16]], align 4
+// CHECK12-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK12-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32**
+// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP18]], align 4
+// CHECK12-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK12-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32**
+// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP20]], align 4
+// CHECK12-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
+// CHECK12-NEXT:    store i64 [[TMP5]], i64* [[TMP21]], align 4
+// CHECK12-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK12-NEXT:    store i8* null, i8** [[TMP22]], align 4
+// CHECK12-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP26:%.*]] = load i32, i32* [[N]], align 4
+// CHECK12-NEXT:    store i32 [[TMP26]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK12-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK12-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP27]], 0
 // CHECK12-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK12-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK12-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK12-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK12-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP29]], 1
-// CHECK12-NEXT:    [[TMP30:%.*]] = zext i32 [[ADD]] to i64
-// CHECK12-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP30]])
-// CHECK12-NEXT:    [[TMP31:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l100.region_id, i32 3, i8** [[TMP24]], i8** [[TMP25]], i64* [[TMP26]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK12-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
-// CHECK12-NEXT:    br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK12-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK12-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP28]], 1
+// CHECK12-NEXT:    [[TMP29:%.*]] = zext i32 [[ADD]] to i64
+// CHECK12-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP29]])
+// CHECK12-NEXT:    [[TMP30:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l100.region_id, i32 3, i8** [[TMP23]], i8** [[TMP24]], i64* [[TMP25]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK12-NEXT:    [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
+// CHECK12-NEXT:    br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK12:       omp_offload.failed:
 // CHECK12-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l100(i32 [[TMP3]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK12-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK12:       omp_offload.cont:
-// CHECK12-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N]], align 4
-// CHECK12-NEXT:    store i32 [[TMP33]], i32* [[N_CASTED3]], align 4
-// CHECK12-NEXT:    [[TMP34:%.*]] = load i32, i32* [[N_CASTED3]], align 4
-// CHECK12-NEXT:    [[TMP35:%.*]] = mul nuw i32 [[TMP0]], 4
-// CHECK12-NEXT:    [[TMP36:%.*]] = sext i32 [[TMP35]] to i64
+// CHECK12-NEXT:    [[TMP32:%.*]] = load i32, i32* [[N]], align 4
+// CHECK12-NEXT:    store i32 [[TMP32]], i32* [[N_CASTED3]], align 4
+// CHECK12-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N_CASTED3]], align 4
+// CHECK12-NEXT:    [[TMP34:%.*]] = mul nuw i32 [[TMP0]], 4
+// CHECK12-NEXT:    [[TMP35:%.*]] = sext i32 [[TMP34]] to i64
+// CHECK12-NEXT:    [[TMP36:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES7]] to i8*
+// CHECK12-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP36]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes.4 to i8*), i32 24, i1 false)
 // CHECK12-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
 // CHECK12-NEXT:    [[TMP38:%.*]] = bitcast i8** [[TMP37]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP34]], i32* [[TMP38]], align 4
+// CHECK12-NEXT:    store i32 [[TMP33]], i32* [[TMP38]], align 4
 // CHECK12-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
 // CHECK12-NEXT:    [[TMP40:%.*]] = bitcast i8** [[TMP39]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP34]], i32* [[TMP40]], align 4
-// CHECK12-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 0
-// CHECK12-NEXT:    store i64 4, i64* [[TMP41]], align 4
-// CHECK12-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 0
-// CHECK12-NEXT:    store i8* null, i8** [[TMP42]], align 4
-// CHECK12-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 1
-// CHECK12-NEXT:    [[TMP44:%.*]] = bitcast i8** [[TMP43]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP44]], align 4
-// CHECK12-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 1
-// CHECK12-NEXT:    [[TMP46:%.*]] = bitcast i8** [[TMP45]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP46]], align 4
-// CHECK12-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 1
-// CHECK12-NEXT:    store i64 4, i64* [[TMP47]], align 4
-// CHECK12-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 1
-// CHECK12-NEXT:    store i8* null, i8** [[TMP48]], align 4
-// CHECK12-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 2
+// CHECK12-NEXT:    store i32 [[TMP33]], i32* [[TMP40]], align 4
+// CHECK12-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 0
+// CHECK12-NEXT:    store i8* null, i8** [[TMP41]], align 4
+// CHECK12-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 1
+// CHECK12-NEXT:    [[TMP43:%.*]] = bitcast i8** [[TMP42]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP43]], align 4
+// CHECK12-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 1
+// CHECK12-NEXT:    [[TMP45:%.*]] = bitcast i8** [[TMP44]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP45]], align 4
+// CHECK12-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 1
+// CHECK12-NEXT:    store i8* null, i8** [[TMP46]], align 4
+// CHECK12-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 2
+// CHECK12-NEXT:    [[TMP48:%.*]] = bitcast i8** [[TMP47]] to i32**
+// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP48]], align 4
+// CHECK12-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 2
 // CHECK12-NEXT:    [[TMP50:%.*]] = bitcast i8** [[TMP49]] to i32**
 // CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP50]], align 4
-// CHECK12-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 2
-// CHECK12-NEXT:    [[TMP52:%.*]] = bitcast i8** [[TMP51]] to i32**
-// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP52]], align 4
-// CHECK12-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 2
-// CHECK12-NEXT:    store i64 [[TMP36]], i64* [[TMP53]], align 4
-// CHECK12-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 2
-// CHECK12-NEXT:    store i8* null, i8** [[TMP54]], align 4
-// CHECK12-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP58:%.*]] = load i32, i32* [[N]], align 4
-// CHECK12-NEXT:    store i32 [[TMP58]], i32* [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK12-NEXT:    [[TMP59:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK12-NEXT:    [[SUB11:%.*]] = sub nsw i32 [[TMP59]], 0
+// CHECK12-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 2
+// CHECK12-NEXT:    store i64 [[TMP35]], i64* [[TMP51]], align 4
+// CHECK12-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 2
+// CHECK12-NEXT:    store i8* null, i8** [[TMP52]], align 4
+// CHECK12-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP56:%.*]] = load i32, i32* [[N]], align 4
+// CHECK12-NEXT:    store i32 [[TMP56]], i32* [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK12-NEXT:    [[TMP57:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK12-NEXT:    [[SUB11:%.*]] = sub nsw i32 [[TMP57]], 0
 // CHECK12-NEXT:    [[DIV12:%.*]] = sdiv i32 [[SUB11]], 1
 // CHECK12-NEXT:    [[SUB13:%.*]] = sub nsw i32 [[DIV12]], 1
 // CHECK12-NEXT:    store i32 [[SUB13]], i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK12-NEXT:    [[TMP60:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK12-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP60]], 1
-// CHECK12-NEXT:    [[TMP61:%.*]] = zext i32 [[ADD14]] to i64
-// CHECK12-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP61]])
-// CHECK12-NEXT:    [[TMP62:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l104.region_id, i32 3, i8** [[TMP55]], i8** [[TMP56]], i64* [[TMP57]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK12-NEXT:    [[TMP63:%.*]] = icmp ne i32 [[TMP62]], 0
-// CHECK12-NEXT:    br i1 [[TMP63]], label [[OMP_OFFLOAD_FAILED15:%.*]], label [[OMP_OFFLOAD_CONT16:%.*]]
+// CHECK12-NEXT:    [[TMP58:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
+// CHECK12-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP58]], 1
+// CHECK12-NEXT:    [[TMP59:%.*]] = zext i32 [[ADD14]] to i64
+// CHECK12-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP59]])
+// CHECK12-NEXT:    [[TMP60:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l104.region_id, i32 3, i8** [[TMP53]], i8** [[TMP54]], i64* [[TMP55]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.5, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK12-NEXT:    [[TMP61:%.*]] = icmp ne i32 [[TMP60]], 0
+// CHECK12-NEXT:    br i1 [[TMP61]], label [[OMP_OFFLOAD_FAILED15:%.*]], label [[OMP_OFFLOAD_CONT16:%.*]]
 // CHECK12:       omp_offload.failed15:
-// CHECK12-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l104(i32 [[TMP34]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK12-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l104(i32 [[TMP33]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK12-NEXT:    br label [[OMP_OFFLOAD_CONT16]]
 // CHECK12:       omp_offload.cont16:
-// CHECK12-NEXT:    [[TMP64:%.*]] = load i32, i32* [[M]], align 4
-// CHECK12-NEXT:    store i32 [[TMP64]], i32* [[DOTCAPTURE_EXPR_17]], align 4
-// CHECK12-NEXT:    [[TMP65:%.*]] = load i32, i32* [[N]], align 4
-// CHECK12-NEXT:    store i32 [[TMP65]], i32* [[N_CASTED18]], align 4
-// CHECK12-NEXT:    [[TMP66:%.*]] = load i32, i32* [[N_CASTED18]], align 4
-// CHECK12-NEXT:    [[TMP67:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_17]], align 4
-// CHECK12-NEXT:    store i32 [[TMP67]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK12-NEXT:    [[TMP68:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK12-NEXT:    [[TMP69:%.*]] = mul nuw i32 [[TMP0]], 4
-// CHECK12-NEXT:    [[TMP70:%.*]] = sext i32 [[TMP69]] to i64
-// CHECK12-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP72:%.*]] = bitcast i8** [[TMP71]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP66]], i32* [[TMP72]], align 4
-// CHECK12-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP74:%.*]] = bitcast i8** [[TMP73]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP66]], i32* [[TMP74]], align 4
-// CHECK12-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 0
-// CHECK12-NEXT:    store i64 4, i64* [[TMP75]], align 4
-// CHECK12-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 0
-// CHECK12-NEXT:    store i8* null, i8** [[TMP76]], align 4
-// CHECK12-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 1
+// CHECK12-NEXT:    [[TMP62:%.*]] = load i32, i32* [[M]], align 4
+// CHECK12-NEXT:    store i32 [[TMP62]], i32* [[DOTCAPTURE_EXPR_17]], align 4
+// CHECK12-NEXT:    [[TMP63:%.*]] = load i32, i32* [[N]], align 4
+// CHECK12-NEXT:    store i32 [[TMP63]], i32* [[N_CASTED18]], align 4
+// CHECK12-NEXT:    [[TMP64:%.*]] = load i32, i32* [[N_CASTED18]], align 4
+// CHECK12-NEXT:    [[TMP65:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_17]], align 4
+// CHECK12-NEXT:    store i32 [[TMP65]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
+// CHECK12-NEXT:    [[TMP66:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
+// CHECK12-NEXT:    [[TMP67:%.*]] = mul nuw i32 [[TMP0]], 4
+// CHECK12-NEXT:    [[TMP68:%.*]] = sext i32 [[TMP67]] to i64
+// CHECK12-NEXT:    [[TMP69:%.*]] = bitcast [4 x i64]* [[DOTOFFLOAD_SIZES22]] to i8*
+// CHECK12-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP69]], i8* align 4 bitcast ([4 x i64]* @.offload_sizes.8 to i8*), i32 32, i1 false)
+// CHECK12-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP71:%.*]] = bitcast i8** [[TMP70]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP64]], i32* [[TMP71]], align 4
+// CHECK12-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP73:%.*]] = bitcast i8** [[TMP72]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP64]], i32* [[TMP73]], align 4
+// CHECK12-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 0
+// CHECK12-NEXT:    store i8* null, i8** [[TMP74]], align 4
+// CHECK12-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 1
+// CHECK12-NEXT:    [[TMP76:%.*]] = bitcast i8** [[TMP75]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP76]], align 4
+// CHECK12-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 1
 // CHECK12-NEXT:    [[TMP78:%.*]] = bitcast i8** [[TMP77]] to i32*
 // CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP78]], align 4
-// CHECK12-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 1
-// CHECK12-NEXT:    [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP80]], align 4
-// CHECK12-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 1
-// CHECK12-NEXT:    store i64 4, i64* [[TMP81]], align 4
-// CHECK12-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 1
-// CHECK12-NEXT:    store i8* null, i8** [[TMP82]], align 4
-// CHECK12-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 2
-// CHECK12-NEXT:    [[TMP84:%.*]] = bitcast i8** [[TMP83]] to i32**
-// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP84]], align 4
-// CHECK12-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 2
-// CHECK12-NEXT:    [[TMP86:%.*]] = bitcast i8** [[TMP85]] to i32**
-// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP86]], align 4
-// CHECK12-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 2
-// CHECK12-NEXT:    store i64 [[TMP70]], i64* [[TMP87]], align 4
-// CHECK12-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 2
-// CHECK12-NEXT:    store i8* null, i8** [[TMP88]], align 4
-// CHECK12-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 3
-// CHECK12-NEXT:    [[TMP90:%.*]] = bitcast i8** [[TMP89]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP68]], i32* [[TMP90]], align 4
-// CHECK12-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 3
-// CHECK12-NEXT:    [[TMP92:%.*]] = bitcast i8** [[TMP91]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP68]], i32* [[TMP92]], align 4
-// CHECK12-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 3
-// CHECK12-NEXT:    store i64 4, i64* [[TMP93]], align 4
-// CHECK12-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 3
-// CHECK12-NEXT:    store i8* null, i8** [[TMP94]], align 4
-// CHECK12-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP98:%.*]] = load i32, i32* [[N]], align 4
-// CHECK12-NEXT:    store i32 [[TMP98]], i32* [[DOTCAPTURE_EXPR_24]], align 4
-// CHECK12-NEXT:    [[TMP99:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_24]], align 4
-// CHECK12-NEXT:    [[SUB26:%.*]] = sub nsw i32 [[TMP99]], 0
+// CHECK12-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 1
+// CHECK12-NEXT:    store i8* null, i8** [[TMP79]], align 4
+// CHECK12-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 2
+// CHECK12-NEXT:    [[TMP81:%.*]] = bitcast i8** [[TMP80]] to i32**
+// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP81]], align 4
+// CHECK12-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 2
+// CHECK12-NEXT:    [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i32**
+// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP83]], align 4
+// CHECK12-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 2
+// CHECK12-NEXT:    store i64 [[TMP68]], i64* [[TMP84]], align 4
+// CHECK12-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 2
+// CHECK12-NEXT:    store i8* null, i8** [[TMP85]], align 4
+// CHECK12-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 3
+// CHECK12-NEXT:    [[TMP87:%.*]] = bitcast i8** [[TMP86]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP66]], i32* [[TMP87]], align 4
+// CHECK12-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 3
+// CHECK12-NEXT:    [[TMP89:%.*]] = bitcast i8** [[TMP88]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP66]], i32* [[TMP89]], align 4
+// CHECK12-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 3
+// CHECK12-NEXT:    store i8* null, i8** [[TMP90]], align 4
+// CHECK12-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP94:%.*]] = load i32, i32* [[N]], align 4
+// CHECK12-NEXT:    store i32 [[TMP94]], i32* [[DOTCAPTURE_EXPR_24]], align 4
+// CHECK12-NEXT:    [[TMP95:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_24]], align 4
+// CHECK12-NEXT:    [[SUB26:%.*]] = sub nsw i32 [[TMP95]], 0
 // CHECK12-NEXT:    [[DIV27:%.*]] = sdiv i32 [[SUB26]], 1
 // CHECK12-NEXT:    [[SUB28:%.*]] = sub nsw i32 [[DIV27]], 1
 // CHECK12-NEXT:    store i32 [[SUB28]], i32* [[DOTCAPTURE_EXPR_25]], align 4
-// CHECK12-NEXT:    [[TMP100:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_25]], align 4
-// CHECK12-NEXT:    [[ADD29:%.*]] = add nsw i32 [[TMP100]], 1
-// CHECK12-NEXT:    [[TMP101:%.*]] = zext i32 [[ADD29]] to i64
-// CHECK12-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP101]])
-// CHECK12-NEXT:    [[TMP102:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l108.region_id, i32 4, i8** [[TMP95]], i8** [[TMP96]], i64* [[TMP97]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.7, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK12-NEXT:    [[TMP103:%.*]] = icmp ne i32 [[TMP102]], 0
-// CHECK12-NEXT:    br i1 [[TMP103]], label [[OMP_OFFLOAD_FAILED30:%.*]], label [[OMP_OFFLOAD_CONT31:%.*]]
+// CHECK12-NEXT:    [[TMP96:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_25]], align 4
+// CHECK12-NEXT:    [[ADD29:%.*]] = add nsw i32 [[TMP96]], 1
+// CHECK12-NEXT:    [[TMP97:%.*]] = zext i32 [[ADD29]] to i64
+// CHECK12-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP97]])
+// CHECK12-NEXT:    [[TMP98:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l108.region_id, i32 4, i8** [[TMP91]], i8** [[TMP92]], i64* [[TMP93]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK12-NEXT:    [[TMP99:%.*]] = icmp ne i32 [[TMP98]], 0
+// CHECK12-NEXT:    br i1 [[TMP99]], label [[OMP_OFFLOAD_FAILED30:%.*]], label [[OMP_OFFLOAD_CONT31:%.*]]
 // CHECK12:       omp_offload.failed30:
-// CHECK12-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l108(i32 [[TMP66]], i32 [[TMP0]], i32* [[VLA]], i32 [[TMP68]]) #[[ATTR3]]
+// CHECK12-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l108(i32 [[TMP64]], i32 [[TMP0]], i32* [[VLA]], i32 [[TMP66]]) #[[ATTR3]]
 // CHECK12-NEXT:    br label [[OMP_OFFLOAD_CONT31]]
 // CHECK12:       omp_offload.cont31:
-// CHECK12-NEXT:    [[TMP104:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK12-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10EEiT_(i32 noundef [[TMP104]])
+// CHECK12-NEXT:    [[TMP100:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK12-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10EEiT_(i32 noundef [[TMP100]])
 // CHECK12-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK12-NEXT:    [[TMP105:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK12-NEXT:    call void @llvm.stackrestore(i8* [[TMP105]])
-// CHECK12-NEXT:    [[TMP106:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK12-NEXT:    ret i32 [[TMP106]]
+// CHECK12-NEXT:    [[TMP101:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK12-NEXT:    call void @llvm.stackrestore(i8* [[TMP101]])
+// CHECK12-NEXT:    [[TMP102:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK12-NEXT:    ret i32 [[TMP102]]
 //
 //
 // CHECK12-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l100
@@ -7625,11 +7593,11 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
 // CHECK12-NEXT:    store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK12-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK12-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32 [[TMP3]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP5]])
+// CHECK12-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32 [[TMP3]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP5]])
 // CHECK12-NEXT:    ret void
 //
 //
-// CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..5
+// CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..6
 // CHECK12-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK12-NEXT:  entry:
 // CHECK12-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -7710,7 +7678,7 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
 // CHECK12-NEXT:    store i32 [[TMP20]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK12-NEXT:    [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK12-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*, i32)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32 [[TMP19]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP21]])
+// CHECK12-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*, i32)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32 [[TMP19]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP21]])
 // CHECK12-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK12:       omp.inner.for.inc:
 // CHECK12-NEXT:    [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -7752,7 +7720,7 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    ret void
 //
 //
-// CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..6
+// CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..7
 // CHECK12-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK12-NEXT:  entry:
 // CHECK12-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -7856,7 +7824,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK12-LABEL: define {{[^@]+}}@_Z5tmainIiLi10EEiT_
-// CHECK12-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK12-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK12-NEXT:  entry:
 // CHECK12-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK12-NEXT:    [[A:%.*]] = alloca [10 x i32], align 4
@@ -7888,7 +7856,7 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK12-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK12-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK12-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l81.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.10, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK12-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l81.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.12, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK12-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK12-NEXT:    br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK12:       omp_offload.failed:
@@ -7906,7 +7874,7 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK12-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
 // CHECK12-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK12-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l85.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.13, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.14, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK12-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l85.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.16, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.17, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK12-NEXT:    [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
 // CHECK12-NEXT:    br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]]
 // CHECK12:       omp_offload.failed5:
@@ -7937,7 +7905,7 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0
 // CHECK12-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS8]], i32 0, i32 0
 // CHECK12-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK12-NEXT:    [[TMP33:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l89.region_id, i32 2, i8** [[TMP31]], i8** [[TMP32]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.17, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.18, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK12-NEXT:    [[TMP33:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l89.region_id, i32 2, i8** [[TMP31]], i8** [[TMP32]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.20, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.21, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK12-NEXT:    [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0
 // CHECK12-NEXT:    br i1 [[TMP34]], label [[OMP_OFFLOAD_FAILED11:%.*]], label [[OMP_OFFLOAD_CONT12:%.*]]
 // CHECK12:       omp_offload.failed11:
@@ -7953,11 +7921,11 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4
 // CHECK12-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4
 // CHECK12-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4
-// CHECK12-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK12-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK12-NEXT:    ret void
 //
 //
-// CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..8
+// CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..10
 // CHECK12-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK12-NEXT:  entry:
 // CHECK12-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -8003,7 +7971,7 @@ int main (int argc, char **argv) {
 // CHECK12:       omp.inner.for.body:
 // CHECK12-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
 // CHECK12-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
-// CHECK12-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]])
+// CHECK12-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]])
 // CHECK12-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK12:       omp.inner.for.inc:
 // CHECK12-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -8018,7 +7986,7 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    ret void
 //
 //
-// CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..9
+// CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..11
 // CHECK12-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK12-NEXT:  entry:
 // CHECK12-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -8098,11 +8066,11 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4
 // CHECK12-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4
 // CHECK12-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4
-// CHECK12-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK12-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK12-NEXT:    ret void
 //
 //
-// CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..11
+// CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..14
 // CHECK12-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK12-NEXT:  entry:
 // CHECK12-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -8148,7 +8116,7 @@ int main (int argc, char **argv) {
 // CHECK12:       omp.inner.for.body:
 // CHECK12-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
 // CHECK12-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
-// CHECK12-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]])
+// CHECK12-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]])
 // CHECK12-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK12:       omp.inner.for.inc:
 // CHECK12-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -8163,7 +8131,7 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    ret void
 //
 //
-// CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..12
+// CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..15
 // CHECK12-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK12-NEXT:  entry:
 // CHECK12-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -8249,11 +8217,11 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
 // CHECK12-NEXT:    store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK12-NEXT:    [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK12-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i32)* @.omp_outlined..15 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i32 [[TMP2]])
+// CHECK12-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i32)* @.omp_outlined..18 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i32 [[TMP2]])
 // CHECK12-NEXT:    ret void
 //
 //
-// CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..15
+// CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..18
 // CHECK12-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK12-NEXT:  entry:
 // CHECK12-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -8305,7 +8273,7 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
 // CHECK12-NEXT:    store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK12-NEXT:    [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK12-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]])
+// CHECK12-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]])
 // CHECK12-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK12:       omp.inner.for.inc:
 // CHECK12-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -8341,7 +8309,7 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    ret void
 //
 //
-// CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..16
+// CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..19
 // CHECK12-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK12-NEXT:  entry:
 // CHECK12-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -8418,7 +8386,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK12-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK12-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK12-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK12-NEXT:  entry:
 // CHECK12-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK12-NEXT:    ret void

diff  --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_schedule_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_schedule_codegen.cpp
index 55181a136d77c..f0ab5a2d88952 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_schedule_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_schedule_codegen.cpp
@@ -7468,14 +7468,14 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    store i32 [[TMP3]], i32* [[CONV]], align 4
 // CHECK13-NEXT:    [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8
 // CHECK13-NEXT:    [[TMP5:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK13-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK13-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i64*
-// CHECK13-NEXT:    store i64 [[TMP4]], i64* [[TMP7]], align 8
-// CHECK13-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK13-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i64*
-// CHECK13-NEXT:    store i64 [[TMP4]], i64* [[TMP9]], align 8
-// CHECK13-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK13-NEXT:    store i64 4, i64* [[TMP10]], align 8
+// CHECK13-NEXT:    [[TMP6:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK13-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP6]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes to i8*), i64 24, i1 false)
+// CHECK13-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK13-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i64*
+// CHECK13-NEXT:    store i64 [[TMP4]], i64* [[TMP8]], align 8
+// CHECK13-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK13-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i64*
+// CHECK13-NEXT:    store i64 [[TMP4]], i64* [[TMP10]], align 8
 // CHECK13-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
 // CHECK13-NEXT:    store i8* null, i8** [[TMP11]], align 8
 // CHECK13-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -7484,304 +7484,290 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK13-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i64*
 // CHECK13-NEXT:    store i64 [[TMP1]], i64* [[TMP15]], align 8
-// CHECK13-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK13-NEXT:    store i64 8, i64* [[TMP16]], align 8
-// CHECK13-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK13-NEXT:    store i8* null, i8** [[TMP17]], align 8
-// CHECK13-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK13-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
-// CHECK13-NEXT:    store i32* [[VLA]], i32** [[TMP19]], align 8
-// CHECK13-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK13-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32**
-// CHECK13-NEXT:    store i32* [[VLA]], i32** [[TMP21]], align 8
-// CHECK13-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK13-NEXT:    store i64 [[TMP5]], i64* [[TMP22]], align 8
-// CHECK13-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK13-NEXT:    store i8* null, i8** [[TMP23]], align 8
-// CHECK13-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK13-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK13-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK13-NEXT:    [[TMP27:%.*]] = load i32, i32* [[N]], align 4
-// CHECK13-NEXT:    store i32 [[TMP27]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK13-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK13-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP28]], 0
+// CHECK13-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK13-NEXT:    store i8* null, i8** [[TMP16]], align 8
+// CHECK13-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK13-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32**
+// CHECK13-NEXT:    store i32* [[VLA]], i32** [[TMP18]], align 8
+// CHECK13-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK13-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32**
+// CHECK13-NEXT:    store i32* [[VLA]], i32** [[TMP20]], align 8
+// CHECK13-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
+// CHECK13-NEXT:    store i64 [[TMP5]], i64* [[TMP21]], align 8
+// CHECK13-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK13-NEXT:    store i8* null, i8** [[TMP22]], align 8
+// CHECK13-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK13-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK13-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK13-NEXT:    [[TMP26:%.*]] = load i32, i32* [[N]], align 4
+// CHECK13-NEXT:    store i32 [[TMP26]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK13-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK13-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP27]], 0
 // CHECK13-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK13-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK13-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK13-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK13-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP29]], 1
-// CHECK13-NEXT:    [[TMP30:%.*]] = zext i32 [[ADD]] to i64
-// CHECK13-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP30]])
-// CHECK13-NEXT:    [[TMP31:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l139.region_id, i32 3, i8** [[TMP24]], i8** [[TMP25]], i64* [[TMP26]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK13-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
-// CHECK13-NEXT:    br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK13-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK13-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP28]], 1
+// CHECK13-NEXT:    [[TMP29:%.*]] = zext i32 [[ADD]] to i64
+// CHECK13-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP29]])
+// CHECK13-NEXT:    [[TMP30:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l139.region_id, i32 3, i8** [[TMP23]], i8** [[TMP24]], i64* [[TMP25]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK13-NEXT:    [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
+// CHECK13-NEXT:    br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK13:       omp_offload.failed:
 // CHECK13-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l139(i64 [[TMP4]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK13-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK13:       omp_offload.cont:
-// CHECK13-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N]], align 4
+// CHECK13-NEXT:    [[TMP32:%.*]] = load i32, i32* [[N]], align 4
 // CHECK13-NEXT:    [[CONV4:%.*]] = bitcast i64* [[N_CASTED3]] to i32*
-// CHECK13-NEXT:    store i32 [[TMP33]], i32* [[CONV4]], align 4
-// CHECK13-NEXT:    [[TMP34:%.*]] = load i64, i64* [[N_CASTED3]], align 8
-// CHECK13-NEXT:    [[TMP35:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK13-NEXT:    store i32 [[TMP32]], i32* [[CONV4]], align 4
+// CHECK13-NEXT:    [[TMP33:%.*]] = load i64, i64* [[N_CASTED3]], align 8
+// CHECK13-NEXT:    [[TMP34:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK13-NEXT:    [[TMP35:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES8]] to i8*
+// CHECK13-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP35]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes.4 to i8*), i64 24, i1 false)
 // CHECK13-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
 // CHECK13-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i64*
-// CHECK13-NEXT:    store i64 [[TMP34]], i64* [[TMP37]], align 8
+// CHECK13-NEXT:    store i64 [[TMP33]], i64* [[TMP37]], align 8
 // CHECK13-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
 // CHECK13-NEXT:    [[TMP39:%.*]] = bitcast i8** [[TMP38]] to i64*
-// CHECK13-NEXT:    store i64 [[TMP34]], i64* [[TMP39]], align 8
-// CHECK13-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 0
-// CHECK13-NEXT:    store i64 4, i64* [[TMP40]], align 8
-// CHECK13-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 0
-// CHECK13-NEXT:    store i8* null, i8** [[TMP41]], align 8
-// CHECK13-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 1
-// CHECK13-NEXT:    [[TMP43:%.*]] = bitcast i8** [[TMP42]] to i64*
-// CHECK13-NEXT:    store i64 [[TMP1]], i64* [[TMP43]], align 8
-// CHECK13-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 1
-// CHECK13-NEXT:    [[TMP45:%.*]] = bitcast i8** [[TMP44]] to i64*
-// CHECK13-NEXT:    store i64 [[TMP1]], i64* [[TMP45]], align 8
-// CHECK13-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 1
-// CHECK13-NEXT:    store i64 8, i64* [[TMP46]], align 8
-// CHECK13-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 1
-// CHECK13-NEXT:    store i8* null, i8** [[TMP47]], align 8
-// CHECK13-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 2
+// CHECK13-NEXT:    store i64 [[TMP33]], i64* [[TMP39]], align 8
+// CHECK13-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 0
+// CHECK13-NEXT:    store i8* null, i8** [[TMP40]], align 8
+// CHECK13-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 1
+// CHECK13-NEXT:    [[TMP42:%.*]] = bitcast i8** [[TMP41]] to i64*
+// CHECK13-NEXT:    store i64 [[TMP1]], i64* [[TMP42]], align 8
+// CHECK13-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 1
+// CHECK13-NEXT:    [[TMP44:%.*]] = bitcast i8** [[TMP43]] to i64*
+// CHECK13-NEXT:    store i64 [[TMP1]], i64* [[TMP44]], align 8
+// CHECK13-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 1
+// CHECK13-NEXT:    store i8* null, i8** [[TMP45]], align 8
+// CHECK13-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 2
+// CHECK13-NEXT:    [[TMP47:%.*]] = bitcast i8** [[TMP46]] to i32**
+// CHECK13-NEXT:    store i32* [[VLA]], i32** [[TMP47]], align 8
+// CHECK13-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 2
 // CHECK13-NEXT:    [[TMP49:%.*]] = bitcast i8** [[TMP48]] to i32**
 // CHECK13-NEXT:    store i32* [[VLA]], i32** [[TMP49]], align 8
-// CHECK13-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 2
-// CHECK13-NEXT:    [[TMP51:%.*]] = bitcast i8** [[TMP50]] to i32**
-// CHECK13-NEXT:    store i32* [[VLA]], i32** [[TMP51]], align 8
-// CHECK13-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 2
-// CHECK13-NEXT:    store i64 [[TMP35]], i64* [[TMP52]], align 8
-// CHECK13-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 2
-// CHECK13-NEXT:    store i8* null, i8** [[TMP53]], align 8
-// CHECK13-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
-// CHECK13-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
-// CHECK13-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 0
-// CHECK13-NEXT:    [[TMP57:%.*]] = load i32, i32* [[N]], align 4
-// CHECK13-NEXT:    store i32 [[TMP57]], i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK13-NEXT:    [[TMP58:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK13-NEXT:    [[SUB12:%.*]] = sub nsw i32 [[TMP58]], 0
+// CHECK13-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 2
+// CHECK13-NEXT:    store i64 [[TMP34]], i64* [[TMP50]], align 8
+// CHECK13-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 2
+// CHECK13-NEXT:    store i8* null, i8** [[TMP51]], align 8
+// CHECK13-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
+// CHECK13-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
+// CHECK13-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 0
+// CHECK13-NEXT:    [[TMP55:%.*]] = load i32, i32* [[N]], align 4
+// CHECK13-NEXT:    store i32 [[TMP55]], i32* [[DOTCAPTURE_EXPR_10]], align 4
+// CHECK13-NEXT:    [[TMP56:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
+// CHECK13-NEXT:    [[SUB12:%.*]] = sub nsw i32 [[TMP56]], 0
 // CHECK13-NEXT:    [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1
 // CHECK13-NEXT:    [[SUB14:%.*]] = sub nsw i32 [[DIV13]], 1
 // CHECK13-NEXT:    store i32 [[SUB14]], i32* [[DOTCAPTURE_EXPR_11]], align 4
-// CHECK13-NEXT:    [[TMP59:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_11]], align 4
-// CHECK13-NEXT:    [[ADD15:%.*]] = add nsw i32 [[TMP59]], 1
-// CHECK13-NEXT:    [[TMP60:%.*]] = zext i32 [[ADD15]] to i64
-// CHECK13-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP60]])
-// CHECK13-NEXT:    [[TMP61:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l143.region_id, i32 3, i8** [[TMP54]], i8** [[TMP55]], i64* [[TMP56]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK13-NEXT:    [[TMP62:%.*]] = icmp ne i32 [[TMP61]], 0
-// CHECK13-NEXT:    br i1 [[TMP62]], label [[OMP_OFFLOAD_FAILED16:%.*]], label [[OMP_OFFLOAD_CONT17:%.*]]
+// CHECK13-NEXT:    [[TMP57:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_11]], align 4
+// CHECK13-NEXT:    [[ADD15:%.*]] = add nsw i32 [[TMP57]], 1
+// CHECK13-NEXT:    [[TMP58:%.*]] = zext i32 [[ADD15]] to i64
+// CHECK13-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP58]])
+// CHECK13-NEXT:    [[TMP59:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l143.region_id, i32 3, i8** [[TMP52]], i8** [[TMP53]], i64* [[TMP54]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.5, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK13-NEXT:    [[TMP60:%.*]] = icmp ne i32 [[TMP59]], 0
+// CHECK13-NEXT:    br i1 [[TMP60]], label [[OMP_OFFLOAD_FAILED16:%.*]], label [[OMP_OFFLOAD_CONT17:%.*]]
 // CHECK13:       omp_offload.failed16:
-// CHECK13-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l143(i64 [[TMP34]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK13-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l143(i64 [[TMP33]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK13-NEXT:    br label [[OMP_OFFLOAD_CONT17]]
 // CHECK13:       omp_offload.cont17:
-// CHECK13-NEXT:    [[TMP63:%.*]] = load i32, i32* [[M]], align 4
-// CHECK13-NEXT:    store i32 [[TMP63]], i32* [[DOTCAPTURE_EXPR_18]], align 4
-// CHECK13-NEXT:    [[TMP64:%.*]] = load i32, i32* [[N]], align 4
+// CHECK13-NEXT:    [[TMP61:%.*]] = load i32, i32* [[M]], align 4
+// CHECK13-NEXT:    store i32 [[TMP61]], i32* [[DOTCAPTURE_EXPR_18]], align 4
+// CHECK13-NEXT:    [[TMP62:%.*]] = load i32, i32* [[N]], align 4
 // CHECK13-NEXT:    [[CONV20:%.*]] = bitcast i64* [[N_CASTED19]] to i32*
-// CHECK13-NEXT:    store i32 [[TMP64]], i32* [[CONV20]], align 4
-// CHECK13-NEXT:    [[TMP65:%.*]] = load i64, i64* [[N_CASTED19]], align 8
-// CHECK13-NEXT:    [[TMP66:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4
+// CHECK13-NEXT:    store i32 [[TMP62]], i32* [[CONV20]], align 4
+// CHECK13-NEXT:    [[TMP63:%.*]] = load i64, i64* [[N_CASTED19]], align 8
+// CHECK13-NEXT:    [[TMP64:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4
 // CHECK13-NEXT:    [[CONV21:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
-// CHECK13-NEXT:    store i32 [[TMP66]], i32* [[CONV21]], align 4
-// CHECK13-NEXT:    [[TMP67:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK13-NEXT:    [[TMP68:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK13-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 0
-// CHECK13-NEXT:    [[TMP70:%.*]] = bitcast i8** [[TMP69]] to i64*
-// CHECK13-NEXT:    store i64 [[TMP65]], i64* [[TMP70]], align 8
-// CHECK13-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 0
-// CHECK13-NEXT:    [[TMP72:%.*]] = bitcast i8** [[TMP71]] to i64*
-// CHECK13-NEXT:    store i64 [[TMP65]], i64* [[TMP72]], align 8
-// CHECK13-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 0
-// CHECK13-NEXT:    store i64 4, i64* [[TMP73]], align 8
-// CHECK13-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 0
-// CHECK13-NEXT:    store i8* null, i8** [[TMP74]], align 8
-// CHECK13-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 1
+// CHECK13-NEXT:    store i32 [[TMP64]], i32* [[CONV21]], align 4
+// CHECK13-NEXT:    [[TMP65:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
+// CHECK13-NEXT:    [[TMP66:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK13-NEXT:    [[TMP67:%.*]] = bitcast [4 x i64]* [[DOTOFFLOAD_SIZES25]] to i8*
+// CHECK13-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP67]], i8* align 8 bitcast ([4 x i64]* @.offload_sizes.8 to i8*), i64 32, i1 false)
+// CHECK13-NEXT:    [[TMP68:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 0
+// CHECK13-NEXT:    [[TMP69:%.*]] = bitcast i8** [[TMP68]] to i64*
+// CHECK13-NEXT:    store i64 [[TMP63]], i64* [[TMP69]], align 8
+// CHECK13-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 0
+// CHECK13-NEXT:    [[TMP71:%.*]] = bitcast i8** [[TMP70]] to i64*
+// CHECK13-NEXT:    store i64 [[TMP63]], i64* [[TMP71]], align 8
+// CHECK13-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 0
+// CHECK13-NEXT:    store i8* null, i8** [[TMP72]], align 8
+// CHECK13-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 1
+// CHECK13-NEXT:    [[TMP74:%.*]] = bitcast i8** [[TMP73]] to i64*
+// CHECK13-NEXT:    store i64 [[TMP1]], i64* [[TMP74]], align 8
+// CHECK13-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 1
 // CHECK13-NEXT:    [[TMP76:%.*]] = bitcast i8** [[TMP75]] to i64*
 // CHECK13-NEXT:    store i64 [[TMP1]], i64* [[TMP76]], align 8
-// CHECK13-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 1
-// CHECK13-NEXT:    [[TMP78:%.*]] = bitcast i8** [[TMP77]] to i64*
-// CHECK13-NEXT:    store i64 [[TMP1]], i64* [[TMP78]], align 8
-// CHECK13-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 1
-// CHECK13-NEXT:    store i64 8, i64* [[TMP79]], align 8
-// CHECK13-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 1
-// CHECK13-NEXT:    store i8* null, i8** [[TMP80]], align 8
-// CHECK13-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 2
-// CHECK13-NEXT:    [[TMP82:%.*]] = bitcast i8** [[TMP81]] to i32**
-// CHECK13-NEXT:    store i32* [[VLA]], i32** [[TMP82]], align 8
-// CHECK13-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 2
-// CHECK13-NEXT:    [[TMP84:%.*]] = bitcast i8** [[TMP83]] to i32**
-// CHECK13-NEXT:    store i32* [[VLA]], i32** [[TMP84]], align 8
-// CHECK13-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 2
-// CHECK13-NEXT:    store i64 [[TMP68]], i64* [[TMP85]], align 8
-// CHECK13-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 2
-// CHECK13-NEXT:    store i8* null, i8** [[TMP86]], align 8
-// CHECK13-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 3
-// CHECK13-NEXT:    [[TMP88:%.*]] = bitcast i8** [[TMP87]] to i64*
-// CHECK13-NEXT:    store i64 [[TMP67]], i64* [[TMP88]], align 8
-// CHECK13-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 3
-// CHECK13-NEXT:    [[TMP90:%.*]] = bitcast i8** [[TMP89]] to i64*
-// CHECK13-NEXT:    store i64 [[TMP67]], i64* [[TMP90]], align 8
-// CHECK13-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 3
-// CHECK13-NEXT:    store i64 4, i64* [[TMP91]], align 8
-// CHECK13-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 3
-// CHECK13-NEXT:    store i8* null, i8** [[TMP92]], align 8
-// CHECK13-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 0
-// CHECK13-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 0
-// CHECK13-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 0
-// CHECK13-NEXT:    [[TMP96:%.*]] = load i32, i32* [[N]], align 4
-// CHECK13-NEXT:    store i32 [[TMP96]], i32* [[DOTCAPTURE_EXPR_27]], align 4
-// CHECK13-NEXT:    [[TMP97:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_27]], align 4
-// CHECK13-NEXT:    [[SUB29:%.*]] = sub nsw i32 [[TMP97]], 0
+// CHECK13-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 1
+// CHECK13-NEXT:    store i8* null, i8** [[TMP77]], align 8
+// CHECK13-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 2
+// CHECK13-NEXT:    [[TMP79:%.*]] = bitcast i8** [[TMP78]] to i32**
+// CHECK13-NEXT:    store i32* [[VLA]], i32** [[TMP79]], align 8
+// CHECK13-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 2
+// CHECK13-NEXT:    [[TMP81:%.*]] = bitcast i8** [[TMP80]] to i32**
+// CHECK13-NEXT:    store i32* [[VLA]], i32** [[TMP81]], align 8
+// CHECK13-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 2
+// CHECK13-NEXT:    store i64 [[TMP66]], i64* [[TMP82]], align 8
+// CHECK13-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 2
+// CHECK13-NEXT:    store i8* null, i8** [[TMP83]], align 8
+// CHECK13-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 3
+// CHECK13-NEXT:    [[TMP85:%.*]] = bitcast i8** [[TMP84]] to i64*
+// CHECK13-NEXT:    store i64 [[TMP65]], i64* [[TMP85]], align 8
+// CHECK13-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 3
+// CHECK13-NEXT:    [[TMP87:%.*]] = bitcast i8** [[TMP86]] to i64*
+// CHECK13-NEXT:    store i64 [[TMP65]], i64* [[TMP87]], align 8
+// CHECK13-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 3
+// CHECK13-NEXT:    store i8* null, i8** [[TMP88]], align 8
+// CHECK13-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 0
+// CHECK13-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 0
+// CHECK13-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 0
+// CHECK13-NEXT:    [[TMP92:%.*]] = load i32, i32* [[N]], align 4
+// CHECK13-NEXT:    store i32 [[TMP92]], i32* [[DOTCAPTURE_EXPR_27]], align 4
+// CHECK13-NEXT:    [[TMP93:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_27]], align 4
+// CHECK13-NEXT:    [[SUB29:%.*]] = sub nsw i32 [[TMP93]], 0
 // CHECK13-NEXT:    [[DIV30:%.*]] = sdiv i32 [[SUB29]], 1
 // CHECK13-NEXT:    [[SUB31:%.*]] = sub nsw i32 [[DIV30]], 1
 // CHECK13-NEXT:    store i32 [[SUB31]], i32* [[DOTCAPTURE_EXPR_28]], align 4
-// CHECK13-NEXT:    [[TMP98:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_28]], align 4
-// CHECK13-NEXT:    [[ADD32:%.*]] = add nsw i32 [[TMP98]], 1
-// CHECK13-NEXT:    [[TMP99:%.*]] = zext i32 [[ADD32]] to i64
-// CHECK13-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP99]])
-// CHECK13-NEXT:    [[TMP100:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l147.region_id, i32 4, i8** [[TMP93]], i8** [[TMP94]], i64* [[TMP95]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.7, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK13-NEXT:    [[TMP101:%.*]] = icmp ne i32 [[TMP100]], 0
-// CHECK13-NEXT:    br i1 [[TMP101]], label [[OMP_OFFLOAD_FAILED33:%.*]], label [[OMP_OFFLOAD_CONT34:%.*]]
+// CHECK13-NEXT:    [[TMP94:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_28]], align 4
+// CHECK13-NEXT:    [[ADD32:%.*]] = add nsw i32 [[TMP94]], 1
+// CHECK13-NEXT:    [[TMP95:%.*]] = zext i32 [[ADD32]] to i64
+// CHECK13-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP95]])
+// CHECK13-NEXT:    [[TMP96:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l147.region_id, i32 4, i8** [[TMP89]], i8** [[TMP90]], i64* [[TMP91]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK13-NEXT:    [[TMP97:%.*]] = icmp ne i32 [[TMP96]], 0
+// CHECK13-NEXT:    br i1 [[TMP97]], label [[OMP_OFFLOAD_FAILED33:%.*]], label [[OMP_OFFLOAD_CONT34:%.*]]
 // CHECK13:       omp_offload.failed33:
-// CHECK13-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l147(i64 [[TMP65]], i64 [[TMP1]], i32* [[VLA]], i64 [[TMP67]]) #[[ATTR3]]
+// CHECK13-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l147(i64 [[TMP63]], i64 [[TMP1]], i32* [[VLA]], i64 [[TMP65]]) #[[ATTR3]]
 // CHECK13-NEXT:    br label [[OMP_OFFLOAD_CONT34]]
 // CHECK13:       omp_offload.cont34:
-// CHECK13-NEXT:    [[TMP102:%.*]] = load i32, i32* [[N]], align 4
+// CHECK13-NEXT:    [[TMP98:%.*]] = load i32, i32* [[N]], align 4
 // CHECK13-NEXT:    [[CONV36:%.*]] = bitcast i64* [[N_CASTED35]] to i32*
-// CHECK13-NEXT:    store i32 [[TMP102]], i32* [[CONV36]], align 4
-// CHECK13-NEXT:    [[TMP103:%.*]] = load i64, i64* [[N_CASTED35]], align 8
-// CHECK13-NEXT:    [[TMP104:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK13-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS37]], i32 0, i32 0
-// CHECK13-NEXT:    [[TMP106:%.*]] = bitcast i8** [[TMP105]] to i64*
-// CHECK13-NEXT:    store i64 [[TMP103]], i64* [[TMP106]], align 8
-// CHECK13-NEXT:    [[TMP107:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS38]], i32 0, i32 0
+// CHECK13-NEXT:    store i32 [[TMP98]], i32* [[CONV36]], align 4
+// CHECK13-NEXT:    [[TMP99:%.*]] = load i64, i64* [[N_CASTED35]], align 8
+// CHECK13-NEXT:    [[TMP100:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK13-NEXT:    [[TMP101:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES40]] to i8*
+// CHECK13-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP101]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes.12 to i8*), i64 24, i1 false)
+// CHECK13-NEXT:    [[TMP102:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS37]], i32 0, i32 0
+// CHECK13-NEXT:    [[TMP103:%.*]] = bitcast i8** [[TMP102]] to i64*
+// CHECK13-NEXT:    store i64 [[TMP99]], i64* [[TMP103]], align 8
+// CHECK13-NEXT:    [[TMP104:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS38]], i32 0, i32 0
+// CHECK13-NEXT:    [[TMP105:%.*]] = bitcast i8** [[TMP104]] to i64*
+// CHECK13-NEXT:    store i64 [[TMP99]], i64* [[TMP105]], align 8
+// CHECK13-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS39]], i64 0, i64 0
+// CHECK13-NEXT:    store i8* null, i8** [[TMP106]], align 8
+// CHECK13-NEXT:    [[TMP107:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS37]], i32 0, i32 1
 // CHECK13-NEXT:    [[TMP108:%.*]] = bitcast i8** [[TMP107]] to i64*
-// CHECK13-NEXT:    store i64 [[TMP103]], i64* [[TMP108]], align 8
-// CHECK13-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES40]], i32 0, i32 0
-// CHECK13-NEXT:    store i64 4, i64* [[TMP109]], align 8
-// CHECK13-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS39]], i64 0, i64 0
-// CHECK13-NEXT:    store i8* null, i8** [[TMP110]], align 8
-// CHECK13-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS37]], i32 0, i32 1
-// CHECK13-NEXT:    [[TMP112:%.*]] = bitcast i8** [[TMP111]] to i64*
-// CHECK13-NEXT:    store i64 [[TMP1]], i64* [[TMP112]], align 8
-// CHECK13-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS38]], i32 0, i32 1
-// CHECK13-NEXT:    [[TMP114:%.*]] = bitcast i8** [[TMP113]] to i64*
-// CHECK13-NEXT:    store i64 [[TMP1]], i64* [[TMP114]], align 8
-// CHECK13-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES40]], i32 0, i32 1
-// CHECK13-NEXT:    store i64 8, i64* [[TMP115]], align 8
-// CHECK13-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS39]], i64 0, i64 1
-// CHECK13-NEXT:    store i8* null, i8** [[TMP116]], align 8
-// CHECK13-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS37]], i32 0, i32 2
-// CHECK13-NEXT:    [[TMP118:%.*]] = bitcast i8** [[TMP117]] to i32**
-// CHECK13-NEXT:    store i32* [[VLA]], i32** [[TMP118]], align 8
-// CHECK13-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS38]], i32 0, i32 2
-// CHECK13-NEXT:    [[TMP120:%.*]] = bitcast i8** [[TMP119]] to i32**
-// CHECK13-NEXT:    store i32* [[VLA]], i32** [[TMP120]], align 8
-// CHECK13-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES40]], i32 0, i32 2
-// CHECK13-NEXT:    store i64 [[TMP104]], i64* [[TMP121]], align 8
-// CHECK13-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS39]], i64 0, i64 2
-// CHECK13-NEXT:    store i8* null, i8** [[TMP122]], align 8
-// CHECK13-NEXT:    [[TMP123:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS37]], i32 0, i32 0
-// CHECK13-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS38]], i32 0, i32 0
-// CHECK13-NEXT:    [[TMP125:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES40]], i32 0, i32 0
-// CHECK13-NEXT:    [[TMP126:%.*]] = load i32, i32* [[N]], align 4
-// CHECK13-NEXT:    store i32 [[TMP126]], i32* [[DOTCAPTURE_EXPR_42]], align 4
-// CHECK13-NEXT:    [[TMP127:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_42]], align 4
-// CHECK13-NEXT:    [[SUB44:%.*]] = sub nsw i32 [[TMP127]], 0
+// CHECK13-NEXT:    store i64 [[TMP1]], i64* [[TMP108]], align 8
+// CHECK13-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS38]], i32 0, i32 1
+// CHECK13-NEXT:    [[TMP110:%.*]] = bitcast i8** [[TMP109]] to i64*
+// CHECK13-NEXT:    store i64 [[TMP1]], i64* [[TMP110]], align 8
+// CHECK13-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS39]], i64 0, i64 1
+// CHECK13-NEXT:    store i8* null, i8** [[TMP111]], align 8
+// CHECK13-NEXT:    [[TMP112:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS37]], i32 0, i32 2
+// CHECK13-NEXT:    [[TMP113:%.*]] = bitcast i8** [[TMP112]] to i32**
+// CHECK13-NEXT:    store i32* [[VLA]], i32** [[TMP113]], align 8
+// CHECK13-NEXT:    [[TMP114:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS38]], i32 0, i32 2
+// CHECK13-NEXT:    [[TMP115:%.*]] = bitcast i8** [[TMP114]] to i32**
+// CHECK13-NEXT:    store i32* [[VLA]], i32** [[TMP115]], align 8
+// CHECK13-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES40]], i32 0, i32 2
+// CHECK13-NEXT:    store i64 [[TMP100]], i64* [[TMP116]], align 8
+// CHECK13-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS39]], i64 0, i64 2
+// CHECK13-NEXT:    store i8* null, i8** [[TMP117]], align 8
+// CHECK13-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS37]], i32 0, i32 0
+// CHECK13-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS38]], i32 0, i32 0
+// CHECK13-NEXT:    [[TMP120:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES40]], i32 0, i32 0
+// CHECK13-NEXT:    [[TMP121:%.*]] = load i32, i32* [[N]], align 4
+// CHECK13-NEXT:    store i32 [[TMP121]], i32* [[DOTCAPTURE_EXPR_42]], align 4
+// CHECK13-NEXT:    [[TMP122:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_42]], align 4
+// CHECK13-NEXT:    [[SUB44:%.*]] = sub nsw i32 [[TMP122]], 0
 // CHECK13-NEXT:    [[DIV45:%.*]] = sdiv i32 [[SUB44]], 1
 // CHECK13-NEXT:    [[SUB46:%.*]] = sub nsw i32 [[DIV45]], 1
 // CHECK13-NEXT:    store i32 [[SUB46]], i32* [[DOTCAPTURE_EXPR_43]], align 4
-// CHECK13-NEXT:    [[TMP128:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_43]], align 4
-// CHECK13-NEXT:    [[ADD47:%.*]] = add nsw i32 [[TMP128]], 1
-// CHECK13-NEXT:    [[TMP129:%.*]] = zext i32 [[ADD47]] to i64
-// CHECK13-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP129]])
-// CHECK13-NEXT:    [[TMP130:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l151.region_id, i32 3, i8** [[TMP123]], i8** [[TMP124]], i64* [[TMP125]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.10, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK13-NEXT:    [[TMP131:%.*]] = icmp ne i32 [[TMP130]], 0
-// CHECK13-NEXT:    br i1 [[TMP131]], label [[OMP_OFFLOAD_FAILED48:%.*]], label [[OMP_OFFLOAD_CONT49:%.*]]
+// CHECK13-NEXT:    [[TMP123:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_43]], align 4
+// CHECK13-NEXT:    [[ADD47:%.*]] = add nsw i32 [[TMP123]], 1
+// CHECK13-NEXT:    [[TMP124:%.*]] = zext i32 [[ADD47]] to i64
+// CHECK13-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP124]])
+// CHECK13-NEXT:    [[TMP125:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l151.region_id, i32 3, i8** [[TMP118]], i8** [[TMP119]], i64* [[TMP120]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK13-NEXT:    [[TMP126:%.*]] = icmp ne i32 [[TMP125]], 0
+// CHECK13-NEXT:    br i1 [[TMP126]], label [[OMP_OFFLOAD_FAILED48:%.*]], label [[OMP_OFFLOAD_CONT49:%.*]]
 // CHECK13:       omp_offload.failed48:
-// CHECK13-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l151(i64 [[TMP103]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK13-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l151(i64 [[TMP99]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK13-NEXT:    br label [[OMP_OFFLOAD_CONT49]]
 // CHECK13:       omp_offload.cont49:
-// CHECK13-NEXT:    [[TMP132:%.*]] = load i32, i32* [[M]], align 4
-// CHECK13-NEXT:    store i32 [[TMP132]], i32* [[DOTCAPTURE_EXPR_50]], align 4
-// CHECK13-NEXT:    [[TMP133:%.*]] = load i32, i32* [[N]], align 4
+// CHECK13-NEXT:    [[TMP127:%.*]] = load i32, i32* [[M]], align 4
+// CHECK13-NEXT:    store i32 [[TMP127]], i32* [[DOTCAPTURE_EXPR_50]], align 4
+// CHECK13-NEXT:    [[TMP128:%.*]] = load i32, i32* [[N]], align 4
 // CHECK13-NEXT:    [[CONV52:%.*]] = bitcast i64* [[N_CASTED51]] to i32*
-// CHECK13-NEXT:    store i32 [[TMP133]], i32* [[CONV52]], align 4
-// CHECK13-NEXT:    [[TMP134:%.*]] = load i64, i64* [[N_CASTED51]], align 8
-// CHECK13-NEXT:    [[TMP135:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_50]], align 4
+// CHECK13-NEXT:    store i32 [[TMP128]], i32* [[CONV52]], align 4
+// CHECK13-NEXT:    [[TMP129:%.*]] = load i64, i64* [[N_CASTED51]], align 8
+// CHECK13-NEXT:    [[TMP130:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_50]], align 4
 // CHECK13-NEXT:    [[CONV54:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED53]] to i32*
-// CHECK13-NEXT:    store i32 [[TMP135]], i32* [[CONV54]], align 4
-// CHECK13-NEXT:    [[TMP136:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED53]], align 8
-// CHECK13-NEXT:    [[TMP137:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK13-NEXT:    [[TMP138:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS55]], i32 0, i32 0
-// CHECK13-NEXT:    [[TMP139:%.*]] = bitcast i8** [[TMP138]] to i64*
-// CHECK13-NEXT:    store i64 [[TMP134]], i64* [[TMP139]], align 8
-// CHECK13-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS56]], i32 0, i32 0
-// CHECK13-NEXT:    [[TMP141:%.*]] = bitcast i8** [[TMP140]] to i64*
-// CHECK13-NEXT:    store i64 [[TMP134]], i64* [[TMP141]], align 8
-// CHECK13-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES58]], i32 0, i32 0
-// CHECK13-NEXT:    store i64 4, i64* [[TMP142]], align 8
-// CHECK13-NEXT:    [[TMP143:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS57]], i64 0, i64 0
+// CHECK13-NEXT:    store i32 [[TMP130]], i32* [[CONV54]], align 4
+// CHECK13-NEXT:    [[TMP131:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED53]], align 8
+// CHECK13-NEXT:    [[TMP132:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK13-NEXT:    [[TMP133:%.*]] = bitcast [4 x i64]* [[DOTOFFLOAD_SIZES58]] to i8*
+// CHECK13-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP133]], i8* align 8 bitcast ([4 x i64]* @.offload_sizes.16 to i8*), i64 32, i1 false)
+// CHECK13-NEXT:    [[TMP134:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS55]], i32 0, i32 0
+// CHECK13-NEXT:    [[TMP135:%.*]] = bitcast i8** [[TMP134]] to i64*
+// CHECK13-NEXT:    store i64 [[TMP129]], i64* [[TMP135]], align 8
+// CHECK13-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS56]], i32 0, i32 0
+// CHECK13-NEXT:    [[TMP137:%.*]] = bitcast i8** [[TMP136]] to i64*
+// CHECK13-NEXT:    store i64 [[TMP129]], i64* [[TMP137]], align 8
+// CHECK13-NEXT:    [[TMP138:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS57]], i64 0, i64 0
+// CHECK13-NEXT:    store i8* null, i8** [[TMP138]], align 8
+// CHECK13-NEXT:    [[TMP139:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS55]], i32 0, i32 1
+// CHECK13-NEXT:    [[TMP140:%.*]] = bitcast i8** [[TMP139]] to i64*
+// CHECK13-NEXT:    store i64 [[TMP1]], i64* [[TMP140]], align 8
+// CHECK13-NEXT:    [[TMP141:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS56]], i32 0, i32 1
+// CHECK13-NEXT:    [[TMP142:%.*]] = bitcast i8** [[TMP141]] to i64*
+// CHECK13-NEXT:    store i64 [[TMP1]], i64* [[TMP142]], align 8
+// CHECK13-NEXT:    [[TMP143:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS57]], i64 0, i64 1
 // CHECK13-NEXT:    store i8* null, i8** [[TMP143]], align 8
-// CHECK13-NEXT:    [[TMP144:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS55]], i32 0, i32 1
-// CHECK13-NEXT:    [[TMP145:%.*]] = bitcast i8** [[TMP144]] to i64*
-// CHECK13-NEXT:    store i64 [[TMP1]], i64* [[TMP145]], align 8
-// CHECK13-NEXT:    [[TMP146:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS56]], i32 0, i32 1
-// CHECK13-NEXT:    [[TMP147:%.*]] = bitcast i8** [[TMP146]] to i64*
-// CHECK13-NEXT:    store i64 [[TMP1]], i64* [[TMP147]], align 8
-// CHECK13-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES58]], i32 0, i32 1
-// CHECK13-NEXT:    store i64 8, i64* [[TMP148]], align 8
-// CHECK13-NEXT:    [[TMP149:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS57]], i64 0, i64 1
+// CHECK13-NEXT:    [[TMP144:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS55]], i32 0, i32 2
+// CHECK13-NEXT:    [[TMP145:%.*]] = bitcast i8** [[TMP144]] to i32**
+// CHECK13-NEXT:    store i32* [[VLA]], i32** [[TMP145]], align 8
+// CHECK13-NEXT:    [[TMP146:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS56]], i32 0, i32 2
+// CHECK13-NEXT:    [[TMP147:%.*]] = bitcast i8** [[TMP146]] to i32**
+// CHECK13-NEXT:    store i32* [[VLA]], i32** [[TMP147]], align 8
+// CHECK13-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES58]], i32 0, i32 2
+// CHECK13-NEXT:    store i64 [[TMP132]], i64* [[TMP148]], align 8
+// CHECK13-NEXT:    [[TMP149:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS57]], i64 0, i64 2
 // CHECK13-NEXT:    store i8* null, i8** [[TMP149]], align 8
-// CHECK13-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS55]], i32 0, i32 2
-// CHECK13-NEXT:    [[TMP151:%.*]] = bitcast i8** [[TMP150]] to i32**
-// CHECK13-NEXT:    store i32* [[VLA]], i32** [[TMP151]], align 8
-// CHECK13-NEXT:    [[TMP152:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS56]], i32 0, i32 2
-// CHECK13-NEXT:    [[TMP153:%.*]] = bitcast i8** [[TMP152]] to i32**
-// CHECK13-NEXT:    store i32* [[VLA]], i32** [[TMP153]], align 8
-// CHECK13-NEXT:    [[TMP154:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES58]], i32 0, i32 2
-// CHECK13-NEXT:    store i64 [[TMP137]], i64* [[TMP154]], align 8
-// CHECK13-NEXT:    [[TMP155:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS57]], i64 0, i64 2
-// CHECK13-NEXT:    store i8* null, i8** [[TMP155]], align 8
-// CHECK13-NEXT:    [[TMP156:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS55]], i32 0, i32 3
-// CHECK13-NEXT:    [[TMP157:%.*]] = bitcast i8** [[TMP156]] to i64*
-// CHECK13-NEXT:    store i64 [[TMP136]], i64* [[TMP157]], align 8
-// CHECK13-NEXT:    [[TMP158:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS56]], i32 0, i32 3
-// CHECK13-NEXT:    [[TMP159:%.*]] = bitcast i8** [[TMP158]] to i64*
-// CHECK13-NEXT:    store i64 [[TMP136]], i64* [[TMP159]], align 8
-// CHECK13-NEXT:    [[TMP160:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES58]], i32 0, i32 3
-// CHECK13-NEXT:    store i64 4, i64* [[TMP160]], align 8
-// CHECK13-NEXT:    [[TMP161:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS57]], i64 0, i64 3
-// CHECK13-NEXT:    store i8* null, i8** [[TMP161]], align 8
-// CHECK13-NEXT:    [[TMP162:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS55]], i32 0, i32 0
-// CHECK13-NEXT:    [[TMP163:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS56]], i32 0, i32 0
-// CHECK13-NEXT:    [[TMP164:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES58]], i32 0, i32 0
-// CHECK13-NEXT:    [[TMP165:%.*]] = load i32, i32* [[N]], align 4
-// CHECK13-NEXT:    store i32 [[TMP165]], i32* [[DOTCAPTURE_EXPR_60]], align 4
-// CHECK13-NEXT:    [[TMP166:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_60]], align 4
-// CHECK13-NEXT:    [[SUB62:%.*]] = sub nsw i32 [[TMP166]], 0
+// CHECK13-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS55]], i32 0, i32 3
+// CHECK13-NEXT:    [[TMP151:%.*]] = bitcast i8** [[TMP150]] to i64*
+// CHECK13-NEXT:    store i64 [[TMP131]], i64* [[TMP151]], align 8
+// CHECK13-NEXT:    [[TMP152:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS56]], i32 0, i32 3
+// CHECK13-NEXT:    [[TMP153:%.*]] = bitcast i8** [[TMP152]] to i64*
+// CHECK13-NEXT:    store i64 [[TMP131]], i64* [[TMP153]], align 8
+// CHECK13-NEXT:    [[TMP154:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS57]], i64 0, i64 3
+// CHECK13-NEXT:    store i8* null, i8** [[TMP154]], align 8
+// CHECK13-NEXT:    [[TMP155:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS55]], i32 0, i32 0
+// CHECK13-NEXT:    [[TMP156:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS56]], i32 0, i32 0
+// CHECK13-NEXT:    [[TMP157:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES58]], i32 0, i32 0
+// CHECK13-NEXT:    [[TMP158:%.*]] = load i32, i32* [[N]], align 4
+// CHECK13-NEXT:    store i32 [[TMP158]], i32* [[DOTCAPTURE_EXPR_60]], align 4
+// CHECK13-NEXT:    [[TMP159:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_60]], align 4
+// CHECK13-NEXT:    [[SUB62:%.*]] = sub nsw i32 [[TMP159]], 0
 // CHECK13-NEXT:    [[DIV63:%.*]] = sdiv i32 [[SUB62]], 1
 // CHECK13-NEXT:    [[SUB64:%.*]] = sub nsw i32 [[DIV63]], 1
 // CHECK13-NEXT:    store i32 [[SUB64]], i32* [[DOTCAPTURE_EXPR_61]], align 4
-// CHECK13-NEXT:    [[TMP167:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_61]], align 4
-// CHECK13-NEXT:    [[ADD65:%.*]] = add nsw i32 [[TMP167]], 1
-// CHECK13-NEXT:    [[TMP168:%.*]] = zext i32 [[ADD65]] to i64
-// CHECK13-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP168]])
-// CHECK13-NEXT:    [[TMP169:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l155.region_id, i32 4, i8** [[TMP162]], i8** [[TMP163]], i64* [[TMP164]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK13-NEXT:    [[TMP170:%.*]] = icmp ne i32 [[TMP169]], 0
-// CHECK13-NEXT:    br i1 [[TMP170]], label [[OMP_OFFLOAD_FAILED66:%.*]], label [[OMP_OFFLOAD_CONT67:%.*]]
+// CHECK13-NEXT:    [[TMP160:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_61]], align 4
+// CHECK13-NEXT:    [[ADD65:%.*]] = add nsw i32 [[TMP160]], 1
+// CHECK13-NEXT:    [[TMP161:%.*]] = zext i32 [[ADD65]] to i64
+// CHECK13-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP161]])
+// CHECK13-NEXT:    [[TMP162:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l155.region_id, i32 4, i8** [[TMP155]], i8** [[TMP156]], i64* [[TMP157]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.17, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK13-NEXT:    [[TMP163:%.*]] = icmp ne i32 [[TMP162]], 0
+// CHECK13-NEXT:    br i1 [[TMP163]], label [[OMP_OFFLOAD_FAILED66:%.*]], label [[OMP_OFFLOAD_CONT67:%.*]]
 // CHECK13:       omp_offload.failed66:
-// CHECK13-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l155(i64 [[TMP134]], i64 [[TMP1]], i32* [[VLA]], i64 [[TMP136]]) #[[ATTR3]]
+// CHECK13-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l155(i64 [[TMP129]], i64 [[TMP1]], i32* [[VLA]], i64 [[TMP131]]) #[[ATTR3]]
 // CHECK13-NEXT:    br label [[OMP_OFFLOAD_CONT67]]
 // CHECK13:       omp_offload.cont67:
-// CHECK13-NEXT:    [[TMP171:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK13-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10EEiT_(i32 noundef signext [[TMP171]])
+// CHECK13-NEXT:    [[TMP164:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK13-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10EEiT_(i32 noundef signext [[TMP164]])
 // CHECK13-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK13-NEXT:    [[TMP172:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK13-NEXT:    call void @llvm.stackrestore(i8* [[TMP172]])
-// CHECK13-NEXT:    [[TMP173:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK13-NEXT:    ret i32 [[TMP173]]
+// CHECK13-NEXT:    [[TMP165:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK13-NEXT:    call void @llvm.stackrestore(i8* [[TMP165]])
+// CHECK13-NEXT:    [[TMP166:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK13-NEXT:    ret i32 [[TMP166]]
 //
 //
 // CHECK13-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l139
@@ -8253,11 +8239,11 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK13-NEXT:    store i32 [[TMP4]], i32* [[CONV3]], align 4
 // CHECK13-NEXT:    [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP3]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP5]])
+// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i64 [[TMP3]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP5]])
 // CHECK13-NEXT:    ret void
 //
 //
-// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..5
+// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..6
 // CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK13-NEXT:  entry:
 // CHECK13-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -8344,7 +8330,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[CONV9:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK13-NEXT:    store i32 [[TMP22]], i32* [[CONV9]], align 4
 // CHECK13-NEXT:    [[TMP23:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i64 [[TMP21]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP23]])
+// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*, i64)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i64 [[TMP21]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP23]])
 // CHECK13-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK13:       omp.inner.for.inc:
 // CHECK13-NEXT:    [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -8386,7 +8372,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    ret void
 //
 //
-// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..6
+// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..7
 // CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK13-NEXT:  entry:
 // CHECK13-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -8511,11 +8497,11 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32*
 // CHECK13-NEXT:    store i32 [[TMP2]], i32* [[CONV1]], align 4
 // CHECK13-NEXT:    [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8
-// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), i64 [[TMP3]], i64 [[TMP0]], i32* [[TMP1]])
+// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), i64 [[TMP3]], i64 [[TMP0]], i32* [[TMP1]])
 // CHECK13-NEXT:    ret void
 //
 //
-// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..8
+// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..10
 // CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] {
 // CHECK13-NEXT:  entry:
 // CHECK13-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -8592,7 +8578,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32*
 // CHECK13-NEXT:    store i32 [[TMP19]], i32* [[CONV6]], align 4
 // CHECK13-NEXT:    [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8
-// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], i32* [[TMP1]])
+// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], i32* [[TMP1]])
 // CHECK13-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK13:       omp.inner.for.inc:
 // CHECK13-NEXT:    [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -8611,7 +8597,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    ret void
 //
 //
-// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..9
+// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..11
 // CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] {
 // CHECK13-NEXT:  entry:
 // CHECK13-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -8737,11 +8723,11 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK13-NEXT:    store i32 [[TMP4]], i32* [[CONV3]], align 4
 // CHECK13-NEXT:    [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP3]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP5]])
+// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i64 [[TMP3]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP5]])
 // CHECK13-NEXT:    ret void
 //
 //
-// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..11
+// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..14
 // CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK13-NEXT:  entry:
 // CHECK13-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -8826,7 +8812,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[CONV9:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK13-NEXT:    store i32 [[TMP21]], i32* [[CONV9]], align 4
 // CHECK13-NEXT:    [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*, i64)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP22]])
+// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP22]])
 // CHECK13-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK13:       omp.inner.for.inc:
 // CHECK13-NEXT:    [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -8845,7 +8831,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    ret void
 //
 //
-// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..12
+// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..15
 // CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK13-NEXT:  entry:
 // CHECK13-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -8951,7 +8937,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK13-LABEL: define {{[^@]+}}@_Z5tmainIiLi10EEiT_
-// CHECK13-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK13-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK13-NEXT:  entry:
 // CHECK13-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK13-NEXT:    [[A:%.*]] = alloca [10 x i32], align 4
@@ -8993,7 +8979,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK13-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK13-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK13-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l112.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.16, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK13-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l112.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.20, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.21, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK13-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK13-NEXT:    br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK13:       omp_offload.failed:
@@ -9011,7 +8997,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK13-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
 // CHECK13-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK13-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l116.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.19, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.20, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK13-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l116.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.24, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.25, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK13-NEXT:    [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
 // CHECK13-NEXT:    br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]]
 // CHECK13:       omp_offload.failed5:
@@ -9043,7 +9029,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0
 // CHECK13-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS8]], i32 0, i32 0
 // CHECK13-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK13-NEXT:    [[TMP33:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l120.region_id, i32 2, i8** [[TMP31]], i8** [[TMP32]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.23, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.24, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK13-NEXT:    [[TMP33:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l120.region_id, i32 2, i8** [[TMP31]], i8** [[TMP32]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.28, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.29, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK13-NEXT:    [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0
 // CHECK13-NEXT:    br i1 [[TMP34]], label [[OMP_OFFLOAD_FAILED11:%.*]], label [[OMP_OFFLOAD_CONT12:%.*]]
 // CHECK13:       omp_offload.failed11:
@@ -9061,7 +9047,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0
 // CHECK13-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 0
 // CHECK13-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK13-NEXT:    [[TMP42:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l124.region_id, i32 1, i8** [[TMP40]], i8** [[TMP41]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.27, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.28, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK13-NEXT:    [[TMP42:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l124.region_id, i32 1, i8** [[TMP40]], i8** [[TMP41]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.32, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.33, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK13-NEXT:    [[TMP43:%.*]] = icmp ne i32 [[TMP42]], 0
 // CHECK13-NEXT:    br i1 [[TMP43]], label [[OMP_OFFLOAD_FAILED17:%.*]], label [[OMP_OFFLOAD_CONT18:%.*]]
 // CHECK13:       omp_offload.failed17:
@@ -9093,7 +9079,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 0
 // CHECK13-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 0
 // CHECK13-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK13-NEXT:    [[TMP59:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l128.region_id, i32 2, i8** [[TMP57]], i8** [[TMP58]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.31, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.32, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK13-NEXT:    [[TMP59:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l128.region_id, i32 2, i8** [[TMP57]], i8** [[TMP58]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.36, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.37, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK13-NEXT:    [[TMP60:%.*]] = icmp ne i32 [[TMP59]], 0
 // CHECK13-NEXT:    br i1 [[TMP60]], label [[OMP_OFFLOAD_FAILED26:%.*]], label [[OMP_OFFLOAD_CONT27:%.*]]
 // CHECK13:       omp_offload.failed26:
@@ -9109,11 +9095,11 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8
 // CHECK13-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8
 // CHECK13-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8
-// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK13-NEXT:    ret void
 //
 //
-// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..14
+// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..18
 // CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK13-NEXT:  entry:
 // CHECK13-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -9161,7 +9147,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
 // CHECK13-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
 // CHECK13-NEXT:    [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
-// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]])
+// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]])
 // CHECK13-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK13:       omp.inner.for.inc:
 // CHECK13-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -9176,7 +9162,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    ret void
 //
 //
-// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..15
+// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..19
 // CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK13-NEXT:  entry:
 // CHECK13-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -9259,11 +9245,11 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8
 // CHECK13-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8
 // CHECK13-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8
-// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..17 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..22 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK13-NEXT:    ret void
 //
 //
-// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..17
+// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..22
 // CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK13-NEXT:  entry:
 // CHECK13-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -9311,7 +9297,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
 // CHECK13-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
 // CHECK13-NEXT:    [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
-// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]])
+// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..23 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]])
 // CHECK13-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK13:       omp.inner.for.inc:
 // CHECK13-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -9326,7 +9312,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    ret void
 //
 //
-// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..18
+// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..23
 // CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK13-NEXT:  entry:
 // CHECK13-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -9417,11 +9403,11 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK13-NEXT:    store i32 [[TMP1]], i32* [[CONV1]], align 4
 // CHECK13-NEXT:    [[TMP2:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i64)* @.omp_outlined..21 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i64 [[TMP2]])
+// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i64)* @.omp_outlined..26 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i64 [[TMP2]])
 // CHECK13-NEXT:    ret void
 //
 //
-// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..21
+// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..26
 // CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK13-NEXT:  entry:
 // CHECK13-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -9477,7 +9463,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK13-NEXT:    store i32 [[TMP12]], i32* [[CONV2]], align 4
 // CHECK13-NEXT:    [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..22 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]])
+// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..27 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]])
 // CHECK13-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK13:       omp.inner.for.inc:
 // CHECK13-NEXT:    [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -9492,7 +9478,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    ret void
 //
 //
-// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..22
+// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..27
 // CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK13-NEXT:  entry:
 // CHECK13-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -9600,11 +9586,11 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8
 // CHECK13-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8
 // CHECK13-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8
-// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..25 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK13-NEXT:    ret void
 //
 //
-// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..25
+// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..30
 // CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK13-NEXT:  entry:
 // CHECK13-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -9652,7 +9638,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
 // CHECK13-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
 // CHECK13-NEXT:    [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
-// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]])
+// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]])
 // CHECK13-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK13:       omp.inner.for.inc:
 // CHECK13-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -9667,7 +9653,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    ret void
 //
 //
-// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..26
+// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..31
 // CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK13-NEXT:  entry:
 // CHECK13-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -9756,11 +9742,11 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK13-NEXT:    store i32 [[TMP1]], i32* [[CONV1]], align 4
 // CHECK13-NEXT:    [[TMP2:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i64)* @.omp_outlined..29 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i64 [[TMP2]])
+// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i64)* @.omp_outlined..34 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i64 [[TMP2]])
 // CHECK13-NEXT:    ret void
 //
 //
-// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..29
+// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..34
 // CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK13-NEXT:  entry:
 // CHECK13-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -9816,7 +9802,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK13-NEXT:    store i32 [[TMP12]], i32* [[CONV2]], align 4
 // CHECK13-NEXT:    [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]])
+// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]])
 // CHECK13-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK13:       omp.inner.for.inc:
 // CHECK13-NEXT:    [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -9831,7 +9817,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    ret void
 //
 //
-// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..30
+// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..35
 // CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK13-NEXT:  entry:
 // CHECK13-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -9911,7 +9897,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK13-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK13-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK13-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK13-NEXT:  entry:
 // CHECK13-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK13-NEXT:    ret void
@@ -9987,14 +9973,14 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    store i32 [[TMP3]], i32* [[CONV]], align 4
 // CHECK14-NEXT:    [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8
 // CHECK14-NEXT:    [[TMP5:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK14-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK14-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i64*
-// CHECK14-NEXT:    store i64 [[TMP4]], i64* [[TMP7]], align 8
-// CHECK14-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK14-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i64*
-// CHECK14-NEXT:    store i64 [[TMP4]], i64* [[TMP9]], align 8
-// CHECK14-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK14-NEXT:    store i64 4, i64* [[TMP10]], align 8
+// CHECK14-NEXT:    [[TMP6:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK14-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP6]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes to i8*), i64 24, i1 false)
+// CHECK14-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK14-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i64*
+// CHECK14-NEXT:    store i64 [[TMP4]], i64* [[TMP8]], align 8
+// CHECK14-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK14-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i64*
+// CHECK14-NEXT:    store i64 [[TMP4]], i64* [[TMP10]], align 8
 // CHECK14-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
 // CHECK14-NEXT:    store i8* null, i8** [[TMP11]], align 8
 // CHECK14-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -10003,304 +9989,290 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK14-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i64*
 // CHECK14-NEXT:    store i64 [[TMP1]], i64* [[TMP15]], align 8
-// CHECK14-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK14-NEXT:    store i64 8, i64* [[TMP16]], align 8
-// CHECK14-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK14-NEXT:    store i8* null, i8** [[TMP17]], align 8
-// CHECK14-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK14-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
-// CHECK14-NEXT:    store i32* [[VLA]], i32** [[TMP19]], align 8
-// CHECK14-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK14-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32**
-// CHECK14-NEXT:    store i32* [[VLA]], i32** [[TMP21]], align 8
-// CHECK14-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK14-NEXT:    store i64 [[TMP5]], i64* [[TMP22]], align 8
-// CHECK14-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK14-NEXT:    store i8* null, i8** [[TMP23]], align 8
-// CHECK14-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK14-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK14-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK14-NEXT:    [[TMP27:%.*]] = load i32, i32* [[N]], align 4
-// CHECK14-NEXT:    store i32 [[TMP27]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK14-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK14-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP28]], 0
+// CHECK14-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK14-NEXT:    store i8* null, i8** [[TMP16]], align 8
+// CHECK14-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK14-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32**
+// CHECK14-NEXT:    store i32* [[VLA]], i32** [[TMP18]], align 8
+// CHECK14-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK14-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32**
+// CHECK14-NEXT:    store i32* [[VLA]], i32** [[TMP20]], align 8
+// CHECK14-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
+// CHECK14-NEXT:    store i64 [[TMP5]], i64* [[TMP21]], align 8
+// CHECK14-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK14-NEXT:    store i8* null, i8** [[TMP22]], align 8
+// CHECK14-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK14-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK14-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK14-NEXT:    [[TMP26:%.*]] = load i32, i32* [[N]], align 4
+// CHECK14-NEXT:    store i32 [[TMP26]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK14-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK14-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP27]], 0
 // CHECK14-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK14-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK14-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK14-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK14-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP29]], 1
-// CHECK14-NEXT:    [[TMP30:%.*]] = zext i32 [[ADD]] to i64
-// CHECK14-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP30]])
-// CHECK14-NEXT:    [[TMP31:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l139.region_id, i32 3, i8** [[TMP24]], i8** [[TMP25]], i64* [[TMP26]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK14-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
-// CHECK14-NEXT:    br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK14-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK14-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP28]], 1
+// CHECK14-NEXT:    [[TMP29:%.*]] = zext i32 [[ADD]] to i64
+// CHECK14-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP29]])
+// CHECK14-NEXT:    [[TMP30:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l139.region_id, i32 3, i8** [[TMP23]], i8** [[TMP24]], i64* [[TMP25]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK14-NEXT:    [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
+// CHECK14-NEXT:    br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK14:       omp_offload.failed:
 // CHECK14-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l139(i64 [[TMP4]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK14-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK14:       omp_offload.cont:
-// CHECK14-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N]], align 4
+// CHECK14-NEXT:    [[TMP32:%.*]] = load i32, i32* [[N]], align 4
 // CHECK14-NEXT:    [[CONV4:%.*]] = bitcast i64* [[N_CASTED3]] to i32*
-// CHECK14-NEXT:    store i32 [[TMP33]], i32* [[CONV4]], align 4
-// CHECK14-NEXT:    [[TMP34:%.*]] = load i64, i64* [[N_CASTED3]], align 8
-// CHECK14-NEXT:    [[TMP35:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK14-NEXT:    store i32 [[TMP32]], i32* [[CONV4]], align 4
+// CHECK14-NEXT:    [[TMP33:%.*]] = load i64, i64* [[N_CASTED3]], align 8
+// CHECK14-NEXT:    [[TMP34:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK14-NEXT:    [[TMP35:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES8]] to i8*
+// CHECK14-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP35]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes.4 to i8*), i64 24, i1 false)
 // CHECK14-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
 // CHECK14-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i64*
-// CHECK14-NEXT:    store i64 [[TMP34]], i64* [[TMP37]], align 8
+// CHECK14-NEXT:    store i64 [[TMP33]], i64* [[TMP37]], align 8
 // CHECK14-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
 // CHECK14-NEXT:    [[TMP39:%.*]] = bitcast i8** [[TMP38]] to i64*
-// CHECK14-NEXT:    store i64 [[TMP34]], i64* [[TMP39]], align 8
-// CHECK14-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 0
-// CHECK14-NEXT:    store i64 4, i64* [[TMP40]], align 8
-// CHECK14-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 0
-// CHECK14-NEXT:    store i8* null, i8** [[TMP41]], align 8
-// CHECK14-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 1
-// CHECK14-NEXT:    [[TMP43:%.*]] = bitcast i8** [[TMP42]] to i64*
-// CHECK14-NEXT:    store i64 [[TMP1]], i64* [[TMP43]], align 8
-// CHECK14-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 1
-// CHECK14-NEXT:    [[TMP45:%.*]] = bitcast i8** [[TMP44]] to i64*
-// CHECK14-NEXT:    store i64 [[TMP1]], i64* [[TMP45]], align 8
-// CHECK14-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 1
-// CHECK14-NEXT:    store i64 8, i64* [[TMP46]], align 8
-// CHECK14-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 1
-// CHECK14-NEXT:    store i8* null, i8** [[TMP47]], align 8
-// CHECK14-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 2
+// CHECK14-NEXT:    store i64 [[TMP33]], i64* [[TMP39]], align 8
+// CHECK14-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 0
+// CHECK14-NEXT:    store i8* null, i8** [[TMP40]], align 8
+// CHECK14-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 1
+// CHECK14-NEXT:    [[TMP42:%.*]] = bitcast i8** [[TMP41]] to i64*
+// CHECK14-NEXT:    store i64 [[TMP1]], i64* [[TMP42]], align 8
+// CHECK14-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 1
+// CHECK14-NEXT:    [[TMP44:%.*]] = bitcast i8** [[TMP43]] to i64*
+// CHECK14-NEXT:    store i64 [[TMP1]], i64* [[TMP44]], align 8
+// CHECK14-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 1
+// CHECK14-NEXT:    store i8* null, i8** [[TMP45]], align 8
+// CHECK14-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 2
+// CHECK14-NEXT:    [[TMP47:%.*]] = bitcast i8** [[TMP46]] to i32**
+// CHECK14-NEXT:    store i32* [[VLA]], i32** [[TMP47]], align 8
+// CHECK14-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 2
 // CHECK14-NEXT:    [[TMP49:%.*]] = bitcast i8** [[TMP48]] to i32**
 // CHECK14-NEXT:    store i32* [[VLA]], i32** [[TMP49]], align 8
-// CHECK14-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 2
-// CHECK14-NEXT:    [[TMP51:%.*]] = bitcast i8** [[TMP50]] to i32**
-// CHECK14-NEXT:    store i32* [[VLA]], i32** [[TMP51]], align 8
-// CHECK14-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 2
-// CHECK14-NEXT:    store i64 [[TMP35]], i64* [[TMP52]], align 8
-// CHECK14-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 2
-// CHECK14-NEXT:    store i8* null, i8** [[TMP53]], align 8
-// CHECK14-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
-// CHECK14-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
-// CHECK14-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 0
-// CHECK14-NEXT:    [[TMP57:%.*]] = load i32, i32* [[N]], align 4
-// CHECK14-NEXT:    store i32 [[TMP57]], i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK14-NEXT:    [[TMP58:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK14-NEXT:    [[SUB12:%.*]] = sub nsw i32 [[TMP58]], 0
+// CHECK14-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 2
+// CHECK14-NEXT:    store i64 [[TMP34]], i64* [[TMP50]], align 8
+// CHECK14-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 2
+// CHECK14-NEXT:    store i8* null, i8** [[TMP51]], align 8
+// CHECK14-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
+// CHECK14-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
+// CHECK14-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 0
+// CHECK14-NEXT:    [[TMP55:%.*]] = load i32, i32* [[N]], align 4
+// CHECK14-NEXT:    store i32 [[TMP55]], i32* [[DOTCAPTURE_EXPR_10]], align 4
+// CHECK14-NEXT:    [[TMP56:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
+// CHECK14-NEXT:    [[SUB12:%.*]] = sub nsw i32 [[TMP56]], 0
 // CHECK14-NEXT:    [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1
 // CHECK14-NEXT:    [[SUB14:%.*]] = sub nsw i32 [[DIV13]], 1
 // CHECK14-NEXT:    store i32 [[SUB14]], i32* [[DOTCAPTURE_EXPR_11]], align 4
-// CHECK14-NEXT:    [[TMP59:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_11]], align 4
-// CHECK14-NEXT:    [[ADD15:%.*]] = add nsw i32 [[TMP59]], 1
-// CHECK14-NEXT:    [[TMP60:%.*]] = zext i32 [[ADD15]] to i64
-// CHECK14-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP60]])
-// CHECK14-NEXT:    [[TMP61:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l143.region_id, i32 3, i8** [[TMP54]], i8** [[TMP55]], i64* [[TMP56]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK14-NEXT:    [[TMP62:%.*]] = icmp ne i32 [[TMP61]], 0
-// CHECK14-NEXT:    br i1 [[TMP62]], label [[OMP_OFFLOAD_FAILED16:%.*]], label [[OMP_OFFLOAD_CONT17:%.*]]
+// CHECK14-NEXT:    [[TMP57:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_11]], align 4
+// CHECK14-NEXT:    [[ADD15:%.*]] = add nsw i32 [[TMP57]], 1
+// CHECK14-NEXT:    [[TMP58:%.*]] = zext i32 [[ADD15]] to i64
+// CHECK14-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP58]])
+// CHECK14-NEXT:    [[TMP59:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l143.region_id, i32 3, i8** [[TMP52]], i8** [[TMP53]], i64* [[TMP54]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.5, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK14-NEXT:    [[TMP60:%.*]] = icmp ne i32 [[TMP59]], 0
+// CHECK14-NEXT:    br i1 [[TMP60]], label [[OMP_OFFLOAD_FAILED16:%.*]], label [[OMP_OFFLOAD_CONT17:%.*]]
 // CHECK14:       omp_offload.failed16:
-// CHECK14-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l143(i64 [[TMP34]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK14-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l143(i64 [[TMP33]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK14-NEXT:    br label [[OMP_OFFLOAD_CONT17]]
 // CHECK14:       omp_offload.cont17:
-// CHECK14-NEXT:    [[TMP63:%.*]] = load i32, i32* [[M]], align 4
-// CHECK14-NEXT:    store i32 [[TMP63]], i32* [[DOTCAPTURE_EXPR_18]], align 4
-// CHECK14-NEXT:    [[TMP64:%.*]] = load i32, i32* [[N]], align 4
+// CHECK14-NEXT:    [[TMP61:%.*]] = load i32, i32* [[M]], align 4
+// CHECK14-NEXT:    store i32 [[TMP61]], i32* [[DOTCAPTURE_EXPR_18]], align 4
+// CHECK14-NEXT:    [[TMP62:%.*]] = load i32, i32* [[N]], align 4
 // CHECK14-NEXT:    [[CONV20:%.*]] = bitcast i64* [[N_CASTED19]] to i32*
-// CHECK14-NEXT:    store i32 [[TMP64]], i32* [[CONV20]], align 4
-// CHECK14-NEXT:    [[TMP65:%.*]] = load i64, i64* [[N_CASTED19]], align 8
-// CHECK14-NEXT:    [[TMP66:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4
+// CHECK14-NEXT:    store i32 [[TMP62]], i32* [[CONV20]], align 4
+// CHECK14-NEXT:    [[TMP63:%.*]] = load i64, i64* [[N_CASTED19]], align 8
+// CHECK14-NEXT:    [[TMP64:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4
 // CHECK14-NEXT:    [[CONV21:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
-// CHECK14-NEXT:    store i32 [[TMP66]], i32* [[CONV21]], align 4
-// CHECK14-NEXT:    [[TMP67:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK14-NEXT:    [[TMP68:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK14-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 0
-// CHECK14-NEXT:    [[TMP70:%.*]] = bitcast i8** [[TMP69]] to i64*
-// CHECK14-NEXT:    store i64 [[TMP65]], i64* [[TMP70]], align 8
-// CHECK14-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 0
-// CHECK14-NEXT:    [[TMP72:%.*]] = bitcast i8** [[TMP71]] to i64*
-// CHECK14-NEXT:    store i64 [[TMP65]], i64* [[TMP72]], align 8
-// CHECK14-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 0
-// CHECK14-NEXT:    store i64 4, i64* [[TMP73]], align 8
-// CHECK14-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 0
-// CHECK14-NEXT:    store i8* null, i8** [[TMP74]], align 8
-// CHECK14-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 1
+// CHECK14-NEXT:    store i32 [[TMP64]], i32* [[CONV21]], align 4
+// CHECK14-NEXT:    [[TMP65:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
+// CHECK14-NEXT:    [[TMP66:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK14-NEXT:    [[TMP67:%.*]] = bitcast [4 x i64]* [[DOTOFFLOAD_SIZES25]] to i8*
+// CHECK14-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP67]], i8* align 8 bitcast ([4 x i64]* @.offload_sizes.8 to i8*), i64 32, i1 false)
+// CHECK14-NEXT:    [[TMP68:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 0
+// CHECK14-NEXT:    [[TMP69:%.*]] = bitcast i8** [[TMP68]] to i64*
+// CHECK14-NEXT:    store i64 [[TMP63]], i64* [[TMP69]], align 8
+// CHECK14-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 0
+// CHECK14-NEXT:    [[TMP71:%.*]] = bitcast i8** [[TMP70]] to i64*
+// CHECK14-NEXT:    store i64 [[TMP63]], i64* [[TMP71]], align 8
+// CHECK14-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 0
+// CHECK14-NEXT:    store i8* null, i8** [[TMP72]], align 8
+// CHECK14-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 1
+// CHECK14-NEXT:    [[TMP74:%.*]] = bitcast i8** [[TMP73]] to i64*
+// CHECK14-NEXT:    store i64 [[TMP1]], i64* [[TMP74]], align 8
+// CHECK14-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 1
 // CHECK14-NEXT:    [[TMP76:%.*]] = bitcast i8** [[TMP75]] to i64*
 // CHECK14-NEXT:    store i64 [[TMP1]], i64* [[TMP76]], align 8
-// CHECK14-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 1
-// CHECK14-NEXT:    [[TMP78:%.*]] = bitcast i8** [[TMP77]] to i64*
-// CHECK14-NEXT:    store i64 [[TMP1]], i64* [[TMP78]], align 8
-// CHECK14-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 1
-// CHECK14-NEXT:    store i64 8, i64* [[TMP79]], align 8
-// CHECK14-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 1
-// CHECK14-NEXT:    store i8* null, i8** [[TMP80]], align 8
-// CHECK14-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 2
-// CHECK14-NEXT:    [[TMP82:%.*]] = bitcast i8** [[TMP81]] to i32**
-// CHECK14-NEXT:    store i32* [[VLA]], i32** [[TMP82]], align 8
-// CHECK14-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 2
-// CHECK14-NEXT:    [[TMP84:%.*]] = bitcast i8** [[TMP83]] to i32**
-// CHECK14-NEXT:    store i32* [[VLA]], i32** [[TMP84]], align 8
-// CHECK14-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 2
-// CHECK14-NEXT:    store i64 [[TMP68]], i64* [[TMP85]], align 8
-// CHECK14-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 2
-// CHECK14-NEXT:    store i8* null, i8** [[TMP86]], align 8
-// CHECK14-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 3
-// CHECK14-NEXT:    [[TMP88:%.*]] = bitcast i8** [[TMP87]] to i64*
-// CHECK14-NEXT:    store i64 [[TMP67]], i64* [[TMP88]], align 8
-// CHECK14-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 3
-// CHECK14-NEXT:    [[TMP90:%.*]] = bitcast i8** [[TMP89]] to i64*
-// CHECK14-NEXT:    store i64 [[TMP67]], i64* [[TMP90]], align 8
-// CHECK14-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 3
-// CHECK14-NEXT:    store i64 4, i64* [[TMP91]], align 8
-// CHECK14-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 3
-// CHECK14-NEXT:    store i8* null, i8** [[TMP92]], align 8
-// CHECK14-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 0
-// CHECK14-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 0
-// CHECK14-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 0
-// CHECK14-NEXT:    [[TMP96:%.*]] = load i32, i32* [[N]], align 4
-// CHECK14-NEXT:    store i32 [[TMP96]], i32* [[DOTCAPTURE_EXPR_27]], align 4
-// CHECK14-NEXT:    [[TMP97:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_27]], align 4
-// CHECK14-NEXT:    [[SUB29:%.*]] = sub nsw i32 [[TMP97]], 0
+// CHECK14-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 1
+// CHECK14-NEXT:    store i8* null, i8** [[TMP77]], align 8
+// CHECK14-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 2
+// CHECK14-NEXT:    [[TMP79:%.*]] = bitcast i8** [[TMP78]] to i32**
+// CHECK14-NEXT:    store i32* [[VLA]], i32** [[TMP79]], align 8
+// CHECK14-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 2
+// CHECK14-NEXT:    [[TMP81:%.*]] = bitcast i8** [[TMP80]] to i32**
+// CHECK14-NEXT:    store i32* [[VLA]], i32** [[TMP81]], align 8
+// CHECK14-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 2
+// CHECK14-NEXT:    store i64 [[TMP66]], i64* [[TMP82]], align 8
+// CHECK14-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 2
+// CHECK14-NEXT:    store i8* null, i8** [[TMP83]], align 8
+// CHECK14-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 3
+// CHECK14-NEXT:    [[TMP85:%.*]] = bitcast i8** [[TMP84]] to i64*
+// CHECK14-NEXT:    store i64 [[TMP65]], i64* [[TMP85]], align 8
+// CHECK14-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 3
+// CHECK14-NEXT:    [[TMP87:%.*]] = bitcast i8** [[TMP86]] to i64*
+// CHECK14-NEXT:    store i64 [[TMP65]], i64* [[TMP87]], align 8
+// CHECK14-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 3
+// CHECK14-NEXT:    store i8* null, i8** [[TMP88]], align 8
+// CHECK14-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 0
+// CHECK14-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 0
+// CHECK14-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 0
+// CHECK14-NEXT:    [[TMP92:%.*]] = load i32, i32* [[N]], align 4
+// CHECK14-NEXT:    store i32 [[TMP92]], i32* [[DOTCAPTURE_EXPR_27]], align 4
+// CHECK14-NEXT:    [[TMP93:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_27]], align 4
+// CHECK14-NEXT:    [[SUB29:%.*]] = sub nsw i32 [[TMP93]], 0
 // CHECK14-NEXT:    [[DIV30:%.*]] = sdiv i32 [[SUB29]], 1
 // CHECK14-NEXT:    [[SUB31:%.*]] = sub nsw i32 [[DIV30]], 1
 // CHECK14-NEXT:    store i32 [[SUB31]], i32* [[DOTCAPTURE_EXPR_28]], align 4
-// CHECK14-NEXT:    [[TMP98:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_28]], align 4
-// CHECK14-NEXT:    [[ADD32:%.*]] = add nsw i32 [[TMP98]], 1
-// CHECK14-NEXT:    [[TMP99:%.*]] = zext i32 [[ADD32]] to i64
-// CHECK14-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP99]])
-// CHECK14-NEXT:    [[TMP100:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l147.region_id, i32 4, i8** [[TMP93]], i8** [[TMP94]], i64* [[TMP95]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.7, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK14-NEXT:    [[TMP101:%.*]] = icmp ne i32 [[TMP100]], 0
-// CHECK14-NEXT:    br i1 [[TMP101]], label [[OMP_OFFLOAD_FAILED33:%.*]], label [[OMP_OFFLOAD_CONT34:%.*]]
+// CHECK14-NEXT:    [[TMP94:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_28]], align 4
+// CHECK14-NEXT:    [[ADD32:%.*]] = add nsw i32 [[TMP94]], 1
+// CHECK14-NEXT:    [[TMP95:%.*]] = zext i32 [[ADD32]] to i64
+// CHECK14-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP95]])
+// CHECK14-NEXT:    [[TMP96:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l147.region_id, i32 4, i8** [[TMP89]], i8** [[TMP90]], i64* [[TMP91]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK14-NEXT:    [[TMP97:%.*]] = icmp ne i32 [[TMP96]], 0
+// CHECK14-NEXT:    br i1 [[TMP97]], label [[OMP_OFFLOAD_FAILED33:%.*]], label [[OMP_OFFLOAD_CONT34:%.*]]
 // CHECK14:       omp_offload.failed33:
-// CHECK14-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l147(i64 [[TMP65]], i64 [[TMP1]], i32* [[VLA]], i64 [[TMP67]]) #[[ATTR3]]
+// CHECK14-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l147(i64 [[TMP63]], i64 [[TMP1]], i32* [[VLA]], i64 [[TMP65]]) #[[ATTR3]]
 // CHECK14-NEXT:    br label [[OMP_OFFLOAD_CONT34]]
 // CHECK14:       omp_offload.cont34:
-// CHECK14-NEXT:    [[TMP102:%.*]] = load i32, i32* [[N]], align 4
+// CHECK14-NEXT:    [[TMP98:%.*]] = load i32, i32* [[N]], align 4
 // CHECK14-NEXT:    [[CONV36:%.*]] = bitcast i64* [[N_CASTED35]] to i32*
-// CHECK14-NEXT:    store i32 [[TMP102]], i32* [[CONV36]], align 4
-// CHECK14-NEXT:    [[TMP103:%.*]] = load i64, i64* [[N_CASTED35]], align 8
-// CHECK14-NEXT:    [[TMP104:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK14-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS37]], i32 0, i32 0
-// CHECK14-NEXT:    [[TMP106:%.*]] = bitcast i8** [[TMP105]] to i64*
-// CHECK14-NEXT:    store i64 [[TMP103]], i64* [[TMP106]], align 8
-// CHECK14-NEXT:    [[TMP107:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS38]], i32 0, i32 0
+// CHECK14-NEXT:    store i32 [[TMP98]], i32* [[CONV36]], align 4
+// CHECK14-NEXT:    [[TMP99:%.*]] = load i64, i64* [[N_CASTED35]], align 8
+// CHECK14-NEXT:    [[TMP100:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK14-NEXT:    [[TMP101:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES40]] to i8*
+// CHECK14-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP101]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes.12 to i8*), i64 24, i1 false)
+// CHECK14-NEXT:    [[TMP102:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS37]], i32 0, i32 0
+// CHECK14-NEXT:    [[TMP103:%.*]] = bitcast i8** [[TMP102]] to i64*
+// CHECK14-NEXT:    store i64 [[TMP99]], i64* [[TMP103]], align 8
+// CHECK14-NEXT:    [[TMP104:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS38]], i32 0, i32 0
+// CHECK14-NEXT:    [[TMP105:%.*]] = bitcast i8** [[TMP104]] to i64*
+// CHECK14-NEXT:    store i64 [[TMP99]], i64* [[TMP105]], align 8
+// CHECK14-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS39]], i64 0, i64 0
+// CHECK14-NEXT:    store i8* null, i8** [[TMP106]], align 8
+// CHECK14-NEXT:    [[TMP107:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS37]], i32 0, i32 1
 // CHECK14-NEXT:    [[TMP108:%.*]] = bitcast i8** [[TMP107]] to i64*
-// CHECK14-NEXT:    store i64 [[TMP103]], i64* [[TMP108]], align 8
-// CHECK14-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES40]], i32 0, i32 0
-// CHECK14-NEXT:    store i64 4, i64* [[TMP109]], align 8
-// CHECK14-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS39]], i64 0, i64 0
-// CHECK14-NEXT:    store i8* null, i8** [[TMP110]], align 8
-// CHECK14-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS37]], i32 0, i32 1
-// CHECK14-NEXT:    [[TMP112:%.*]] = bitcast i8** [[TMP111]] to i64*
-// CHECK14-NEXT:    store i64 [[TMP1]], i64* [[TMP112]], align 8
-// CHECK14-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS38]], i32 0, i32 1
-// CHECK14-NEXT:    [[TMP114:%.*]] = bitcast i8** [[TMP113]] to i64*
-// CHECK14-NEXT:    store i64 [[TMP1]], i64* [[TMP114]], align 8
-// CHECK14-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES40]], i32 0, i32 1
-// CHECK14-NEXT:    store i64 8, i64* [[TMP115]], align 8
-// CHECK14-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS39]], i64 0, i64 1
-// CHECK14-NEXT:    store i8* null, i8** [[TMP116]], align 8
-// CHECK14-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS37]], i32 0, i32 2
-// CHECK14-NEXT:    [[TMP118:%.*]] = bitcast i8** [[TMP117]] to i32**
-// CHECK14-NEXT:    store i32* [[VLA]], i32** [[TMP118]], align 8
-// CHECK14-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS38]], i32 0, i32 2
-// CHECK14-NEXT:    [[TMP120:%.*]] = bitcast i8** [[TMP119]] to i32**
-// CHECK14-NEXT:    store i32* [[VLA]], i32** [[TMP120]], align 8
-// CHECK14-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES40]], i32 0, i32 2
-// CHECK14-NEXT:    store i64 [[TMP104]], i64* [[TMP121]], align 8
-// CHECK14-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS39]], i64 0, i64 2
-// CHECK14-NEXT:    store i8* null, i8** [[TMP122]], align 8
-// CHECK14-NEXT:    [[TMP123:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS37]], i32 0, i32 0
-// CHECK14-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS38]], i32 0, i32 0
-// CHECK14-NEXT:    [[TMP125:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES40]], i32 0, i32 0
-// CHECK14-NEXT:    [[TMP126:%.*]] = load i32, i32* [[N]], align 4
-// CHECK14-NEXT:    store i32 [[TMP126]], i32* [[DOTCAPTURE_EXPR_42]], align 4
-// CHECK14-NEXT:    [[TMP127:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_42]], align 4
-// CHECK14-NEXT:    [[SUB44:%.*]] = sub nsw i32 [[TMP127]], 0
+// CHECK14-NEXT:    store i64 [[TMP1]], i64* [[TMP108]], align 8
+// CHECK14-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS38]], i32 0, i32 1
+// CHECK14-NEXT:    [[TMP110:%.*]] = bitcast i8** [[TMP109]] to i64*
+// CHECK14-NEXT:    store i64 [[TMP1]], i64* [[TMP110]], align 8
+// CHECK14-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS39]], i64 0, i64 1
+// CHECK14-NEXT:    store i8* null, i8** [[TMP111]], align 8
+// CHECK14-NEXT:    [[TMP112:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS37]], i32 0, i32 2
+// CHECK14-NEXT:    [[TMP113:%.*]] = bitcast i8** [[TMP112]] to i32**
+// CHECK14-NEXT:    store i32* [[VLA]], i32** [[TMP113]], align 8
+// CHECK14-NEXT:    [[TMP114:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS38]], i32 0, i32 2
+// CHECK14-NEXT:    [[TMP115:%.*]] = bitcast i8** [[TMP114]] to i32**
+// CHECK14-NEXT:    store i32* [[VLA]], i32** [[TMP115]], align 8
+// CHECK14-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES40]], i32 0, i32 2
+// CHECK14-NEXT:    store i64 [[TMP100]], i64* [[TMP116]], align 8
+// CHECK14-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS39]], i64 0, i64 2
+// CHECK14-NEXT:    store i8* null, i8** [[TMP117]], align 8
+// CHECK14-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS37]], i32 0, i32 0
+// CHECK14-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS38]], i32 0, i32 0
+// CHECK14-NEXT:    [[TMP120:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES40]], i32 0, i32 0
+// CHECK14-NEXT:    [[TMP121:%.*]] = load i32, i32* [[N]], align 4
+// CHECK14-NEXT:    store i32 [[TMP121]], i32* [[DOTCAPTURE_EXPR_42]], align 4
+// CHECK14-NEXT:    [[TMP122:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_42]], align 4
+// CHECK14-NEXT:    [[SUB44:%.*]] = sub nsw i32 [[TMP122]], 0
 // CHECK14-NEXT:    [[DIV45:%.*]] = sdiv i32 [[SUB44]], 1
 // CHECK14-NEXT:    [[SUB46:%.*]] = sub nsw i32 [[DIV45]], 1
 // CHECK14-NEXT:    store i32 [[SUB46]], i32* [[DOTCAPTURE_EXPR_43]], align 4
-// CHECK14-NEXT:    [[TMP128:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_43]], align 4
-// CHECK14-NEXT:    [[ADD47:%.*]] = add nsw i32 [[TMP128]], 1
-// CHECK14-NEXT:    [[TMP129:%.*]] = zext i32 [[ADD47]] to i64
-// CHECK14-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP129]])
-// CHECK14-NEXT:    [[TMP130:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l151.region_id, i32 3, i8** [[TMP123]], i8** [[TMP124]], i64* [[TMP125]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.10, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK14-NEXT:    [[TMP131:%.*]] = icmp ne i32 [[TMP130]], 0
-// CHECK14-NEXT:    br i1 [[TMP131]], label [[OMP_OFFLOAD_FAILED48:%.*]], label [[OMP_OFFLOAD_CONT49:%.*]]
+// CHECK14-NEXT:    [[TMP123:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_43]], align 4
+// CHECK14-NEXT:    [[ADD47:%.*]] = add nsw i32 [[TMP123]], 1
+// CHECK14-NEXT:    [[TMP124:%.*]] = zext i32 [[ADD47]] to i64
+// CHECK14-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP124]])
+// CHECK14-NEXT:    [[TMP125:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l151.region_id, i32 3, i8** [[TMP118]], i8** [[TMP119]], i64* [[TMP120]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK14-NEXT:    [[TMP126:%.*]] = icmp ne i32 [[TMP125]], 0
+// CHECK14-NEXT:    br i1 [[TMP126]], label [[OMP_OFFLOAD_FAILED48:%.*]], label [[OMP_OFFLOAD_CONT49:%.*]]
 // CHECK14:       omp_offload.failed48:
-// CHECK14-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l151(i64 [[TMP103]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK14-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l151(i64 [[TMP99]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK14-NEXT:    br label [[OMP_OFFLOAD_CONT49]]
 // CHECK14:       omp_offload.cont49:
-// CHECK14-NEXT:    [[TMP132:%.*]] = load i32, i32* [[M]], align 4
-// CHECK14-NEXT:    store i32 [[TMP132]], i32* [[DOTCAPTURE_EXPR_50]], align 4
-// CHECK14-NEXT:    [[TMP133:%.*]] = load i32, i32* [[N]], align 4
+// CHECK14-NEXT:    [[TMP127:%.*]] = load i32, i32* [[M]], align 4
+// CHECK14-NEXT:    store i32 [[TMP127]], i32* [[DOTCAPTURE_EXPR_50]], align 4
+// CHECK14-NEXT:    [[TMP128:%.*]] = load i32, i32* [[N]], align 4
 // CHECK14-NEXT:    [[CONV52:%.*]] = bitcast i64* [[N_CASTED51]] to i32*
-// CHECK14-NEXT:    store i32 [[TMP133]], i32* [[CONV52]], align 4
-// CHECK14-NEXT:    [[TMP134:%.*]] = load i64, i64* [[N_CASTED51]], align 8
-// CHECK14-NEXT:    [[TMP135:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_50]], align 4
+// CHECK14-NEXT:    store i32 [[TMP128]], i32* [[CONV52]], align 4
+// CHECK14-NEXT:    [[TMP129:%.*]] = load i64, i64* [[N_CASTED51]], align 8
+// CHECK14-NEXT:    [[TMP130:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_50]], align 4
 // CHECK14-NEXT:    [[CONV54:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED53]] to i32*
-// CHECK14-NEXT:    store i32 [[TMP135]], i32* [[CONV54]], align 4
-// CHECK14-NEXT:    [[TMP136:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED53]], align 8
-// CHECK14-NEXT:    [[TMP137:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK14-NEXT:    [[TMP138:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS55]], i32 0, i32 0
-// CHECK14-NEXT:    [[TMP139:%.*]] = bitcast i8** [[TMP138]] to i64*
-// CHECK14-NEXT:    store i64 [[TMP134]], i64* [[TMP139]], align 8
-// CHECK14-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS56]], i32 0, i32 0
-// CHECK14-NEXT:    [[TMP141:%.*]] = bitcast i8** [[TMP140]] to i64*
-// CHECK14-NEXT:    store i64 [[TMP134]], i64* [[TMP141]], align 8
-// CHECK14-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES58]], i32 0, i32 0
-// CHECK14-NEXT:    store i64 4, i64* [[TMP142]], align 8
-// CHECK14-NEXT:    [[TMP143:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS57]], i64 0, i64 0
+// CHECK14-NEXT:    store i32 [[TMP130]], i32* [[CONV54]], align 4
+// CHECK14-NEXT:    [[TMP131:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED53]], align 8
+// CHECK14-NEXT:    [[TMP132:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK14-NEXT:    [[TMP133:%.*]] = bitcast [4 x i64]* [[DOTOFFLOAD_SIZES58]] to i8*
+// CHECK14-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP133]], i8* align 8 bitcast ([4 x i64]* @.offload_sizes.16 to i8*), i64 32, i1 false)
+// CHECK14-NEXT:    [[TMP134:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS55]], i32 0, i32 0
+// CHECK14-NEXT:    [[TMP135:%.*]] = bitcast i8** [[TMP134]] to i64*
+// CHECK14-NEXT:    store i64 [[TMP129]], i64* [[TMP135]], align 8
+// CHECK14-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS56]], i32 0, i32 0
+// CHECK14-NEXT:    [[TMP137:%.*]] = bitcast i8** [[TMP136]] to i64*
+// CHECK14-NEXT:    store i64 [[TMP129]], i64* [[TMP137]], align 8
+// CHECK14-NEXT:    [[TMP138:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS57]], i64 0, i64 0
+// CHECK14-NEXT:    store i8* null, i8** [[TMP138]], align 8
+// CHECK14-NEXT:    [[TMP139:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS55]], i32 0, i32 1
+// CHECK14-NEXT:    [[TMP140:%.*]] = bitcast i8** [[TMP139]] to i64*
+// CHECK14-NEXT:    store i64 [[TMP1]], i64* [[TMP140]], align 8
+// CHECK14-NEXT:    [[TMP141:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS56]], i32 0, i32 1
+// CHECK14-NEXT:    [[TMP142:%.*]] = bitcast i8** [[TMP141]] to i64*
+// CHECK14-NEXT:    store i64 [[TMP1]], i64* [[TMP142]], align 8
+// CHECK14-NEXT:    [[TMP143:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS57]], i64 0, i64 1
 // CHECK14-NEXT:    store i8* null, i8** [[TMP143]], align 8
-// CHECK14-NEXT:    [[TMP144:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS55]], i32 0, i32 1
-// CHECK14-NEXT:    [[TMP145:%.*]] = bitcast i8** [[TMP144]] to i64*
-// CHECK14-NEXT:    store i64 [[TMP1]], i64* [[TMP145]], align 8
-// CHECK14-NEXT:    [[TMP146:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS56]], i32 0, i32 1
-// CHECK14-NEXT:    [[TMP147:%.*]] = bitcast i8** [[TMP146]] to i64*
-// CHECK14-NEXT:    store i64 [[TMP1]], i64* [[TMP147]], align 8
-// CHECK14-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES58]], i32 0, i32 1
-// CHECK14-NEXT:    store i64 8, i64* [[TMP148]], align 8
-// CHECK14-NEXT:    [[TMP149:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS57]], i64 0, i64 1
+// CHECK14-NEXT:    [[TMP144:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS55]], i32 0, i32 2
+// CHECK14-NEXT:    [[TMP145:%.*]] = bitcast i8** [[TMP144]] to i32**
+// CHECK14-NEXT:    store i32* [[VLA]], i32** [[TMP145]], align 8
+// CHECK14-NEXT:    [[TMP146:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS56]], i32 0, i32 2
+// CHECK14-NEXT:    [[TMP147:%.*]] = bitcast i8** [[TMP146]] to i32**
+// CHECK14-NEXT:    store i32* [[VLA]], i32** [[TMP147]], align 8
+// CHECK14-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES58]], i32 0, i32 2
+// CHECK14-NEXT:    store i64 [[TMP132]], i64* [[TMP148]], align 8
+// CHECK14-NEXT:    [[TMP149:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS57]], i64 0, i64 2
 // CHECK14-NEXT:    store i8* null, i8** [[TMP149]], align 8
-// CHECK14-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS55]], i32 0, i32 2
-// CHECK14-NEXT:    [[TMP151:%.*]] = bitcast i8** [[TMP150]] to i32**
-// CHECK14-NEXT:    store i32* [[VLA]], i32** [[TMP151]], align 8
-// CHECK14-NEXT:    [[TMP152:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS56]], i32 0, i32 2
-// CHECK14-NEXT:    [[TMP153:%.*]] = bitcast i8** [[TMP152]] to i32**
-// CHECK14-NEXT:    store i32* [[VLA]], i32** [[TMP153]], align 8
-// CHECK14-NEXT:    [[TMP154:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES58]], i32 0, i32 2
-// CHECK14-NEXT:    store i64 [[TMP137]], i64* [[TMP154]], align 8
-// CHECK14-NEXT:    [[TMP155:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS57]], i64 0, i64 2
-// CHECK14-NEXT:    store i8* null, i8** [[TMP155]], align 8
-// CHECK14-NEXT:    [[TMP156:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS55]], i32 0, i32 3
-// CHECK14-NEXT:    [[TMP157:%.*]] = bitcast i8** [[TMP156]] to i64*
-// CHECK14-NEXT:    store i64 [[TMP136]], i64* [[TMP157]], align 8
-// CHECK14-NEXT:    [[TMP158:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS56]], i32 0, i32 3
-// CHECK14-NEXT:    [[TMP159:%.*]] = bitcast i8** [[TMP158]] to i64*
-// CHECK14-NEXT:    store i64 [[TMP136]], i64* [[TMP159]], align 8
-// CHECK14-NEXT:    [[TMP160:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES58]], i32 0, i32 3
-// CHECK14-NEXT:    store i64 4, i64* [[TMP160]], align 8
-// CHECK14-NEXT:    [[TMP161:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS57]], i64 0, i64 3
-// CHECK14-NEXT:    store i8* null, i8** [[TMP161]], align 8
-// CHECK14-NEXT:    [[TMP162:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS55]], i32 0, i32 0
-// CHECK14-NEXT:    [[TMP163:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS56]], i32 0, i32 0
-// CHECK14-NEXT:    [[TMP164:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES58]], i32 0, i32 0
-// CHECK14-NEXT:    [[TMP165:%.*]] = load i32, i32* [[N]], align 4
-// CHECK14-NEXT:    store i32 [[TMP165]], i32* [[DOTCAPTURE_EXPR_60]], align 4
-// CHECK14-NEXT:    [[TMP166:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_60]], align 4
-// CHECK14-NEXT:    [[SUB62:%.*]] = sub nsw i32 [[TMP166]], 0
+// CHECK14-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS55]], i32 0, i32 3
+// CHECK14-NEXT:    [[TMP151:%.*]] = bitcast i8** [[TMP150]] to i64*
+// CHECK14-NEXT:    store i64 [[TMP131]], i64* [[TMP151]], align 8
+// CHECK14-NEXT:    [[TMP152:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS56]], i32 0, i32 3
+// CHECK14-NEXT:    [[TMP153:%.*]] = bitcast i8** [[TMP152]] to i64*
+// CHECK14-NEXT:    store i64 [[TMP131]], i64* [[TMP153]], align 8
+// CHECK14-NEXT:    [[TMP154:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS57]], i64 0, i64 3
+// CHECK14-NEXT:    store i8* null, i8** [[TMP154]], align 8
+// CHECK14-NEXT:    [[TMP155:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS55]], i32 0, i32 0
+// CHECK14-NEXT:    [[TMP156:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS56]], i32 0, i32 0
+// CHECK14-NEXT:    [[TMP157:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES58]], i32 0, i32 0
+// CHECK14-NEXT:    [[TMP158:%.*]] = load i32, i32* [[N]], align 4
+// CHECK14-NEXT:    store i32 [[TMP158]], i32* [[DOTCAPTURE_EXPR_60]], align 4
+// CHECK14-NEXT:    [[TMP159:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_60]], align 4
+// CHECK14-NEXT:    [[SUB62:%.*]] = sub nsw i32 [[TMP159]], 0
 // CHECK14-NEXT:    [[DIV63:%.*]] = sdiv i32 [[SUB62]], 1
 // CHECK14-NEXT:    [[SUB64:%.*]] = sub nsw i32 [[DIV63]], 1
 // CHECK14-NEXT:    store i32 [[SUB64]], i32* [[DOTCAPTURE_EXPR_61]], align 4
-// CHECK14-NEXT:    [[TMP167:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_61]], align 4
-// CHECK14-NEXT:    [[ADD65:%.*]] = add nsw i32 [[TMP167]], 1
-// CHECK14-NEXT:    [[TMP168:%.*]] = zext i32 [[ADD65]] to i64
-// CHECK14-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP168]])
-// CHECK14-NEXT:    [[TMP169:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l155.region_id, i32 4, i8** [[TMP162]], i8** [[TMP163]], i64* [[TMP164]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK14-NEXT:    [[TMP170:%.*]] = icmp ne i32 [[TMP169]], 0
-// CHECK14-NEXT:    br i1 [[TMP170]], label [[OMP_OFFLOAD_FAILED66:%.*]], label [[OMP_OFFLOAD_CONT67:%.*]]
+// CHECK14-NEXT:    [[TMP160:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_61]], align 4
+// CHECK14-NEXT:    [[ADD65:%.*]] = add nsw i32 [[TMP160]], 1
+// CHECK14-NEXT:    [[TMP161:%.*]] = zext i32 [[ADD65]] to i64
+// CHECK14-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP161]])
+// CHECK14-NEXT:    [[TMP162:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l155.region_id, i32 4, i8** [[TMP155]], i8** [[TMP156]], i64* [[TMP157]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.17, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK14-NEXT:    [[TMP163:%.*]] = icmp ne i32 [[TMP162]], 0
+// CHECK14-NEXT:    br i1 [[TMP163]], label [[OMP_OFFLOAD_FAILED66:%.*]], label [[OMP_OFFLOAD_CONT67:%.*]]
 // CHECK14:       omp_offload.failed66:
-// CHECK14-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l155(i64 [[TMP134]], i64 [[TMP1]], i32* [[VLA]], i64 [[TMP136]]) #[[ATTR3]]
+// CHECK14-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l155(i64 [[TMP129]], i64 [[TMP1]], i32* [[VLA]], i64 [[TMP131]]) #[[ATTR3]]
 // CHECK14-NEXT:    br label [[OMP_OFFLOAD_CONT67]]
 // CHECK14:       omp_offload.cont67:
-// CHECK14-NEXT:    [[TMP171:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK14-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10EEiT_(i32 noundef signext [[TMP171]])
+// CHECK14-NEXT:    [[TMP164:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK14-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10EEiT_(i32 noundef signext [[TMP164]])
 // CHECK14-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK14-NEXT:    [[TMP172:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK14-NEXT:    call void @llvm.stackrestore(i8* [[TMP172]])
-// CHECK14-NEXT:    [[TMP173:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK14-NEXT:    ret i32 [[TMP173]]
+// CHECK14-NEXT:    [[TMP165:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK14-NEXT:    call void @llvm.stackrestore(i8* [[TMP165]])
+// CHECK14-NEXT:    [[TMP166:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK14-NEXT:    ret i32 [[TMP166]]
 //
 //
 // CHECK14-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l139
@@ -10772,11 +10744,11 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK14-NEXT:    store i32 [[TMP4]], i32* [[CONV3]], align 4
 // CHECK14-NEXT:    [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP3]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP5]])
+// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i64 [[TMP3]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP5]])
 // CHECK14-NEXT:    ret void
 //
 //
-// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..5
+// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..6
 // CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK14-NEXT:  entry:
 // CHECK14-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -10863,7 +10835,7 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    [[CONV9:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK14-NEXT:    store i32 [[TMP22]], i32* [[CONV9]], align 4
 // CHECK14-NEXT:    [[TMP23:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i64 [[TMP21]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP23]])
+// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*, i64)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i64 [[TMP21]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP23]])
 // CHECK14-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK14:       omp.inner.for.inc:
 // CHECK14-NEXT:    [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -10905,7 +10877,7 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    ret void
 //
 //
-// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..6
+// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..7
 // CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK14-NEXT:  entry:
 // CHECK14-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -11030,11 +11002,11 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32*
 // CHECK14-NEXT:    store i32 [[TMP2]], i32* [[CONV1]], align 4
 // CHECK14-NEXT:    [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8
-// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), i64 [[TMP3]], i64 [[TMP0]], i32* [[TMP1]])
+// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), i64 [[TMP3]], i64 [[TMP0]], i32* [[TMP1]])
 // CHECK14-NEXT:    ret void
 //
 //
-// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..8
+// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..10
 // CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] {
 // CHECK14-NEXT:  entry:
 // CHECK14-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -11111,7 +11083,7 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32*
 // CHECK14-NEXT:    store i32 [[TMP19]], i32* [[CONV6]], align 4
 // CHECK14-NEXT:    [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8
-// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], i32* [[TMP1]])
+// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], i32* [[TMP1]])
 // CHECK14-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK14:       omp.inner.for.inc:
 // CHECK14-NEXT:    [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -11130,7 +11102,7 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    ret void
 //
 //
-// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..9
+// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..11
 // CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] {
 // CHECK14-NEXT:  entry:
 // CHECK14-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -11256,11 +11228,11 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK14-NEXT:    store i32 [[TMP4]], i32* [[CONV3]], align 4
 // CHECK14-NEXT:    [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP3]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP5]])
+// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i64 [[TMP3]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP5]])
 // CHECK14-NEXT:    ret void
 //
 //
-// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..11
+// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..14
 // CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK14-NEXT:  entry:
 // CHECK14-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -11345,7 +11317,7 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    [[CONV9:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK14-NEXT:    store i32 [[TMP21]], i32* [[CONV9]], align 4
 // CHECK14-NEXT:    [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*, i64)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP22]])
+// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP22]])
 // CHECK14-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK14:       omp.inner.for.inc:
 // CHECK14-NEXT:    [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -11364,7 +11336,7 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    ret void
 //
 //
-// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..12
+// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..15
 // CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK14-NEXT:  entry:
 // CHECK14-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -11470,7 +11442,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK14-LABEL: define {{[^@]+}}@_Z5tmainIiLi10EEiT_
-// CHECK14-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK14-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK14-NEXT:  entry:
 // CHECK14-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK14-NEXT:    [[A:%.*]] = alloca [10 x i32], align 4
@@ -11512,7 +11484,7 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK14-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK14-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK14-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l112.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.16, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK14-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l112.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.20, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.21, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK14-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK14-NEXT:    br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK14:       omp_offload.failed:
@@ -11530,7 +11502,7 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK14-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
 // CHECK14-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK14-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l116.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.19, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.20, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK14-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l116.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.24, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.25, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK14-NEXT:    [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
 // CHECK14-NEXT:    br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]]
 // CHECK14:       omp_offload.failed5:
@@ -11562,7 +11534,7 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0
 // CHECK14-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS8]], i32 0, i32 0
 // CHECK14-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK14-NEXT:    [[TMP33:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l120.region_id, i32 2, i8** [[TMP31]], i8** [[TMP32]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.23, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.24, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK14-NEXT:    [[TMP33:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l120.region_id, i32 2, i8** [[TMP31]], i8** [[TMP32]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.28, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.29, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK14-NEXT:    [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0
 // CHECK14-NEXT:    br i1 [[TMP34]], label [[OMP_OFFLOAD_FAILED11:%.*]], label [[OMP_OFFLOAD_CONT12:%.*]]
 // CHECK14:       omp_offload.failed11:
@@ -11580,7 +11552,7 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0
 // CHECK14-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 0
 // CHECK14-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK14-NEXT:    [[TMP42:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l124.region_id, i32 1, i8** [[TMP40]], i8** [[TMP41]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.27, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.28, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK14-NEXT:    [[TMP42:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l124.region_id, i32 1, i8** [[TMP40]], i8** [[TMP41]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.32, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.33, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK14-NEXT:    [[TMP43:%.*]] = icmp ne i32 [[TMP42]], 0
 // CHECK14-NEXT:    br i1 [[TMP43]], label [[OMP_OFFLOAD_FAILED17:%.*]], label [[OMP_OFFLOAD_CONT18:%.*]]
 // CHECK14:       omp_offload.failed17:
@@ -11612,7 +11584,7 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 0
 // CHECK14-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 0
 // CHECK14-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK14-NEXT:    [[TMP59:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l128.region_id, i32 2, i8** [[TMP57]], i8** [[TMP58]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.31, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.32, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK14-NEXT:    [[TMP59:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l128.region_id, i32 2, i8** [[TMP57]], i8** [[TMP58]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.36, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.37, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK14-NEXT:    [[TMP60:%.*]] = icmp ne i32 [[TMP59]], 0
 // CHECK14-NEXT:    br i1 [[TMP60]], label [[OMP_OFFLOAD_FAILED26:%.*]], label [[OMP_OFFLOAD_CONT27:%.*]]
 // CHECK14:       omp_offload.failed26:
@@ -11628,11 +11600,11 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8
 // CHECK14-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8
 // CHECK14-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8
-// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK14-NEXT:    ret void
 //
 //
-// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..14
+// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..18
 // CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK14-NEXT:  entry:
 // CHECK14-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -11680,7 +11652,7 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
 // CHECK14-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
 // CHECK14-NEXT:    [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
-// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]])
+// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]])
 // CHECK14-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK14:       omp.inner.for.inc:
 // CHECK14-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -11695,7 +11667,7 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    ret void
 //
 //
-// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..15
+// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..19
 // CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK14-NEXT:  entry:
 // CHECK14-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -11778,11 +11750,11 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8
 // CHECK14-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8
 // CHECK14-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8
-// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..17 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..22 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK14-NEXT:    ret void
 //
 //
-// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..17
+// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..22
 // CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK14-NEXT:  entry:
 // CHECK14-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -11830,7 +11802,7 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
 // CHECK14-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
 // CHECK14-NEXT:    [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
-// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]])
+// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..23 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]])
 // CHECK14-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK14:       omp.inner.for.inc:
 // CHECK14-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -11845,7 +11817,7 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    ret void
 //
 //
-// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..18
+// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..23
 // CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK14-NEXT:  entry:
 // CHECK14-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -11936,11 +11908,11 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK14-NEXT:    store i32 [[TMP1]], i32* [[CONV1]], align 4
 // CHECK14-NEXT:    [[TMP2:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i64)* @.omp_outlined..21 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i64 [[TMP2]])
+// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i64)* @.omp_outlined..26 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i64 [[TMP2]])
 // CHECK14-NEXT:    ret void
 //
 //
-// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..21
+// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..26
 // CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK14-NEXT:  entry:
 // CHECK14-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -11996,7 +11968,7 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK14-NEXT:    store i32 [[TMP12]], i32* [[CONV2]], align 4
 // CHECK14-NEXT:    [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..22 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]])
+// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..27 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]])
 // CHECK14-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK14:       omp.inner.for.inc:
 // CHECK14-NEXT:    [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -12011,7 +11983,7 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    ret void
 //
 //
-// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..22
+// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..27
 // CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK14-NEXT:  entry:
 // CHECK14-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -12119,11 +12091,11 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8
 // CHECK14-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8
 // CHECK14-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8
-// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..25 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK14-NEXT:    ret void
 //
 //
-// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..25
+// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..30
 // CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK14-NEXT:  entry:
 // CHECK14-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -12171,7 +12143,7 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
 // CHECK14-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
 // CHECK14-NEXT:    [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
-// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]])
+// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]])
 // CHECK14-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK14:       omp.inner.for.inc:
 // CHECK14-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -12186,7 +12158,7 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    ret void
 //
 //
-// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..26
+// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..31
 // CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK14-NEXT:  entry:
 // CHECK14-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -12275,11 +12247,11 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK14-NEXT:    store i32 [[TMP1]], i32* [[CONV1]], align 4
 // CHECK14-NEXT:    [[TMP2:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i64)* @.omp_outlined..29 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i64 [[TMP2]])
+// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i64)* @.omp_outlined..34 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i64 [[TMP2]])
 // CHECK14-NEXT:    ret void
 //
 //
-// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..29
+// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..34
 // CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK14-NEXT:  entry:
 // CHECK14-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -12335,7 +12307,7 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK14-NEXT:    store i32 [[TMP12]], i32* [[CONV2]], align 4
 // CHECK14-NEXT:    [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]])
+// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]])
 // CHECK14-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK14:       omp.inner.for.inc:
 // CHECK14-NEXT:    [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -12350,7 +12322,7 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    ret void
 //
 //
-// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..30
+// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..35
 // CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK14-NEXT:  entry:
 // CHECK14-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -12430,7 +12402,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK14-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK14-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK14-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK14-NEXT:  entry:
 // CHECK14-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK14-NEXT:    ret void
@@ -12505,14 +12477,14 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4
 // CHECK15-NEXT:    [[TMP4:%.*]] = mul nuw i32 [[TMP0]], 4
 // CHECK15-NEXT:    [[TMP5:%.*]] = sext i32 [[TMP4]] to i64
-// CHECK15-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK15-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32*
-// CHECK15-NEXT:    store i32 [[TMP3]], i32* [[TMP7]], align 4
-// CHECK15-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK15-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i32*
-// CHECK15-NEXT:    store i32 [[TMP3]], i32* [[TMP9]], align 4
-// CHECK15-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK15-NEXT:    store i64 4, i64* [[TMP10]], align 4
+// CHECK15-NEXT:    [[TMP6:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK15-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP6]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes to i8*), i32 24, i1 false)
+// CHECK15-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK15-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i32*
+// CHECK15-NEXT:    store i32 [[TMP3]], i32* [[TMP8]], align 4
+// CHECK15-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK15-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i32*
+// CHECK15-NEXT:    store i32 [[TMP3]], i32* [[TMP10]], align 4
 // CHECK15-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
 // CHECK15-NEXT:    store i8* null, i8** [[TMP11]], align 4
 // CHECK15-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -12521,302 +12493,288 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK15-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i32*
 // CHECK15-NEXT:    store i32 [[TMP0]], i32* [[TMP15]], align 4
-// CHECK15-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK15-NEXT:    store i64 4, i64* [[TMP16]], align 4
-// CHECK15-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK15-NEXT:    store i8* null, i8** [[TMP17]], align 4
-// CHECK15-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK15-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
-// CHECK15-NEXT:    store i32* [[VLA]], i32** [[TMP19]], align 4
-// CHECK15-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK15-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32**
-// CHECK15-NEXT:    store i32* [[VLA]], i32** [[TMP21]], align 4
-// CHECK15-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK15-NEXT:    store i64 [[TMP5]], i64* [[TMP22]], align 4
-// CHECK15-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK15-NEXT:    store i8* null, i8** [[TMP23]], align 4
-// CHECK15-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK15-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK15-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK15-NEXT:    [[TMP27:%.*]] = load i32, i32* [[N]], align 4
-// CHECK15-NEXT:    store i32 [[TMP27]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK15-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK15-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP28]], 0
+// CHECK15-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK15-NEXT:    store i8* null, i8** [[TMP16]], align 4
+// CHECK15-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK15-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32**
+// CHECK15-NEXT:    store i32* [[VLA]], i32** [[TMP18]], align 4
+// CHECK15-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK15-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32**
+// CHECK15-NEXT:    store i32* [[VLA]], i32** [[TMP20]], align 4
+// CHECK15-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
+// CHECK15-NEXT:    store i64 [[TMP5]], i64* [[TMP21]], align 4
+// CHECK15-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK15-NEXT:    store i8* null, i8** [[TMP22]], align 4
+// CHECK15-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK15-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK15-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK15-NEXT:    [[TMP26:%.*]] = load i32, i32* [[N]], align 4
+// CHECK15-NEXT:    store i32 [[TMP26]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK15-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK15-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP27]], 0
 // CHECK15-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK15-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK15-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK15-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK15-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP29]], 1
-// CHECK15-NEXT:    [[TMP30:%.*]] = zext i32 [[ADD]] to i64
-// CHECK15-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP30]])
-// CHECK15-NEXT:    [[TMP31:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l139.region_id, i32 3, i8** [[TMP24]], i8** [[TMP25]], i64* [[TMP26]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK15-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
-// CHECK15-NEXT:    br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK15-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK15-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP28]], 1
+// CHECK15-NEXT:    [[TMP29:%.*]] = zext i32 [[ADD]] to i64
+// CHECK15-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP29]])
+// CHECK15-NEXT:    [[TMP30:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l139.region_id, i32 3, i8** [[TMP23]], i8** [[TMP24]], i64* [[TMP25]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK15-NEXT:    [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
+// CHECK15-NEXT:    br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK15:       omp_offload.failed:
 // CHECK15-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l139(i32 [[TMP3]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK15-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK15:       omp_offload.cont:
-// CHECK15-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N]], align 4
-// CHECK15-NEXT:    store i32 [[TMP33]], i32* [[N_CASTED3]], align 4
-// CHECK15-NEXT:    [[TMP34:%.*]] = load i32, i32* [[N_CASTED3]], align 4
-// CHECK15-NEXT:    [[TMP35:%.*]] = mul nuw i32 [[TMP0]], 4
-// CHECK15-NEXT:    [[TMP36:%.*]] = sext i32 [[TMP35]] to i64
+// CHECK15-NEXT:    [[TMP32:%.*]] = load i32, i32* [[N]], align 4
+// CHECK15-NEXT:    store i32 [[TMP32]], i32* [[N_CASTED3]], align 4
+// CHECK15-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N_CASTED3]], align 4
+// CHECK15-NEXT:    [[TMP34:%.*]] = mul nuw i32 [[TMP0]], 4
+// CHECK15-NEXT:    [[TMP35:%.*]] = sext i32 [[TMP34]] to i64
+// CHECK15-NEXT:    [[TMP36:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES7]] to i8*
+// CHECK15-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP36]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes.4 to i8*), i32 24, i1 false)
 // CHECK15-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
 // CHECK15-NEXT:    [[TMP38:%.*]] = bitcast i8** [[TMP37]] to i32*
-// CHECK15-NEXT:    store i32 [[TMP34]], i32* [[TMP38]], align 4
+// CHECK15-NEXT:    store i32 [[TMP33]], i32* [[TMP38]], align 4
 // CHECK15-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
 // CHECK15-NEXT:    [[TMP40:%.*]] = bitcast i8** [[TMP39]] to i32*
-// CHECK15-NEXT:    store i32 [[TMP34]], i32* [[TMP40]], align 4
-// CHECK15-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 0
-// CHECK15-NEXT:    store i64 4, i64* [[TMP41]], align 4
-// CHECK15-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 0
-// CHECK15-NEXT:    store i8* null, i8** [[TMP42]], align 4
-// CHECK15-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 1
-// CHECK15-NEXT:    [[TMP44:%.*]] = bitcast i8** [[TMP43]] to i32*
-// CHECK15-NEXT:    store i32 [[TMP0]], i32* [[TMP44]], align 4
-// CHECK15-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 1
-// CHECK15-NEXT:    [[TMP46:%.*]] = bitcast i8** [[TMP45]] to i32*
-// CHECK15-NEXT:    store i32 [[TMP0]], i32* [[TMP46]], align 4
-// CHECK15-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 1
-// CHECK15-NEXT:    store i64 4, i64* [[TMP47]], align 4
-// CHECK15-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 1
-// CHECK15-NEXT:    store i8* null, i8** [[TMP48]], align 4
-// CHECK15-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 2
+// CHECK15-NEXT:    store i32 [[TMP33]], i32* [[TMP40]], align 4
+// CHECK15-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 0
+// CHECK15-NEXT:    store i8* null, i8** [[TMP41]], align 4
+// CHECK15-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 1
+// CHECK15-NEXT:    [[TMP43:%.*]] = bitcast i8** [[TMP42]] to i32*
+// CHECK15-NEXT:    store i32 [[TMP0]], i32* [[TMP43]], align 4
+// CHECK15-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 1
+// CHECK15-NEXT:    [[TMP45:%.*]] = bitcast i8** [[TMP44]] to i32*
+// CHECK15-NEXT:    store i32 [[TMP0]], i32* [[TMP45]], align 4
+// CHECK15-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 1
+// CHECK15-NEXT:    store i8* null, i8** [[TMP46]], align 4
+// CHECK15-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 2
+// CHECK15-NEXT:    [[TMP48:%.*]] = bitcast i8** [[TMP47]] to i32**
+// CHECK15-NEXT:    store i32* [[VLA]], i32** [[TMP48]], align 4
+// CHECK15-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 2
 // CHECK15-NEXT:    [[TMP50:%.*]] = bitcast i8** [[TMP49]] to i32**
 // CHECK15-NEXT:    store i32* [[VLA]], i32** [[TMP50]], align 4
-// CHECK15-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 2
-// CHECK15-NEXT:    [[TMP52:%.*]] = bitcast i8** [[TMP51]] to i32**
-// CHECK15-NEXT:    store i32* [[VLA]], i32** [[TMP52]], align 4
-// CHECK15-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 2
-// CHECK15-NEXT:    store i64 [[TMP36]], i64* [[TMP53]], align 4
-// CHECK15-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 2
-// CHECK15-NEXT:    store i8* null, i8** [[TMP54]], align 4
-// CHECK15-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
-// CHECK15-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
-// CHECK15-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 0
-// CHECK15-NEXT:    [[TMP58:%.*]] = load i32, i32* [[N]], align 4
-// CHECK15-NEXT:    store i32 [[TMP58]], i32* [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK15-NEXT:    [[TMP59:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK15-NEXT:    [[SUB11:%.*]] = sub nsw i32 [[TMP59]], 0
+// CHECK15-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 2
+// CHECK15-NEXT:    store i64 [[TMP35]], i64* [[TMP51]], align 4
+// CHECK15-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 2
+// CHECK15-NEXT:    store i8* null, i8** [[TMP52]], align 4
+// CHECK15-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
+// CHECK15-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
+// CHECK15-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 0
+// CHECK15-NEXT:    [[TMP56:%.*]] = load i32, i32* [[N]], align 4
+// CHECK15-NEXT:    store i32 [[TMP56]], i32* [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK15-NEXT:    [[TMP57:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK15-NEXT:    [[SUB11:%.*]] = sub nsw i32 [[TMP57]], 0
 // CHECK15-NEXT:    [[DIV12:%.*]] = sdiv i32 [[SUB11]], 1
 // CHECK15-NEXT:    [[SUB13:%.*]] = sub nsw i32 [[DIV12]], 1
 // CHECK15-NEXT:    store i32 [[SUB13]], i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK15-NEXT:    [[TMP60:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK15-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP60]], 1
-// CHECK15-NEXT:    [[TMP61:%.*]] = zext i32 [[ADD14]] to i64
-// CHECK15-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP61]])
-// CHECK15-NEXT:    [[TMP62:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l143.region_id, i32 3, i8** [[TMP55]], i8** [[TMP56]], i64* [[TMP57]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK15-NEXT:    [[TMP63:%.*]] = icmp ne i32 [[TMP62]], 0
-// CHECK15-NEXT:    br i1 [[TMP63]], label [[OMP_OFFLOAD_FAILED15:%.*]], label [[OMP_OFFLOAD_CONT16:%.*]]
+// CHECK15-NEXT:    [[TMP58:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
+// CHECK15-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP58]], 1
+// CHECK15-NEXT:    [[TMP59:%.*]] = zext i32 [[ADD14]] to i64
+// CHECK15-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP59]])
+// CHECK15-NEXT:    [[TMP60:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l143.region_id, i32 3, i8** [[TMP53]], i8** [[TMP54]], i64* [[TMP55]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.5, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK15-NEXT:    [[TMP61:%.*]] = icmp ne i32 [[TMP60]], 0
+// CHECK15-NEXT:    br i1 [[TMP61]], label [[OMP_OFFLOAD_FAILED15:%.*]], label [[OMP_OFFLOAD_CONT16:%.*]]
 // CHECK15:       omp_offload.failed15:
-// CHECK15-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l143(i32 [[TMP34]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK15-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l143(i32 [[TMP33]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK15-NEXT:    br label [[OMP_OFFLOAD_CONT16]]
 // CHECK15:       omp_offload.cont16:
-// CHECK15-NEXT:    [[TMP64:%.*]] = load i32, i32* [[M]], align 4
-// CHECK15-NEXT:    store i32 [[TMP64]], i32* [[DOTCAPTURE_EXPR_17]], align 4
-// CHECK15-NEXT:    [[TMP65:%.*]] = load i32, i32* [[N]], align 4
-// CHECK15-NEXT:    store i32 [[TMP65]], i32* [[N_CASTED18]], align 4
-// CHECK15-NEXT:    [[TMP66:%.*]] = load i32, i32* [[N_CASTED18]], align 4
-// CHECK15-NEXT:    [[TMP67:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_17]], align 4
-// CHECK15-NEXT:    store i32 [[TMP67]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK15-NEXT:    [[TMP68:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK15-NEXT:    [[TMP69:%.*]] = mul nuw i32 [[TMP0]], 4
-// CHECK15-NEXT:    [[TMP70:%.*]] = sext i32 [[TMP69]] to i64
-// CHECK15-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 0
-// CHECK15-NEXT:    [[TMP72:%.*]] = bitcast i8** [[TMP71]] to i32*
-// CHECK15-NEXT:    store i32 [[TMP66]], i32* [[TMP72]], align 4
-// CHECK15-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 0
-// CHECK15-NEXT:    [[TMP74:%.*]] = bitcast i8** [[TMP73]] to i32*
-// CHECK15-NEXT:    store i32 [[TMP66]], i32* [[TMP74]], align 4
-// CHECK15-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 0
-// CHECK15-NEXT:    store i64 4, i64* [[TMP75]], align 4
-// CHECK15-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 0
-// CHECK15-NEXT:    store i8* null, i8** [[TMP76]], align 4
-// CHECK15-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 1
+// CHECK15-NEXT:    [[TMP62:%.*]] = load i32, i32* [[M]], align 4
+// CHECK15-NEXT:    store i32 [[TMP62]], i32* [[DOTCAPTURE_EXPR_17]], align 4
+// CHECK15-NEXT:    [[TMP63:%.*]] = load i32, i32* [[N]], align 4
+// CHECK15-NEXT:    store i32 [[TMP63]], i32* [[N_CASTED18]], align 4
+// CHECK15-NEXT:    [[TMP64:%.*]] = load i32, i32* [[N_CASTED18]], align 4
+// CHECK15-NEXT:    [[TMP65:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_17]], align 4
+// CHECK15-NEXT:    store i32 [[TMP65]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
+// CHECK15-NEXT:    [[TMP66:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
+// CHECK15-NEXT:    [[TMP67:%.*]] = mul nuw i32 [[TMP0]], 4
+// CHECK15-NEXT:    [[TMP68:%.*]] = sext i32 [[TMP67]] to i64
+// CHECK15-NEXT:    [[TMP69:%.*]] = bitcast [4 x i64]* [[DOTOFFLOAD_SIZES22]] to i8*
+// CHECK15-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP69]], i8* align 4 bitcast ([4 x i64]* @.offload_sizes.8 to i8*), i32 32, i1 false)
+// CHECK15-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 0
+// CHECK15-NEXT:    [[TMP71:%.*]] = bitcast i8** [[TMP70]] to i32*
+// CHECK15-NEXT:    store i32 [[TMP64]], i32* [[TMP71]], align 4
+// CHECK15-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 0
+// CHECK15-NEXT:    [[TMP73:%.*]] = bitcast i8** [[TMP72]] to i32*
+// CHECK15-NEXT:    store i32 [[TMP64]], i32* [[TMP73]], align 4
+// CHECK15-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 0
+// CHECK15-NEXT:    store i8* null, i8** [[TMP74]], align 4
+// CHECK15-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 1
+// CHECK15-NEXT:    [[TMP76:%.*]] = bitcast i8** [[TMP75]] to i32*
+// CHECK15-NEXT:    store i32 [[TMP0]], i32* [[TMP76]], align 4
+// CHECK15-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 1
 // CHECK15-NEXT:    [[TMP78:%.*]] = bitcast i8** [[TMP77]] to i32*
 // CHECK15-NEXT:    store i32 [[TMP0]], i32* [[TMP78]], align 4
-// CHECK15-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 1
-// CHECK15-NEXT:    [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i32*
-// CHECK15-NEXT:    store i32 [[TMP0]], i32* [[TMP80]], align 4
-// CHECK15-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 1
-// CHECK15-NEXT:    store i64 4, i64* [[TMP81]], align 4
-// CHECK15-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 1
-// CHECK15-NEXT:    store i8* null, i8** [[TMP82]], align 4
-// CHECK15-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 2
-// CHECK15-NEXT:    [[TMP84:%.*]] = bitcast i8** [[TMP83]] to i32**
-// CHECK15-NEXT:    store i32* [[VLA]], i32** [[TMP84]], align 4
-// CHECK15-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 2
-// CHECK15-NEXT:    [[TMP86:%.*]] = bitcast i8** [[TMP85]] to i32**
-// CHECK15-NEXT:    store i32* [[VLA]], i32** [[TMP86]], align 4
-// CHECK15-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 2
-// CHECK15-NEXT:    store i64 [[TMP70]], i64* [[TMP87]], align 4
-// CHECK15-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 2
-// CHECK15-NEXT:    store i8* null, i8** [[TMP88]], align 4
-// CHECK15-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 3
-// CHECK15-NEXT:    [[TMP90:%.*]] = bitcast i8** [[TMP89]] to i32*
-// CHECK15-NEXT:    store i32 [[TMP68]], i32* [[TMP90]], align 4
-// CHECK15-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 3
-// CHECK15-NEXT:    [[TMP92:%.*]] = bitcast i8** [[TMP91]] to i32*
-// CHECK15-NEXT:    store i32 [[TMP68]], i32* [[TMP92]], align 4
-// CHECK15-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 3
-// CHECK15-NEXT:    store i64 4, i64* [[TMP93]], align 4
-// CHECK15-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 3
-// CHECK15-NEXT:    store i8* null, i8** [[TMP94]], align 4
-// CHECK15-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 0
-// CHECK15-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 0
-// CHECK15-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 0
-// CHECK15-NEXT:    [[TMP98:%.*]] = load i32, i32* [[N]], align 4
-// CHECK15-NEXT:    store i32 [[TMP98]], i32* [[DOTCAPTURE_EXPR_24]], align 4
-// CHECK15-NEXT:    [[TMP99:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_24]], align 4
-// CHECK15-NEXT:    [[SUB26:%.*]] = sub nsw i32 [[TMP99]], 0
+// CHECK15-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 1
+// CHECK15-NEXT:    store i8* null, i8** [[TMP79]], align 4
+// CHECK15-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 2
+// CHECK15-NEXT:    [[TMP81:%.*]] = bitcast i8** [[TMP80]] to i32**
+// CHECK15-NEXT:    store i32* [[VLA]], i32** [[TMP81]], align 4
+// CHECK15-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 2
+// CHECK15-NEXT:    [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i32**
+// CHECK15-NEXT:    store i32* [[VLA]], i32** [[TMP83]], align 4
+// CHECK15-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 2
+// CHECK15-NEXT:    store i64 [[TMP68]], i64* [[TMP84]], align 4
+// CHECK15-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 2
+// CHECK15-NEXT:    store i8* null, i8** [[TMP85]], align 4
+// CHECK15-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 3
+// CHECK15-NEXT:    [[TMP87:%.*]] = bitcast i8** [[TMP86]] to i32*
+// CHECK15-NEXT:    store i32 [[TMP66]], i32* [[TMP87]], align 4
+// CHECK15-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 3
+// CHECK15-NEXT:    [[TMP89:%.*]] = bitcast i8** [[TMP88]] to i32*
+// CHECK15-NEXT:    store i32 [[TMP66]], i32* [[TMP89]], align 4
+// CHECK15-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 3
+// CHECK15-NEXT:    store i8* null, i8** [[TMP90]], align 4
+// CHECK15-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 0
+// CHECK15-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 0
+// CHECK15-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 0
+// CHECK15-NEXT:    [[TMP94:%.*]] = load i32, i32* [[N]], align 4
+// CHECK15-NEXT:    store i32 [[TMP94]], i32* [[DOTCAPTURE_EXPR_24]], align 4
+// CHECK15-NEXT:    [[TMP95:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_24]], align 4
+// CHECK15-NEXT:    [[SUB26:%.*]] = sub nsw i32 [[TMP95]], 0
 // CHECK15-NEXT:    [[DIV27:%.*]] = sdiv i32 [[SUB26]], 1
 // CHECK15-NEXT:    [[SUB28:%.*]] = sub nsw i32 [[DIV27]], 1
 // CHECK15-NEXT:    store i32 [[SUB28]], i32* [[DOTCAPTURE_EXPR_25]], align 4
-// CHECK15-NEXT:    [[TMP100:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_25]], align 4
-// CHECK15-NEXT:    [[ADD29:%.*]] = add nsw i32 [[TMP100]], 1
-// CHECK15-NEXT:    [[TMP101:%.*]] = zext i32 [[ADD29]] to i64
-// CHECK15-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP101]])
-// CHECK15-NEXT:    [[TMP102:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l147.region_id, i32 4, i8** [[TMP95]], i8** [[TMP96]], i64* [[TMP97]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.7, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK15-NEXT:    [[TMP103:%.*]] = icmp ne i32 [[TMP102]], 0
-// CHECK15-NEXT:    br i1 [[TMP103]], label [[OMP_OFFLOAD_FAILED30:%.*]], label [[OMP_OFFLOAD_CONT31:%.*]]
+// CHECK15-NEXT:    [[TMP96:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_25]], align 4
+// CHECK15-NEXT:    [[ADD29:%.*]] = add nsw i32 [[TMP96]], 1
+// CHECK15-NEXT:    [[TMP97:%.*]] = zext i32 [[ADD29]] to i64
+// CHECK15-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP97]])
+// CHECK15-NEXT:    [[TMP98:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l147.region_id, i32 4, i8** [[TMP91]], i8** [[TMP92]], i64* [[TMP93]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK15-NEXT:    [[TMP99:%.*]] = icmp ne i32 [[TMP98]], 0
+// CHECK15-NEXT:    br i1 [[TMP99]], label [[OMP_OFFLOAD_FAILED30:%.*]], label [[OMP_OFFLOAD_CONT31:%.*]]
 // CHECK15:       omp_offload.failed30:
-// CHECK15-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l147(i32 [[TMP66]], i32 [[TMP0]], i32* [[VLA]], i32 [[TMP68]]) #[[ATTR3]]
+// CHECK15-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l147(i32 [[TMP64]], i32 [[TMP0]], i32* [[VLA]], i32 [[TMP66]]) #[[ATTR3]]
 // CHECK15-NEXT:    br label [[OMP_OFFLOAD_CONT31]]
 // CHECK15:       omp_offload.cont31:
-// CHECK15-NEXT:    [[TMP104:%.*]] = load i32, i32* [[N]], align 4
-// CHECK15-NEXT:    store i32 [[TMP104]], i32* [[N_CASTED32]], align 4
-// CHECK15-NEXT:    [[TMP105:%.*]] = load i32, i32* [[N_CASTED32]], align 4
-// CHECK15-NEXT:    [[TMP106:%.*]] = mul nuw i32 [[TMP0]], 4
-// CHECK15-NEXT:    [[TMP107:%.*]] = sext i32 [[TMP106]] to i64
-// CHECK15-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS33]], i32 0, i32 0
-// CHECK15-NEXT:    [[TMP109:%.*]] = bitcast i8** [[TMP108]] to i32*
-// CHECK15-NEXT:    store i32 [[TMP105]], i32* [[TMP109]], align 4
-// CHECK15-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS34]], i32 0, i32 0
+// CHECK15-NEXT:    [[TMP100:%.*]] = load i32, i32* [[N]], align 4
+// CHECK15-NEXT:    store i32 [[TMP100]], i32* [[N_CASTED32]], align 4
+// CHECK15-NEXT:    [[TMP101:%.*]] = load i32, i32* [[N_CASTED32]], align 4
+// CHECK15-NEXT:    [[TMP102:%.*]] = mul nuw i32 [[TMP0]], 4
+// CHECK15-NEXT:    [[TMP103:%.*]] = sext i32 [[TMP102]] to i64
+// CHECK15-NEXT:    [[TMP104:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES36]] to i8*
+// CHECK15-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP104]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes.12 to i8*), i32 24, i1 false)
+// CHECK15-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS33]], i32 0, i32 0
+// CHECK15-NEXT:    [[TMP106:%.*]] = bitcast i8** [[TMP105]] to i32*
+// CHECK15-NEXT:    store i32 [[TMP101]], i32* [[TMP106]], align 4
+// CHECK15-NEXT:    [[TMP107:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS34]], i32 0, i32 0
+// CHECK15-NEXT:    [[TMP108:%.*]] = bitcast i8** [[TMP107]] to i32*
+// CHECK15-NEXT:    store i32 [[TMP101]], i32* [[TMP108]], align 4
+// CHECK15-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS35]], i32 0, i32 0
+// CHECK15-NEXT:    store i8* null, i8** [[TMP109]], align 4
+// CHECK15-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS33]], i32 0, i32 1
 // CHECK15-NEXT:    [[TMP111:%.*]] = bitcast i8** [[TMP110]] to i32*
-// CHECK15-NEXT:    store i32 [[TMP105]], i32* [[TMP111]], align 4
-// CHECK15-NEXT:    [[TMP112:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES36]], i32 0, i32 0
-// CHECK15-NEXT:    store i64 4, i64* [[TMP112]], align 4
-// CHECK15-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS35]], i32 0, i32 0
-// CHECK15-NEXT:    store i8* null, i8** [[TMP113]], align 4
-// CHECK15-NEXT:    [[TMP114:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS33]], i32 0, i32 1
-// CHECK15-NEXT:    [[TMP115:%.*]] = bitcast i8** [[TMP114]] to i32*
-// CHECK15-NEXT:    store i32 [[TMP0]], i32* [[TMP115]], align 4
-// CHECK15-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS34]], i32 0, i32 1
-// CHECK15-NEXT:    [[TMP117:%.*]] = bitcast i8** [[TMP116]] to i32*
-// CHECK15-NEXT:    store i32 [[TMP0]], i32* [[TMP117]], align 4
-// CHECK15-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES36]], i32 0, i32 1
-// CHECK15-NEXT:    store i64 4, i64* [[TMP118]], align 4
-// CHECK15-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS35]], i32 0, i32 1
-// CHECK15-NEXT:    store i8* null, i8** [[TMP119]], align 4
-// CHECK15-NEXT:    [[TMP120:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS33]], i32 0, i32 2
-// CHECK15-NEXT:    [[TMP121:%.*]] = bitcast i8** [[TMP120]] to i32**
-// CHECK15-NEXT:    store i32* [[VLA]], i32** [[TMP121]], align 4
-// CHECK15-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS34]], i32 0, i32 2
-// CHECK15-NEXT:    [[TMP123:%.*]] = bitcast i8** [[TMP122]] to i32**
-// CHECK15-NEXT:    store i32* [[VLA]], i32** [[TMP123]], align 4
-// CHECK15-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES36]], i32 0, i32 2
-// CHECK15-NEXT:    store i64 [[TMP107]], i64* [[TMP124]], align 4
-// CHECK15-NEXT:    [[TMP125:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS35]], i32 0, i32 2
-// CHECK15-NEXT:    store i8* null, i8** [[TMP125]], align 4
-// CHECK15-NEXT:    [[TMP126:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS33]], i32 0, i32 0
-// CHECK15-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS34]], i32 0, i32 0
-// CHECK15-NEXT:    [[TMP128:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES36]], i32 0, i32 0
-// CHECK15-NEXT:    [[TMP129:%.*]] = load i32, i32* [[N]], align 4
-// CHECK15-NEXT:    store i32 [[TMP129]], i32* [[DOTCAPTURE_EXPR_38]], align 4
-// CHECK15-NEXT:    [[TMP130:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_38]], align 4
-// CHECK15-NEXT:    [[SUB40:%.*]] = sub nsw i32 [[TMP130]], 0
+// CHECK15-NEXT:    store i32 [[TMP0]], i32* [[TMP111]], align 4
+// CHECK15-NEXT:    [[TMP112:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS34]], i32 0, i32 1
+// CHECK15-NEXT:    [[TMP113:%.*]] = bitcast i8** [[TMP112]] to i32*
+// CHECK15-NEXT:    store i32 [[TMP0]], i32* [[TMP113]], align 4
+// CHECK15-NEXT:    [[TMP114:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS35]], i32 0, i32 1
+// CHECK15-NEXT:    store i8* null, i8** [[TMP114]], align 4
+// CHECK15-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS33]], i32 0, i32 2
+// CHECK15-NEXT:    [[TMP116:%.*]] = bitcast i8** [[TMP115]] to i32**
+// CHECK15-NEXT:    store i32* [[VLA]], i32** [[TMP116]], align 4
+// CHECK15-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS34]], i32 0, i32 2
+// CHECK15-NEXT:    [[TMP118:%.*]] = bitcast i8** [[TMP117]] to i32**
+// CHECK15-NEXT:    store i32* [[VLA]], i32** [[TMP118]], align 4
+// CHECK15-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES36]], i32 0, i32 2
+// CHECK15-NEXT:    store i64 [[TMP103]], i64* [[TMP119]], align 4
+// CHECK15-NEXT:    [[TMP120:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS35]], i32 0, i32 2
+// CHECK15-NEXT:    store i8* null, i8** [[TMP120]], align 4
+// CHECK15-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS33]], i32 0, i32 0
+// CHECK15-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS34]], i32 0, i32 0
+// CHECK15-NEXT:    [[TMP123:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES36]], i32 0, i32 0
+// CHECK15-NEXT:    [[TMP124:%.*]] = load i32, i32* [[N]], align 4
+// CHECK15-NEXT:    store i32 [[TMP124]], i32* [[DOTCAPTURE_EXPR_38]], align 4
+// CHECK15-NEXT:    [[TMP125:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_38]], align 4
+// CHECK15-NEXT:    [[SUB40:%.*]] = sub nsw i32 [[TMP125]], 0
 // CHECK15-NEXT:    [[DIV41:%.*]] = sdiv i32 [[SUB40]], 1
 // CHECK15-NEXT:    [[SUB42:%.*]] = sub nsw i32 [[DIV41]], 1
 // CHECK15-NEXT:    store i32 [[SUB42]], i32* [[DOTCAPTURE_EXPR_39]], align 4
-// CHECK15-NEXT:    [[TMP131:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_39]], align 4
-// CHECK15-NEXT:    [[ADD43:%.*]] = add nsw i32 [[TMP131]], 1
-// CHECK15-NEXT:    [[TMP132:%.*]] = zext i32 [[ADD43]] to i64
-// CHECK15-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP132]])
-// CHECK15-NEXT:    [[TMP133:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l151.region_id, i32 3, i8** [[TMP126]], i8** [[TMP127]], i64* [[TMP128]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.10, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK15-NEXT:    [[TMP134:%.*]] = icmp ne i32 [[TMP133]], 0
-// CHECK15-NEXT:    br i1 [[TMP134]], label [[OMP_OFFLOAD_FAILED44:%.*]], label [[OMP_OFFLOAD_CONT45:%.*]]
+// CHECK15-NEXT:    [[TMP126:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_39]], align 4
+// CHECK15-NEXT:    [[ADD43:%.*]] = add nsw i32 [[TMP126]], 1
+// CHECK15-NEXT:    [[TMP127:%.*]] = zext i32 [[ADD43]] to i64
+// CHECK15-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP127]])
+// CHECK15-NEXT:    [[TMP128:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l151.region_id, i32 3, i8** [[TMP121]], i8** [[TMP122]], i64* [[TMP123]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK15-NEXT:    [[TMP129:%.*]] = icmp ne i32 [[TMP128]], 0
+// CHECK15-NEXT:    br i1 [[TMP129]], label [[OMP_OFFLOAD_FAILED44:%.*]], label [[OMP_OFFLOAD_CONT45:%.*]]
 // CHECK15:       omp_offload.failed44:
-// CHECK15-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l151(i32 [[TMP105]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK15-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l151(i32 [[TMP101]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK15-NEXT:    br label [[OMP_OFFLOAD_CONT45]]
 // CHECK15:       omp_offload.cont45:
-// CHECK15-NEXT:    [[TMP135:%.*]] = load i32, i32* [[M]], align 4
-// CHECK15-NEXT:    store i32 [[TMP135]], i32* [[DOTCAPTURE_EXPR_46]], align 4
-// CHECK15-NEXT:    [[TMP136:%.*]] = load i32, i32* [[N]], align 4
-// CHECK15-NEXT:    store i32 [[TMP136]], i32* [[N_CASTED47]], align 4
-// CHECK15-NEXT:    [[TMP137:%.*]] = load i32, i32* [[N_CASTED47]], align 4
-// CHECK15-NEXT:    [[TMP138:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_46]], align 4
-// CHECK15-NEXT:    store i32 [[TMP138]], i32* [[DOTCAPTURE_EXPR__CASTED48]], align 4
-// CHECK15-NEXT:    [[TMP139:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED48]], align 4
-// CHECK15-NEXT:    [[TMP140:%.*]] = mul nuw i32 [[TMP0]], 4
-// CHECK15-NEXT:    [[TMP141:%.*]] = sext i32 [[TMP140]] to i64
-// CHECK15-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS49]], i32 0, i32 0
-// CHECK15-NEXT:    [[TMP143:%.*]] = bitcast i8** [[TMP142]] to i32*
-// CHECK15-NEXT:    store i32 [[TMP137]], i32* [[TMP143]], align 4
-// CHECK15-NEXT:    [[TMP144:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS50]], i32 0, i32 0
-// CHECK15-NEXT:    [[TMP145:%.*]] = bitcast i8** [[TMP144]] to i32*
-// CHECK15-NEXT:    store i32 [[TMP137]], i32* [[TMP145]], align 4
-// CHECK15-NEXT:    [[TMP146:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES52]], i32 0, i32 0
-// CHECK15-NEXT:    store i64 4, i64* [[TMP146]], align 4
-// CHECK15-NEXT:    [[TMP147:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS51]], i32 0, i32 0
+// CHECK15-NEXT:    [[TMP130:%.*]] = load i32, i32* [[M]], align 4
+// CHECK15-NEXT:    store i32 [[TMP130]], i32* [[DOTCAPTURE_EXPR_46]], align 4
+// CHECK15-NEXT:    [[TMP131:%.*]] = load i32, i32* [[N]], align 4
+// CHECK15-NEXT:    store i32 [[TMP131]], i32* [[N_CASTED47]], align 4
+// CHECK15-NEXT:    [[TMP132:%.*]] = load i32, i32* [[N_CASTED47]], align 4
+// CHECK15-NEXT:    [[TMP133:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_46]], align 4
+// CHECK15-NEXT:    store i32 [[TMP133]], i32* [[DOTCAPTURE_EXPR__CASTED48]], align 4
+// CHECK15-NEXT:    [[TMP134:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED48]], align 4
+// CHECK15-NEXT:    [[TMP135:%.*]] = mul nuw i32 [[TMP0]], 4
+// CHECK15-NEXT:    [[TMP136:%.*]] = sext i32 [[TMP135]] to i64
+// CHECK15-NEXT:    [[TMP137:%.*]] = bitcast [4 x i64]* [[DOTOFFLOAD_SIZES52]] to i8*
+// CHECK15-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP137]], i8* align 4 bitcast ([4 x i64]* @.offload_sizes.16 to i8*), i32 32, i1 false)
+// CHECK15-NEXT:    [[TMP138:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS49]], i32 0, i32 0
+// CHECK15-NEXT:    [[TMP139:%.*]] = bitcast i8** [[TMP138]] to i32*
+// CHECK15-NEXT:    store i32 [[TMP132]], i32* [[TMP139]], align 4
+// CHECK15-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS50]], i32 0, i32 0
+// CHECK15-NEXT:    [[TMP141:%.*]] = bitcast i8** [[TMP140]] to i32*
+// CHECK15-NEXT:    store i32 [[TMP132]], i32* [[TMP141]], align 4
+// CHECK15-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS51]], i32 0, i32 0
+// CHECK15-NEXT:    store i8* null, i8** [[TMP142]], align 4
+// CHECK15-NEXT:    [[TMP143:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS49]], i32 0, i32 1
+// CHECK15-NEXT:    [[TMP144:%.*]] = bitcast i8** [[TMP143]] to i32*
+// CHECK15-NEXT:    store i32 [[TMP0]], i32* [[TMP144]], align 4
+// CHECK15-NEXT:    [[TMP145:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS50]], i32 0, i32 1
+// CHECK15-NEXT:    [[TMP146:%.*]] = bitcast i8** [[TMP145]] to i32*
+// CHECK15-NEXT:    store i32 [[TMP0]], i32* [[TMP146]], align 4
+// CHECK15-NEXT:    [[TMP147:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS51]], i32 0, i32 1
 // CHECK15-NEXT:    store i8* null, i8** [[TMP147]], align 4
-// CHECK15-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS49]], i32 0, i32 1
-// CHECK15-NEXT:    [[TMP149:%.*]] = bitcast i8** [[TMP148]] to i32*
-// CHECK15-NEXT:    store i32 [[TMP0]], i32* [[TMP149]], align 4
-// CHECK15-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS50]], i32 0, i32 1
-// CHECK15-NEXT:    [[TMP151:%.*]] = bitcast i8** [[TMP150]] to i32*
-// CHECK15-NEXT:    store i32 [[TMP0]], i32* [[TMP151]], align 4
-// CHECK15-NEXT:    [[TMP152:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES52]], i32 0, i32 1
-// CHECK15-NEXT:    store i64 4, i64* [[TMP152]], align 4
-// CHECK15-NEXT:    [[TMP153:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS51]], i32 0, i32 1
+// CHECK15-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS49]], i32 0, i32 2
+// CHECK15-NEXT:    [[TMP149:%.*]] = bitcast i8** [[TMP148]] to i32**
+// CHECK15-NEXT:    store i32* [[VLA]], i32** [[TMP149]], align 4
+// CHECK15-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS50]], i32 0, i32 2
+// CHECK15-NEXT:    [[TMP151:%.*]] = bitcast i8** [[TMP150]] to i32**
+// CHECK15-NEXT:    store i32* [[VLA]], i32** [[TMP151]], align 4
+// CHECK15-NEXT:    [[TMP152:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES52]], i32 0, i32 2
+// CHECK15-NEXT:    store i64 [[TMP136]], i64* [[TMP152]], align 4
+// CHECK15-NEXT:    [[TMP153:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS51]], i32 0, i32 2
 // CHECK15-NEXT:    store i8* null, i8** [[TMP153]], align 4
-// CHECK15-NEXT:    [[TMP154:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS49]], i32 0, i32 2
-// CHECK15-NEXT:    [[TMP155:%.*]] = bitcast i8** [[TMP154]] to i32**
-// CHECK15-NEXT:    store i32* [[VLA]], i32** [[TMP155]], align 4
-// CHECK15-NEXT:    [[TMP156:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS50]], i32 0, i32 2
-// CHECK15-NEXT:    [[TMP157:%.*]] = bitcast i8** [[TMP156]] to i32**
-// CHECK15-NEXT:    store i32* [[VLA]], i32** [[TMP157]], align 4
-// CHECK15-NEXT:    [[TMP158:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES52]], i32 0, i32 2
-// CHECK15-NEXT:    store i64 [[TMP141]], i64* [[TMP158]], align 4
-// CHECK15-NEXT:    [[TMP159:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS51]], i32 0, i32 2
-// CHECK15-NEXT:    store i8* null, i8** [[TMP159]], align 4
-// CHECK15-NEXT:    [[TMP160:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS49]], i32 0, i32 3
-// CHECK15-NEXT:    [[TMP161:%.*]] = bitcast i8** [[TMP160]] to i32*
-// CHECK15-NEXT:    store i32 [[TMP139]], i32* [[TMP161]], align 4
-// CHECK15-NEXT:    [[TMP162:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS50]], i32 0, i32 3
-// CHECK15-NEXT:    [[TMP163:%.*]] = bitcast i8** [[TMP162]] to i32*
-// CHECK15-NEXT:    store i32 [[TMP139]], i32* [[TMP163]], align 4
-// CHECK15-NEXT:    [[TMP164:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES52]], i32 0, i32 3
-// CHECK15-NEXT:    store i64 4, i64* [[TMP164]], align 4
-// CHECK15-NEXT:    [[TMP165:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS51]], i32 0, i32 3
-// CHECK15-NEXT:    store i8* null, i8** [[TMP165]], align 4
-// CHECK15-NEXT:    [[TMP166:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS49]], i32 0, i32 0
-// CHECK15-NEXT:    [[TMP167:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS50]], i32 0, i32 0
-// CHECK15-NEXT:    [[TMP168:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES52]], i32 0, i32 0
-// CHECK15-NEXT:    [[TMP169:%.*]] = load i32, i32* [[N]], align 4
-// CHECK15-NEXT:    store i32 [[TMP169]], i32* [[DOTCAPTURE_EXPR_54]], align 4
-// CHECK15-NEXT:    [[TMP170:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_54]], align 4
-// CHECK15-NEXT:    [[SUB56:%.*]] = sub nsw i32 [[TMP170]], 0
+// CHECK15-NEXT:    [[TMP154:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS49]], i32 0, i32 3
+// CHECK15-NEXT:    [[TMP155:%.*]] = bitcast i8** [[TMP154]] to i32*
+// CHECK15-NEXT:    store i32 [[TMP134]], i32* [[TMP155]], align 4
+// CHECK15-NEXT:    [[TMP156:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS50]], i32 0, i32 3
+// CHECK15-NEXT:    [[TMP157:%.*]] = bitcast i8** [[TMP156]] to i32*
+// CHECK15-NEXT:    store i32 [[TMP134]], i32* [[TMP157]], align 4
+// CHECK15-NEXT:    [[TMP158:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS51]], i32 0, i32 3
+// CHECK15-NEXT:    store i8* null, i8** [[TMP158]], align 4
+// CHECK15-NEXT:    [[TMP159:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS49]], i32 0, i32 0
+// CHECK15-NEXT:    [[TMP160:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS50]], i32 0, i32 0
+// CHECK15-NEXT:    [[TMP161:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES52]], i32 0, i32 0
+// CHECK15-NEXT:    [[TMP162:%.*]] = load i32, i32* [[N]], align 4
+// CHECK15-NEXT:    store i32 [[TMP162]], i32* [[DOTCAPTURE_EXPR_54]], align 4
+// CHECK15-NEXT:    [[TMP163:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_54]], align 4
+// CHECK15-NEXT:    [[SUB56:%.*]] = sub nsw i32 [[TMP163]], 0
 // CHECK15-NEXT:    [[DIV57:%.*]] = sdiv i32 [[SUB56]], 1
 // CHECK15-NEXT:    [[SUB58:%.*]] = sub nsw i32 [[DIV57]], 1
 // CHECK15-NEXT:    store i32 [[SUB58]], i32* [[DOTCAPTURE_EXPR_55]], align 4
-// CHECK15-NEXT:    [[TMP171:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_55]], align 4
-// CHECK15-NEXT:    [[ADD59:%.*]] = add nsw i32 [[TMP171]], 1
-// CHECK15-NEXT:    [[TMP172:%.*]] = zext i32 [[ADD59]] to i64
-// CHECK15-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP172]])
-// CHECK15-NEXT:    [[TMP173:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l155.region_id, i32 4, i8** [[TMP166]], i8** [[TMP167]], i64* [[TMP168]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK15-NEXT:    [[TMP174:%.*]] = icmp ne i32 [[TMP173]], 0
-// CHECK15-NEXT:    br i1 [[TMP174]], label [[OMP_OFFLOAD_FAILED60:%.*]], label [[OMP_OFFLOAD_CONT61:%.*]]
+// CHECK15-NEXT:    [[TMP164:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_55]], align 4
+// CHECK15-NEXT:    [[ADD59:%.*]] = add nsw i32 [[TMP164]], 1
+// CHECK15-NEXT:    [[TMP165:%.*]] = zext i32 [[ADD59]] to i64
+// CHECK15-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP165]])
+// CHECK15-NEXT:    [[TMP166:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l155.region_id, i32 4, i8** [[TMP159]], i8** [[TMP160]], i64* [[TMP161]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.17, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK15-NEXT:    [[TMP167:%.*]] = icmp ne i32 [[TMP166]], 0
+// CHECK15-NEXT:    br i1 [[TMP167]], label [[OMP_OFFLOAD_FAILED60:%.*]], label [[OMP_OFFLOAD_CONT61:%.*]]
 // CHECK15:       omp_offload.failed60:
-// CHECK15-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l155(i32 [[TMP137]], i32 [[TMP0]], i32* [[VLA]], i32 [[TMP139]]) #[[ATTR3]]
+// CHECK15-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l155(i32 [[TMP132]], i32 [[TMP0]], i32* [[VLA]], i32 [[TMP134]]) #[[ATTR3]]
 // CHECK15-NEXT:    br label [[OMP_OFFLOAD_CONT61]]
 // CHECK15:       omp_offload.cont61:
-// CHECK15-NEXT:    [[TMP175:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK15-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10EEiT_(i32 noundef [[TMP175]])
+// CHECK15-NEXT:    [[TMP168:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK15-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10EEiT_(i32 noundef [[TMP168]])
 // CHECK15-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK15-NEXT:    [[TMP176:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK15-NEXT:    call void @llvm.stackrestore(i8* [[TMP176]])
-// CHECK15-NEXT:    [[TMP177:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK15-NEXT:    ret i32 [[TMP177]]
+// CHECK15-NEXT:    [[TMP169:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK15-NEXT:    call void @llvm.stackrestore(i8* [[TMP169]])
+// CHECK15-NEXT:    [[TMP170:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK15-NEXT:    ret i32 [[TMP170]]
 //
 //
 // CHECK15-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l139
@@ -13264,11 +13222,11 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
 // CHECK15-NEXT:    store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK15-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32 [[TMP3]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP5]])
+// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32 [[TMP3]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP5]])
 // CHECK15-NEXT:    ret void
 //
 //
-// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..5
+// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..6
 // CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK15-NEXT:  entry:
 // CHECK15-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -13349,7 +13307,7 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
 // CHECK15-NEXT:    store i32 [[TMP20]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK15-NEXT:    [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*, i32)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32 [[TMP19]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP21]])
+// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*, i32)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32 [[TMP19]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP21]])
 // CHECK15-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK15:       omp.inner.for.inc:
 // CHECK15-NEXT:    [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -13391,7 +13349,7 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    ret void
 //
 //
-// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..6
+// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..7
 // CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK15-NEXT:  entry:
 // CHECK15-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -13509,11 +13467,11 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4
 // CHECK15-NEXT:    store i32 [[TMP2]], i32* [[N_CASTED]], align 4
 // CHECK15-NEXT:    [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4
-// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), i32 [[TMP3]], i32 [[TMP0]], i32* [[TMP1]])
+// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), i32 [[TMP3]], i32 [[TMP0]], i32* [[TMP1]])
 // CHECK15-NEXT:    ret void
 //
 //
-// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..8
+// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..10
 // CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] {
 // CHECK15-NEXT:  entry:
 // CHECK15-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -13586,7 +13544,7 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[TMP17:%.*]] = load i32, i32* [[N_ADDR]], align 4
 // CHECK15-NEXT:    store i32 [[TMP17]], i32* [[N_CASTED]], align 4
 // CHECK15-NEXT:    [[TMP18:%.*]] = load i32, i32* [[N_CASTED]], align 4
-// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], i32* [[TMP1]])
+// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], i32* [[TMP1]])
 // CHECK15-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK15:       omp.inner.for.inc:
 // CHECK15-NEXT:    [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -13605,7 +13563,7 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    ret void
 //
 //
-// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..9
+// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..11
 // CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] {
 // CHECK15-NEXT:  entry:
 // CHECK15-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -13723,11 +13681,11 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
 // CHECK15-NEXT:    store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK15-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32 [[TMP3]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP5]])
+// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i32 [[TMP3]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP5]])
 // CHECK15-NEXT:    ret void
 //
 //
-// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..11
+// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..14
 // CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK15-NEXT:  entry:
 // CHECK15-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -13806,7 +13764,7 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
 // CHECK15-NEXT:    store i32 [[TMP19]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK15-NEXT:    [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*, i32)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP20]])
+// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*, i32)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP20]])
 // CHECK15-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK15:       omp.inner.for.inc:
 // CHECK15-NEXT:    [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -13825,7 +13783,7 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    ret void
 //
 //
-// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..12
+// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..15
 // CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK15-NEXT:  entry:
 // CHECK15-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -13926,7 +13884,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK15-LABEL: define {{[^@]+}}@_Z5tmainIiLi10EEiT_
-// CHECK15-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK15-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK15-NEXT:  entry:
 // CHECK15-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK15-NEXT:    [[A:%.*]] = alloca [10 x i32], align 4
@@ -13968,7 +13926,7 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK15-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK15-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK15-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l112.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.16, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK15-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l112.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.20, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.21, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK15-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK15-NEXT:    br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK15:       omp_offload.failed:
@@ -13986,7 +13944,7 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK15-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
 // CHECK15-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK15-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l116.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.19, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.20, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK15-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l116.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.24, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.25, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK15-NEXT:    [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
 // CHECK15-NEXT:    br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]]
 // CHECK15:       omp_offload.failed5:
@@ -14017,7 +13975,7 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0
 // CHECK15-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS8]], i32 0, i32 0
 // CHECK15-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK15-NEXT:    [[TMP33:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l120.region_id, i32 2, i8** [[TMP31]], i8** [[TMP32]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.23, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.24, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK15-NEXT:    [[TMP33:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l120.region_id, i32 2, i8** [[TMP31]], i8** [[TMP32]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.28, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.29, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK15-NEXT:    [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0
 // CHECK15-NEXT:    br i1 [[TMP34]], label [[OMP_OFFLOAD_FAILED11:%.*]], label [[OMP_OFFLOAD_CONT12:%.*]]
 // CHECK15:       omp_offload.failed11:
@@ -14035,7 +13993,7 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0
 // CHECK15-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 0
 // CHECK15-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK15-NEXT:    [[TMP42:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l124.region_id, i32 1, i8** [[TMP40]], i8** [[TMP41]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.27, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.28, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK15-NEXT:    [[TMP42:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l124.region_id, i32 1, i8** [[TMP40]], i8** [[TMP41]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.32, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.33, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK15-NEXT:    [[TMP43:%.*]] = icmp ne i32 [[TMP42]], 0
 // CHECK15-NEXT:    br i1 [[TMP43]], label [[OMP_OFFLOAD_FAILED17:%.*]], label [[OMP_OFFLOAD_CONT18:%.*]]
 // CHECK15:       omp_offload.failed17:
@@ -14066,7 +14024,7 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 0
 // CHECK15-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 0
 // CHECK15-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK15-NEXT:    [[TMP59:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l128.region_id, i32 2, i8** [[TMP57]], i8** [[TMP58]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.31, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.32, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK15-NEXT:    [[TMP59:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l128.region_id, i32 2, i8** [[TMP57]], i8** [[TMP58]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.36, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.37, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK15-NEXT:    [[TMP60:%.*]] = icmp ne i32 [[TMP59]], 0
 // CHECK15-NEXT:    br i1 [[TMP60]], label [[OMP_OFFLOAD_FAILED25:%.*]], label [[OMP_OFFLOAD_CONT26:%.*]]
 // CHECK15:       omp_offload.failed25:
@@ -14082,11 +14040,11 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4
 // CHECK15-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4
 // CHECK15-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4
-// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK15-NEXT:    ret void
 //
 //
-// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..14
+// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..18
 // CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK15-NEXT:  entry:
 // CHECK15-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -14132,7 +14090,7 @@ int main (int argc, char **argv) {
 // CHECK15:       omp.inner.for.body:
 // CHECK15-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
 // CHECK15-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
-// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]])
+// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]])
 // CHECK15-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK15:       omp.inner.for.inc:
 // CHECK15-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -14147,7 +14105,7 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    ret void
 //
 //
-// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..15
+// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..19
 // CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK15-NEXT:  entry:
 // CHECK15-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -14227,11 +14185,11 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4
 // CHECK15-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4
 // CHECK15-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4
-// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..17 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..22 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK15-NEXT:    ret void
 //
 //
-// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..17
+// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..22
 // CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK15-NEXT:  entry:
 // CHECK15-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -14277,7 +14235,7 @@ int main (int argc, char **argv) {
 // CHECK15:       omp.inner.for.body:
 // CHECK15-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
 // CHECK15-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
-// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]])
+// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..23 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]])
 // CHECK15-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK15:       omp.inner.for.inc:
 // CHECK15-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -14292,7 +14250,7 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    ret void
 //
 //
-// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..18
+// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..23
 // CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK15-NEXT:  entry:
 // CHECK15-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -14378,11 +14336,11 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
 // CHECK15-NEXT:    store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK15-NEXT:    [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i32)* @.omp_outlined..21 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i32 [[TMP2]])
+// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i32)* @.omp_outlined..26 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i32 [[TMP2]])
 // CHECK15-NEXT:    ret void
 //
 //
-// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..21
+// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..26
 // CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK15-NEXT:  entry:
 // CHECK15-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -14434,7 +14392,7 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
 // CHECK15-NEXT:    store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK15-NEXT:    [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..22 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]])
+// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..27 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]])
 // CHECK15-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK15:       omp.inner.for.inc:
 // CHECK15-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -14449,7 +14407,7 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    ret void
 //
 //
-// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..22
+// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..27
 // CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK15-NEXT:  entry:
 // CHECK15-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -14551,11 +14509,11 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4
 // CHECK15-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4
 // CHECK15-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4
-// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..25 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK15-NEXT:    ret void
 //
 //
-// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..25
+// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..30
 // CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK15-NEXT:  entry:
 // CHECK15-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -14601,7 +14559,7 @@ int main (int argc, char **argv) {
 // CHECK15:       omp.inner.for.body:
 // CHECK15-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
 // CHECK15-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
-// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]])
+// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]])
 // CHECK15-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK15:       omp.inner.for.inc:
 // CHECK15-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -14616,7 +14574,7 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    ret void
 //
 //
-// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..26
+// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..31
 // CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK15-NEXT:  entry:
 // CHECK15-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -14700,11 +14658,11 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
 // CHECK15-NEXT:    store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK15-NEXT:    [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i32)* @.omp_outlined..29 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i32 [[TMP2]])
+// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i32)* @.omp_outlined..34 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i32 [[TMP2]])
 // CHECK15-NEXT:    ret void
 //
 //
-// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..29
+// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..34
 // CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK15-NEXT:  entry:
 // CHECK15-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -14756,7 +14714,7 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
 // CHECK15-NEXT:    store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK15-NEXT:    [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]])
+// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]])
 // CHECK15-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK15:       omp.inner.for.inc:
 // CHECK15-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -14771,7 +14729,7 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    ret void
 //
 //
-// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..30
+// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..35
 // CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK15-NEXT:  entry:
 // CHECK15-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -14847,7 +14805,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK15-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK15-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK15-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK15-NEXT:  entry:
 // CHECK15-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK15-NEXT:    ret void
@@ -14922,14 +14880,14 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4
 // CHECK16-NEXT:    [[TMP4:%.*]] = mul nuw i32 [[TMP0]], 4
 // CHECK16-NEXT:    [[TMP5:%.*]] = sext i32 [[TMP4]] to i64
-// CHECK16-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK16-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32*
-// CHECK16-NEXT:    store i32 [[TMP3]], i32* [[TMP7]], align 4
-// CHECK16-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK16-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i32*
-// CHECK16-NEXT:    store i32 [[TMP3]], i32* [[TMP9]], align 4
-// CHECK16-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK16-NEXT:    store i64 4, i64* [[TMP10]], align 4
+// CHECK16-NEXT:    [[TMP6:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK16-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP6]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes to i8*), i32 24, i1 false)
+// CHECK16-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK16-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i32*
+// CHECK16-NEXT:    store i32 [[TMP3]], i32* [[TMP8]], align 4
+// CHECK16-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK16-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i32*
+// CHECK16-NEXT:    store i32 [[TMP3]], i32* [[TMP10]], align 4
 // CHECK16-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
 // CHECK16-NEXT:    store i8* null, i8** [[TMP11]], align 4
 // CHECK16-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -14938,302 +14896,288 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK16-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i32*
 // CHECK16-NEXT:    store i32 [[TMP0]], i32* [[TMP15]], align 4
-// CHECK16-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK16-NEXT:    store i64 4, i64* [[TMP16]], align 4
-// CHECK16-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK16-NEXT:    store i8* null, i8** [[TMP17]], align 4
-// CHECK16-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK16-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
-// CHECK16-NEXT:    store i32* [[VLA]], i32** [[TMP19]], align 4
-// CHECK16-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK16-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32**
-// CHECK16-NEXT:    store i32* [[VLA]], i32** [[TMP21]], align 4
-// CHECK16-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK16-NEXT:    store i64 [[TMP5]], i64* [[TMP22]], align 4
-// CHECK16-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK16-NEXT:    store i8* null, i8** [[TMP23]], align 4
-// CHECK16-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK16-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK16-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK16-NEXT:    [[TMP27:%.*]] = load i32, i32* [[N]], align 4
-// CHECK16-NEXT:    store i32 [[TMP27]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK16-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK16-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP28]], 0
+// CHECK16-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK16-NEXT:    store i8* null, i8** [[TMP16]], align 4
+// CHECK16-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK16-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32**
+// CHECK16-NEXT:    store i32* [[VLA]], i32** [[TMP18]], align 4
+// CHECK16-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK16-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32**
+// CHECK16-NEXT:    store i32* [[VLA]], i32** [[TMP20]], align 4
+// CHECK16-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
+// CHECK16-NEXT:    store i64 [[TMP5]], i64* [[TMP21]], align 4
+// CHECK16-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK16-NEXT:    store i8* null, i8** [[TMP22]], align 4
+// CHECK16-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK16-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK16-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK16-NEXT:    [[TMP26:%.*]] = load i32, i32* [[N]], align 4
+// CHECK16-NEXT:    store i32 [[TMP26]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK16-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK16-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP27]], 0
 // CHECK16-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK16-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK16-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK16-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK16-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP29]], 1
-// CHECK16-NEXT:    [[TMP30:%.*]] = zext i32 [[ADD]] to i64
-// CHECK16-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP30]])
-// CHECK16-NEXT:    [[TMP31:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l139.region_id, i32 3, i8** [[TMP24]], i8** [[TMP25]], i64* [[TMP26]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK16-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
-// CHECK16-NEXT:    br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK16-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK16-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP28]], 1
+// CHECK16-NEXT:    [[TMP29:%.*]] = zext i32 [[ADD]] to i64
+// CHECK16-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP29]])
+// CHECK16-NEXT:    [[TMP30:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l139.region_id, i32 3, i8** [[TMP23]], i8** [[TMP24]], i64* [[TMP25]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK16-NEXT:    [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
+// CHECK16-NEXT:    br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK16:       omp_offload.failed:
 // CHECK16-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l139(i32 [[TMP3]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK16-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK16:       omp_offload.cont:
-// CHECK16-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N]], align 4
-// CHECK16-NEXT:    store i32 [[TMP33]], i32* [[N_CASTED3]], align 4
-// CHECK16-NEXT:    [[TMP34:%.*]] = load i32, i32* [[N_CASTED3]], align 4
-// CHECK16-NEXT:    [[TMP35:%.*]] = mul nuw i32 [[TMP0]], 4
-// CHECK16-NEXT:    [[TMP36:%.*]] = sext i32 [[TMP35]] to i64
+// CHECK16-NEXT:    [[TMP32:%.*]] = load i32, i32* [[N]], align 4
+// CHECK16-NEXT:    store i32 [[TMP32]], i32* [[N_CASTED3]], align 4
+// CHECK16-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N_CASTED3]], align 4
+// CHECK16-NEXT:    [[TMP34:%.*]] = mul nuw i32 [[TMP0]], 4
+// CHECK16-NEXT:    [[TMP35:%.*]] = sext i32 [[TMP34]] to i64
+// CHECK16-NEXT:    [[TMP36:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES7]] to i8*
+// CHECK16-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP36]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes.4 to i8*), i32 24, i1 false)
 // CHECK16-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
 // CHECK16-NEXT:    [[TMP38:%.*]] = bitcast i8** [[TMP37]] to i32*
-// CHECK16-NEXT:    store i32 [[TMP34]], i32* [[TMP38]], align 4
+// CHECK16-NEXT:    store i32 [[TMP33]], i32* [[TMP38]], align 4
 // CHECK16-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
 // CHECK16-NEXT:    [[TMP40:%.*]] = bitcast i8** [[TMP39]] to i32*
-// CHECK16-NEXT:    store i32 [[TMP34]], i32* [[TMP40]], align 4
-// CHECK16-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 0
-// CHECK16-NEXT:    store i64 4, i64* [[TMP41]], align 4
-// CHECK16-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 0
-// CHECK16-NEXT:    store i8* null, i8** [[TMP42]], align 4
-// CHECK16-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 1
-// CHECK16-NEXT:    [[TMP44:%.*]] = bitcast i8** [[TMP43]] to i32*
-// CHECK16-NEXT:    store i32 [[TMP0]], i32* [[TMP44]], align 4
-// CHECK16-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 1
-// CHECK16-NEXT:    [[TMP46:%.*]] = bitcast i8** [[TMP45]] to i32*
-// CHECK16-NEXT:    store i32 [[TMP0]], i32* [[TMP46]], align 4
-// CHECK16-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 1
-// CHECK16-NEXT:    store i64 4, i64* [[TMP47]], align 4
-// CHECK16-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 1
-// CHECK16-NEXT:    store i8* null, i8** [[TMP48]], align 4
-// CHECK16-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 2
+// CHECK16-NEXT:    store i32 [[TMP33]], i32* [[TMP40]], align 4
+// CHECK16-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 0
+// CHECK16-NEXT:    store i8* null, i8** [[TMP41]], align 4
+// CHECK16-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 1
+// CHECK16-NEXT:    [[TMP43:%.*]] = bitcast i8** [[TMP42]] to i32*
+// CHECK16-NEXT:    store i32 [[TMP0]], i32* [[TMP43]], align 4
+// CHECK16-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 1
+// CHECK16-NEXT:    [[TMP45:%.*]] = bitcast i8** [[TMP44]] to i32*
+// CHECK16-NEXT:    store i32 [[TMP0]], i32* [[TMP45]], align 4
+// CHECK16-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 1
+// CHECK16-NEXT:    store i8* null, i8** [[TMP46]], align 4
+// CHECK16-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 2
+// CHECK16-NEXT:    [[TMP48:%.*]] = bitcast i8** [[TMP47]] to i32**
+// CHECK16-NEXT:    store i32* [[VLA]], i32** [[TMP48]], align 4
+// CHECK16-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 2
 // CHECK16-NEXT:    [[TMP50:%.*]] = bitcast i8** [[TMP49]] to i32**
 // CHECK16-NEXT:    store i32* [[VLA]], i32** [[TMP50]], align 4
-// CHECK16-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 2
-// CHECK16-NEXT:    [[TMP52:%.*]] = bitcast i8** [[TMP51]] to i32**
-// CHECK16-NEXT:    store i32* [[VLA]], i32** [[TMP52]], align 4
-// CHECK16-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 2
-// CHECK16-NEXT:    store i64 [[TMP36]], i64* [[TMP53]], align 4
-// CHECK16-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 2
-// CHECK16-NEXT:    store i8* null, i8** [[TMP54]], align 4
-// CHECK16-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
-// CHECK16-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
-// CHECK16-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 0
-// CHECK16-NEXT:    [[TMP58:%.*]] = load i32, i32* [[N]], align 4
-// CHECK16-NEXT:    store i32 [[TMP58]], i32* [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK16-NEXT:    [[TMP59:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK16-NEXT:    [[SUB11:%.*]] = sub nsw i32 [[TMP59]], 0
+// CHECK16-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 2
+// CHECK16-NEXT:    store i64 [[TMP35]], i64* [[TMP51]], align 4
+// CHECK16-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 2
+// CHECK16-NEXT:    store i8* null, i8** [[TMP52]], align 4
+// CHECK16-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
+// CHECK16-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
+// CHECK16-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 0
+// CHECK16-NEXT:    [[TMP56:%.*]] = load i32, i32* [[N]], align 4
+// CHECK16-NEXT:    store i32 [[TMP56]], i32* [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK16-NEXT:    [[TMP57:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK16-NEXT:    [[SUB11:%.*]] = sub nsw i32 [[TMP57]], 0
 // CHECK16-NEXT:    [[DIV12:%.*]] = sdiv i32 [[SUB11]], 1
 // CHECK16-NEXT:    [[SUB13:%.*]] = sub nsw i32 [[DIV12]], 1
 // CHECK16-NEXT:    store i32 [[SUB13]], i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK16-NEXT:    [[TMP60:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK16-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP60]], 1
-// CHECK16-NEXT:    [[TMP61:%.*]] = zext i32 [[ADD14]] to i64
-// CHECK16-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP61]])
-// CHECK16-NEXT:    [[TMP62:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l143.region_id, i32 3, i8** [[TMP55]], i8** [[TMP56]], i64* [[TMP57]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK16-NEXT:    [[TMP63:%.*]] = icmp ne i32 [[TMP62]], 0
-// CHECK16-NEXT:    br i1 [[TMP63]], label [[OMP_OFFLOAD_FAILED15:%.*]], label [[OMP_OFFLOAD_CONT16:%.*]]
+// CHECK16-NEXT:    [[TMP58:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
+// CHECK16-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP58]], 1
+// CHECK16-NEXT:    [[TMP59:%.*]] = zext i32 [[ADD14]] to i64
+// CHECK16-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP59]])
+// CHECK16-NEXT:    [[TMP60:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l143.region_id, i32 3, i8** [[TMP53]], i8** [[TMP54]], i64* [[TMP55]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.5, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK16-NEXT:    [[TMP61:%.*]] = icmp ne i32 [[TMP60]], 0
+// CHECK16-NEXT:    br i1 [[TMP61]], label [[OMP_OFFLOAD_FAILED15:%.*]], label [[OMP_OFFLOAD_CONT16:%.*]]
 // CHECK16:       omp_offload.failed15:
-// CHECK16-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l143(i32 [[TMP34]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK16-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l143(i32 [[TMP33]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK16-NEXT:    br label [[OMP_OFFLOAD_CONT16]]
 // CHECK16:       omp_offload.cont16:
-// CHECK16-NEXT:    [[TMP64:%.*]] = load i32, i32* [[M]], align 4
-// CHECK16-NEXT:    store i32 [[TMP64]], i32* [[DOTCAPTURE_EXPR_17]], align 4
-// CHECK16-NEXT:    [[TMP65:%.*]] = load i32, i32* [[N]], align 4
-// CHECK16-NEXT:    store i32 [[TMP65]], i32* [[N_CASTED18]], align 4
-// CHECK16-NEXT:    [[TMP66:%.*]] = load i32, i32* [[N_CASTED18]], align 4
-// CHECK16-NEXT:    [[TMP67:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_17]], align 4
-// CHECK16-NEXT:    store i32 [[TMP67]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK16-NEXT:    [[TMP68:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK16-NEXT:    [[TMP69:%.*]] = mul nuw i32 [[TMP0]], 4
-// CHECK16-NEXT:    [[TMP70:%.*]] = sext i32 [[TMP69]] to i64
-// CHECK16-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 0
-// CHECK16-NEXT:    [[TMP72:%.*]] = bitcast i8** [[TMP71]] to i32*
-// CHECK16-NEXT:    store i32 [[TMP66]], i32* [[TMP72]], align 4
-// CHECK16-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 0
-// CHECK16-NEXT:    [[TMP74:%.*]] = bitcast i8** [[TMP73]] to i32*
-// CHECK16-NEXT:    store i32 [[TMP66]], i32* [[TMP74]], align 4
-// CHECK16-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 0
-// CHECK16-NEXT:    store i64 4, i64* [[TMP75]], align 4
-// CHECK16-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 0
-// CHECK16-NEXT:    store i8* null, i8** [[TMP76]], align 4
-// CHECK16-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 1
+// CHECK16-NEXT:    [[TMP62:%.*]] = load i32, i32* [[M]], align 4
+// CHECK16-NEXT:    store i32 [[TMP62]], i32* [[DOTCAPTURE_EXPR_17]], align 4
+// CHECK16-NEXT:    [[TMP63:%.*]] = load i32, i32* [[N]], align 4
+// CHECK16-NEXT:    store i32 [[TMP63]], i32* [[N_CASTED18]], align 4
+// CHECK16-NEXT:    [[TMP64:%.*]] = load i32, i32* [[N_CASTED18]], align 4
+// CHECK16-NEXT:    [[TMP65:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_17]], align 4
+// CHECK16-NEXT:    store i32 [[TMP65]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
+// CHECK16-NEXT:    [[TMP66:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
+// CHECK16-NEXT:    [[TMP67:%.*]] = mul nuw i32 [[TMP0]], 4
+// CHECK16-NEXT:    [[TMP68:%.*]] = sext i32 [[TMP67]] to i64
+// CHECK16-NEXT:    [[TMP69:%.*]] = bitcast [4 x i64]* [[DOTOFFLOAD_SIZES22]] to i8*
+// CHECK16-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP69]], i8* align 4 bitcast ([4 x i64]* @.offload_sizes.8 to i8*), i32 32, i1 false)
+// CHECK16-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 0
+// CHECK16-NEXT:    [[TMP71:%.*]] = bitcast i8** [[TMP70]] to i32*
+// CHECK16-NEXT:    store i32 [[TMP64]], i32* [[TMP71]], align 4
+// CHECK16-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 0
+// CHECK16-NEXT:    [[TMP73:%.*]] = bitcast i8** [[TMP72]] to i32*
+// CHECK16-NEXT:    store i32 [[TMP64]], i32* [[TMP73]], align 4
+// CHECK16-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 0
+// CHECK16-NEXT:    store i8* null, i8** [[TMP74]], align 4
+// CHECK16-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 1
+// CHECK16-NEXT:    [[TMP76:%.*]] = bitcast i8** [[TMP75]] to i32*
+// CHECK16-NEXT:    store i32 [[TMP0]], i32* [[TMP76]], align 4
+// CHECK16-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 1
 // CHECK16-NEXT:    [[TMP78:%.*]] = bitcast i8** [[TMP77]] to i32*
 // CHECK16-NEXT:    store i32 [[TMP0]], i32* [[TMP78]], align 4
-// CHECK16-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 1
-// CHECK16-NEXT:    [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i32*
-// CHECK16-NEXT:    store i32 [[TMP0]], i32* [[TMP80]], align 4
-// CHECK16-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 1
-// CHECK16-NEXT:    store i64 4, i64* [[TMP81]], align 4
-// CHECK16-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 1
-// CHECK16-NEXT:    store i8* null, i8** [[TMP82]], align 4
-// CHECK16-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 2
-// CHECK16-NEXT:    [[TMP84:%.*]] = bitcast i8** [[TMP83]] to i32**
-// CHECK16-NEXT:    store i32* [[VLA]], i32** [[TMP84]], align 4
-// CHECK16-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 2
-// CHECK16-NEXT:    [[TMP86:%.*]] = bitcast i8** [[TMP85]] to i32**
-// CHECK16-NEXT:    store i32* [[VLA]], i32** [[TMP86]], align 4
-// CHECK16-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 2
-// CHECK16-NEXT:    store i64 [[TMP70]], i64* [[TMP87]], align 4
-// CHECK16-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 2
-// CHECK16-NEXT:    store i8* null, i8** [[TMP88]], align 4
-// CHECK16-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 3
-// CHECK16-NEXT:    [[TMP90:%.*]] = bitcast i8** [[TMP89]] to i32*
-// CHECK16-NEXT:    store i32 [[TMP68]], i32* [[TMP90]], align 4
-// CHECK16-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 3
-// CHECK16-NEXT:    [[TMP92:%.*]] = bitcast i8** [[TMP91]] to i32*
-// CHECK16-NEXT:    store i32 [[TMP68]], i32* [[TMP92]], align 4
-// CHECK16-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 3
-// CHECK16-NEXT:    store i64 4, i64* [[TMP93]], align 4
-// CHECK16-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 3
-// CHECK16-NEXT:    store i8* null, i8** [[TMP94]], align 4
-// CHECK16-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 0
-// CHECK16-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 0
-// CHECK16-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 0
-// CHECK16-NEXT:    [[TMP98:%.*]] = load i32, i32* [[N]], align 4
-// CHECK16-NEXT:    store i32 [[TMP98]], i32* [[DOTCAPTURE_EXPR_24]], align 4
-// CHECK16-NEXT:    [[TMP99:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_24]], align 4
-// CHECK16-NEXT:    [[SUB26:%.*]] = sub nsw i32 [[TMP99]], 0
+// CHECK16-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 1
+// CHECK16-NEXT:    store i8* null, i8** [[TMP79]], align 4
+// CHECK16-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 2
+// CHECK16-NEXT:    [[TMP81:%.*]] = bitcast i8** [[TMP80]] to i32**
+// CHECK16-NEXT:    store i32* [[VLA]], i32** [[TMP81]], align 4
+// CHECK16-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 2
+// CHECK16-NEXT:    [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i32**
+// CHECK16-NEXT:    store i32* [[VLA]], i32** [[TMP83]], align 4
+// CHECK16-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 2
+// CHECK16-NEXT:    store i64 [[TMP68]], i64* [[TMP84]], align 4
+// CHECK16-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 2
+// CHECK16-NEXT:    store i8* null, i8** [[TMP85]], align 4
+// CHECK16-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 3
+// CHECK16-NEXT:    [[TMP87:%.*]] = bitcast i8** [[TMP86]] to i32*
+// CHECK16-NEXT:    store i32 [[TMP66]], i32* [[TMP87]], align 4
+// CHECK16-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 3
+// CHECK16-NEXT:    [[TMP89:%.*]] = bitcast i8** [[TMP88]] to i32*
+// CHECK16-NEXT:    store i32 [[TMP66]], i32* [[TMP89]], align 4
+// CHECK16-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 3
+// CHECK16-NEXT:    store i8* null, i8** [[TMP90]], align 4
+// CHECK16-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 0
+// CHECK16-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 0
+// CHECK16-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 0
+// CHECK16-NEXT:    [[TMP94:%.*]] = load i32, i32* [[N]], align 4
+// CHECK16-NEXT:    store i32 [[TMP94]], i32* [[DOTCAPTURE_EXPR_24]], align 4
+// CHECK16-NEXT:    [[TMP95:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_24]], align 4
+// CHECK16-NEXT:    [[SUB26:%.*]] = sub nsw i32 [[TMP95]], 0
 // CHECK16-NEXT:    [[DIV27:%.*]] = sdiv i32 [[SUB26]], 1
 // CHECK16-NEXT:    [[SUB28:%.*]] = sub nsw i32 [[DIV27]], 1
 // CHECK16-NEXT:    store i32 [[SUB28]], i32* [[DOTCAPTURE_EXPR_25]], align 4
-// CHECK16-NEXT:    [[TMP100:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_25]], align 4
-// CHECK16-NEXT:    [[ADD29:%.*]] = add nsw i32 [[TMP100]], 1
-// CHECK16-NEXT:    [[TMP101:%.*]] = zext i32 [[ADD29]] to i64
-// CHECK16-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP101]])
-// CHECK16-NEXT:    [[TMP102:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l147.region_id, i32 4, i8** [[TMP95]], i8** [[TMP96]], i64* [[TMP97]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.7, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK16-NEXT:    [[TMP103:%.*]] = icmp ne i32 [[TMP102]], 0
-// CHECK16-NEXT:    br i1 [[TMP103]], label [[OMP_OFFLOAD_FAILED30:%.*]], label [[OMP_OFFLOAD_CONT31:%.*]]
+// CHECK16-NEXT:    [[TMP96:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_25]], align 4
+// CHECK16-NEXT:    [[ADD29:%.*]] = add nsw i32 [[TMP96]], 1
+// CHECK16-NEXT:    [[TMP97:%.*]] = zext i32 [[ADD29]] to i64
+// CHECK16-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP97]])
+// CHECK16-NEXT:    [[TMP98:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l147.region_id, i32 4, i8** [[TMP91]], i8** [[TMP92]], i64* [[TMP93]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK16-NEXT:    [[TMP99:%.*]] = icmp ne i32 [[TMP98]], 0
+// CHECK16-NEXT:    br i1 [[TMP99]], label [[OMP_OFFLOAD_FAILED30:%.*]], label [[OMP_OFFLOAD_CONT31:%.*]]
 // CHECK16:       omp_offload.failed30:
-// CHECK16-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l147(i32 [[TMP66]], i32 [[TMP0]], i32* [[VLA]], i32 [[TMP68]]) #[[ATTR3]]
+// CHECK16-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l147(i32 [[TMP64]], i32 [[TMP0]], i32* [[VLA]], i32 [[TMP66]]) #[[ATTR3]]
 // CHECK16-NEXT:    br label [[OMP_OFFLOAD_CONT31]]
 // CHECK16:       omp_offload.cont31:
-// CHECK16-NEXT:    [[TMP104:%.*]] = load i32, i32* [[N]], align 4
-// CHECK16-NEXT:    store i32 [[TMP104]], i32* [[N_CASTED32]], align 4
-// CHECK16-NEXT:    [[TMP105:%.*]] = load i32, i32* [[N_CASTED32]], align 4
-// CHECK16-NEXT:    [[TMP106:%.*]] = mul nuw i32 [[TMP0]], 4
-// CHECK16-NEXT:    [[TMP107:%.*]] = sext i32 [[TMP106]] to i64
-// CHECK16-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS33]], i32 0, i32 0
-// CHECK16-NEXT:    [[TMP109:%.*]] = bitcast i8** [[TMP108]] to i32*
-// CHECK16-NEXT:    store i32 [[TMP105]], i32* [[TMP109]], align 4
-// CHECK16-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS34]], i32 0, i32 0
+// CHECK16-NEXT:    [[TMP100:%.*]] = load i32, i32* [[N]], align 4
+// CHECK16-NEXT:    store i32 [[TMP100]], i32* [[N_CASTED32]], align 4
+// CHECK16-NEXT:    [[TMP101:%.*]] = load i32, i32* [[N_CASTED32]], align 4
+// CHECK16-NEXT:    [[TMP102:%.*]] = mul nuw i32 [[TMP0]], 4
+// CHECK16-NEXT:    [[TMP103:%.*]] = sext i32 [[TMP102]] to i64
+// CHECK16-NEXT:    [[TMP104:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES36]] to i8*
+// CHECK16-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP104]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes.12 to i8*), i32 24, i1 false)
+// CHECK16-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS33]], i32 0, i32 0
+// CHECK16-NEXT:    [[TMP106:%.*]] = bitcast i8** [[TMP105]] to i32*
+// CHECK16-NEXT:    store i32 [[TMP101]], i32* [[TMP106]], align 4
+// CHECK16-NEXT:    [[TMP107:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS34]], i32 0, i32 0
+// CHECK16-NEXT:    [[TMP108:%.*]] = bitcast i8** [[TMP107]] to i32*
+// CHECK16-NEXT:    store i32 [[TMP101]], i32* [[TMP108]], align 4
+// CHECK16-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS35]], i32 0, i32 0
+// CHECK16-NEXT:    store i8* null, i8** [[TMP109]], align 4
+// CHECK16-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS33]], i32 0, i32 1
 // CHECK16-NEXT:    [[TMP111:%.*]] = bitcast i8** [[TMP110]] to i32*
-// CHECK16-NEXT:    store i32 [[TMP105]], i32* [[TMP111]], align 4
-// CHECK16-NEXT:    [[TMP112:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES36]], i32 0, i32 0
-// CHECK16-NEXT:    store i64 4, i64* [[TMP112]], align 4
-// CHECK16-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS35]], i32 0, i32 0
-// CHECK16-NEXT:    store i8* null, i8** [[TMP113]], align 4
-// CHECK16-NEXT:    [[TMP114:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS33]], i32 0, i32 1
-// CHECK16-NEXT:    [[TMP115:%.*]] = bitcast i8** [[TMP114]] to i32*
-// CHECK16-NEXT:    store i32 [[TMP0]], i32* [[TMP115]], align 4
-// CHECK16-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS34]], i32 0, i32 1
-// CHECK16-NEXT:    [[TMP117:%.*]] = bitcast i8** [[TMP116]] to i32*
-// CHECK16-NEXT:    store i32 [[TMP0]], i32* [[TMP117]], align 4
-// CHECK16-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES36]], i32 0, i32 1
-// CHECK16-NEXT:    store i64 4, i64* [[TMP118]], align 4
-// CHECK16-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS35]], i32 0, i32 1
-// CHECK16-NEXT:    store i8* null, i8** [[TMP119]], align 4
-// CHECK16-NEXT:    [[TMP120:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS33]], i32 0, i32 2
-// CHECK16-NEXT:    [[TMP121:%.*]] = bitcast i8** [[TMP120]] to i32**
-// CHECK16-NEXT:    store i32* [[VLA]], i32** [[TMP121]], align 4
-// CHECK16-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS34]], i32 0, i32 2
-// CHECK16-NEXT:    [[TMP123:%.*]] = bitcast i8** [[TMP122]] to i32**
-// CHECK16-NEXT:    store i32* [[VLA]], i32** [[TMP123]], align 4
-// CHECK16-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES36]], i32 0, i32 2
-// CHECK16-NEXT:    store i64 [[TMP107]], i64* [[TMP124]], align 4
-// CHECK16-NEXT:    [[TMP125:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS35]], i32 0, i32 2
-// CHECK16-NEXT:    store i8* null, i8** [[TMP125]], align 4
-// CHECK16-NEXT:    [[TMP126:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS33]], i32 0, i32 0
-// CHECK16-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS34]], i32 0, i32 0
-// CHECK16-NEXT:    [[TMP128:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES36]], i32 0, i32 0
-// CHECK16-NEXT:    [[TMP129:%.*]] = load i32, i32* [[N]], align 4
-// CHECK16-NEXT:    store i32 [[TMP129]], i32* [[DOTCAPTURE_EXPR_38]], align 4
-// CHECK16-NEXT:    [[TMP130:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_38]], align 4
-// CHECK16-NEXT:    [[SUB40:%.*]] = sub nsw i32 [[TMP130]], 0
+// CHECK16-NEXT:    store i32 [[TMP0]], i32* [[TMP111]], align 4
+// CHECK16-NEXT:    [[TMP112:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS34]], i32 0, i32 1
+// CHECK16-NEXT:    [[TMP113:%.*]] = bitcast i8** [[TMP112]] to i32*
+// CHECK16-NEXT:    store i32 [[TMP0]], i32* [[TMP113]], align 4
+// CHECK16-NEXT:    [[TMP114:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS35]], i32 0, i32 1
+// CHECK16-NEXT:    store i8* null, i8** [[TMP114]], align 4
+// CHECK16-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS33]], i32 0, i32 2
+// CHECK16-NEXT:    [[TMP116:%.*]] = bitcast i8** [[TMP115]] to i32**
+// CHECK16-NEXT:    store i32* [[VLA]], i32** [[TMP116]], align 4
+// CHECK16-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS34]], i32 0, i32 2
+// CHECK16-NEXT:    [[TMP118:%.*]] = bitcast i8** [[TMP117]] to i32**
+// CHECK16-NEXT:    store i32* [[VLA]], i32** [[TMP118]], align 4
+// CHECK16-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES36]], i32 0, i32 2
+// CHECK16-NEXT:    store i64 [[TMP103]], i64* [[TMP119]], align 4
+// CHECK16-NEXT:    [[TMP120:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS35]], i32 0, i32 2
+// CHECK16-NEXT:    store i8* null, i8** [[TMP120]], align 4
+// CHECK16-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS33]], i32 0, i32 0
+// CHECK16-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS34]], i32 0, i32 0
+// CHECK16-NEXT:    [[TMP123:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES36]], i32 0, i32 0
+// CHECK16-NEXT:    [[TMP124:%.*]] = load i32, i32* [[N]], align 4
+// CHECK16-NEXT:    store i32 [[TMP124]], i32* [[DOTCAPTURE_EXPR_38]], align 4
+// CHECK16-NEXT:    [[TMP125:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_38]], align 4
+// CHECK16-NEXT:    [[SUB40:%.*]] = sub nsw i32 [[TMP125]], 0
 // CHECK16-NEXT:    [[DIV41:%.*]] = sdiv i32 [[SUB40]], 1
 // CHECK16-NEXT:    [[SUB42:%.*]] = sub nsw i32 [[DIV41]], 1
 // CHECK16-NEXT:    store i32 [[SUB42]], i32* [[DOTCAPTURE_EXPR_39]], align 4
-// CHECK16-NEXT:    [[TMP131:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_39]], align 4
-// CHECK16-NEXT:    [[ADD43:%.*]] = add nsw i32 [[TMP131]], 1
-// CHECK16-NEXT:    [[TMP132:%.*]] = zext i32 [[ADD43]] to i64
-// CHECK16-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP132]])
-// CHECK16-NEXT:    [[TMP133:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l151.region_id, i32 3, i8** [[TMP126]], i8** [[TMP127]], i64* [[TMP128]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.10, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK16-NEXT:    [[TMP134:%.*]] = icmp ne i32 [[TMP133]], 0
-// CHECK16-NEXT:    br i1 [[TMP134]], label [[OMP_OFFLOAD_FAILED44:%.*]], label [[OMP_OFFLOAD_CONT45:%.*]]
+// CHECK16-NEXT:    [[TMP126:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_39]], align 4
+// CHECK16-NEXT:    [[ADD43:%.*]] = add nsw i32 [[TMP126]], 1
+// CHECK16-NEXT:    [[TMP127:%.*]] = zext i32 [[ADD43]] to i64
+// CHECK16-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP127]])
+// CHECK16-NEXT:    [[TMP128:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l151.region_id, i32 3, i8** [[TMP121]], i8** [[TMP122]], i64* [[TMP123]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK16-NEXT:    [[TMP129:%.*]] = icmp ne i32 [[TMP128]], 0
+// CHECK16-NEXT:    br i1 [[TMP129]], label [[OMP_OFFLOAD_FAILED44:%.*]], label [[OMP_OFFLOAD_CONT45:%.*]]
 // CHECK16:       omp_offload.failed44:
-// CHECK16-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l151(i32 [[TMP105]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK16-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l151(i32 [[TMP101]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK16-NEXT:    br label [[OMP_OFFLOAD_CONT45]]
 // CHECK16:       omp_offload.cont45:
-// CHECK16-NEXT:    [[TMP135:%.*]] = load i32, i32* [[M]], align 4
-// CHECK16-NEXT:    store i32 [[TMP135]], i32* [[DOTCAPTURE_EXPR_46]], align 4
-// CHECK16-NEXT:    [[TMP136:%.*]] = load i32, i32* [[N]], align 4
-// CHECK16-NEXT:    store i32 [[TMP136]], i32* [[N_CASTED47]], align 4
-// CHECK16-NEXT:    [[TMP137:%.*]] = load i32, i32* [[N_CASTED47]], align 4
-// CHECK16-NEXT:    [[TMP138:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_46]], align 4
-// CHECK16-NEXT:    store i32 [[TMP138]], i32* [[DOTCAPTURE_EXPR__CASTED48]], align 4
-// CHECK16-NEXT:    [[TMP139:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED48]], align 4
-// CHECK16-NEXT:    [[TMP140:%.*]] = mul nuw i32 [[TMP0]], 4
-// CHECK16-NEXT:    [[TMP141:%.*]] = sext i32 [[TMP140]] to i64
-// CHECK16-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS49]], i32 0, i32 0
-// CHECK16-NEXT:    [[TMP143:%.*]] = bitcast i8** [[TMP142]] to i32*
-// CHECK16-NEXT:    store i32 [[TMP137]], i32* [[TMP143]], align 4
-// CHECK16-NEXT:    [[TMP144:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS50]], i32 0, i32 0
-// CHECK16-NEXT:    [[TMP145:%.*]] = bitcast i8** [[TMP144]] to i32*
-// CHECK16-NEXT:    store i32 [[TMP137]], i32* [[TMP145]], align 4
-// CHECK16-NEXT:    [[TMP146:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES52]], i32 0, i32 0
-// CHECK16-NEXT:    store i64 4, i64* [[TMP146]], align 4
-// CHECK16-NEXT:    [[TMP147:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS51]], i32 0, i32 0
+// CHECK16-NEXT:    [[TMP130:%.*]] = load i32, i32* [[M]], align 4
+// CHECK16-NEXT:    store i32 [[TMP130]], i32* [[DOTCAPTURE_EXPR_46]], align 4
+// CHECK16-NEXT:    [[TMP131:%.*]] = load i32, i32* [[N]], align 4
+// CHECK16-NEXT:    store i32 [[TMP131]], i32* [[N_CASTED47]], align 4
+// CHECK16-NEXT:    [[TMP132:%.*]] = load i32, i32* [[N_CASTED47]], align 4
+// CHECK16-NEXT:    [[TMP133:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_46]], align 4
+// CHECK16-NEXT:    store i32 [[TMP133]], i32* [[DOTCAPTURE_EXPR__CASTED48]], align 4
+// CHECK16-NEXT:    [[TMP134:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED48]], align 4
+// CHECK16-NEXT:    [[TMP135:%.*]] = mul nuw i32 [[TMP0]], 4
+// CHECK16-NEXT:    [[TMP136:%.*]] = sext i32 [[TMP135]] to i64
+// CHECK16-NEXT:    [[TMP137:%.*]] = bitcast [4 x i64]* [[DOTOFFLOAD_SIZES52]] to i8*
+// CHECK16-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP137]], i8* align 4 bitcast ([4 x i64]* @.offload_sizes.16 to i8*), i32 32, i1 false)
+// CHECK16-NEXT:    [[TMP138:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS49]], i32 0, i32 0
+// CHECK16-NEXT:    [[TMP139:%.*]] = bitcast i8** [[TMP138]] to i32*
+// CHECK16-NEXT:    store i32 [[TMP132]], i32* [[TMP139]], align 4
+// CHECK16-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS50]], i32 0, i32 0
+// CHECK16-NEXT:    [[TMP141:%.*]] = bitcast i8** [[TMP140]] to i32*
+// CHECK16-NEXT:    store i32 [[TMP132]], i32* [[TMP141]], align 4
+// CHECK16-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS51]], i32 0, i32 0
+// CHECK16-NEXT:    store i8* null, i8** [[TMP142]], align 4
+// CHECK16-NEXT:    [[TMP143:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS49]], i32 0, i32 1
+// CHECK16-NEXT:    [[TMP144:%.*]] = bitcast i8** [[TMP143]] to i32*
+// CHECK16-NEXT:    store i32 [[TMP0]], i32* [[TMP144]], align 4
+// CHECK16-NEXT:    [[TMP145:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS50]], i32 0, i32 1
+// CHECK16-NEXT:    [[TMP146:%.*]] = bitcast i8** [[TMP145]] to i32*
+// CHECK16-NEXT:    store i32 [[TMP0]], i32* [[TMP146]], align 4
+// CHECK16-NEXT:    [[TMP147:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS51]], i32 0, i32 1
 // CHECK16-NEXT:    store i8* null, i8** [[TMP147]], align 4
-// CHECK16-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS49]], i32 0, i32 1
-// CHECK16-NEXT:    [[TMP149:%.*]] = bitcast i8** [[TMP148]] to i32*
-// CHECK16-NEXT:    store i32 [[TMP0]], i32* [[TMP149]], align 4
-// CHECK16-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS50]], i32 0, i32 1
-// CHECK16-NEXT:    [[TMP151:%.*]] = bitcast i8** [[TMP150]] to i32*
-// CHECK16-NEXT:    store i32 [[TMP0]], i32* [[TMP151]], align 4
-// CHECK16-NEXT:    [[TMP152:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES52]], i32 0, i32 1
-// CHECK16-NEXT:    store i64 4, i64* [[TMP152]], align 4
-// CHECK16-NEXT:    [[TMP153:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS51]], i32 0, i32 1
+// CHECK16-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS49]], i32 0, i32 2
+// CHECK16-NEXT:    [[TMP149:%.*]] = bitcast i8** [[TMP148]] to i32**
+// CHECK16-NEXT:    store i32* [[VLA]], i32** [[TMP149]], align 4
+// CHECK16-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS50]], i32 0, i32 2
+// CHECK16-NEXT:    [[TMP151:%.*]] = bitcast i8** [[TMP150]] to i32**
+// CHECK16-NEXT:    store i32* [[VLA]], i32** [[TMP151]], align 4
+// CHECK16-NEXT:    [[TMP152:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES52]], i32 0, i32 2
+// CHECK16-NEXT:    store i64 [[TMP136]], i64* [[TMP152]], align 4
+// CHECK16-NEXT:    [[TMP153:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS51]], i32 0, i32 2
 // CHECK16-NEXT:    store i8* null, i8** [[TMP153]], align 4
-// CHECK16-NEXT:    [[TMP154:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS49]], i32 0, i32 2
-// CHECK16-NEXT:    [[TMP155:%.*]] = bitcast i8** [[TMP154]] to i32**
-// CHECK16-NEXT:    store i32* [[VLA]], i32** [[TMP155]], align 4
-// CHECK16-NEXT:    [[TMP156:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS50]], i32 0, i32 2
-// CHECK16-NEXT:    [[TMP157:%.*]] = bitcast i8** [[TMP156]] to i32**
-// CHECK16-NEXT:    store i32* [[VLA]], i32** [[TMP157]], align 4
-// CHECK16-NEXT:    [[TMP158:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES52]], i32 0, i32 2
-// CHECK16-NEXT:    store i64 [[TMP141]], i64* [[TMP158]], align 4
-// CHECK16-NEXT:    [[TMP159:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS51]], i32 0, i32 2
-// CHECK16-NEXT:    store i8* null, i8** [[TMP159]], align 4
-// CHECK16-NEXT:    [[TMP160:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS49]], i32 0, i32 3
-// CHECK16-NEXT:    [[TMP161:%.*]] = bitcast i8** [[TMP160]] to i32*
-// CHECK16-NEXT:    store i32 [[TMP139]], i32* [[TMP161]], align 4
-// CHECK16-NEXT:    [[TMP162:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS50]], i32 0, i32 3
-// CHECK16-NEXT:    [[TMP163:%.*]] = bitcast i8** [[TMP162]] to i32*
-// CHECK16-NEXT:    store i32 [[TMP139]], i32* [[TMP163]], align 4
-// CHECK16-NEXT:    [[TMP164:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES52]], i32 0, i32 3
-// CHECK16-NEXT:    store i64 4, i64* [[TMP164]], align 4
-// CHECK16-NEXT:    [[TMP165:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS51]], i32 0, i32 3
-// CHECK16-NEXT:    store i8* null, i8** [[TMP165]], align 4
-// CHECK16-NEXT:    [[TMP166:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS49]], i32 0, i32 0
-// CHECK16-NEXT:    [[TMP167:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS50]], i32 0, i32 0
-// CHECK16-NEXT:    [[TMP168:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES52]], i32 0, i32 0
-// CHECK16-NEXT:    [[TMP169:%.*]] = load i32, i32* [[N]], align 4
-// CHECK16-NEXT:    store i32 [[TMP169]], i32* [[DOTCAPTURE_EXPR_54]], align 4
-// CHECK16-NEXT:    [[TMP170:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_54]], align 4
-// CHECK16-NEXT:    [[SUB56:%.*]] = sub nsw i32 [[TMP170]], 0
+// CHECK16-NEXT:    [[TMP154:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS49]], i32 0, i32 3
+// CHECK16-NEXT:    [[TMP155:%.*]] = bitcast i8** [[TMP154]] to i32*
+// CHECK16-NEXT:    store i32 [[TMP134]], i32* [[TMP155]], align 4
+// CHECK16-NEXT:    [[TMP156:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS50]], i32 0, i32 3
+// CHECK16-NEXT:    [[TMP157:%.*]] = bitcast i8** [[TMP156]] to i32*
+// CHECK16-NEXT:    store i32 [[TMP134]], i32* [[TMP157]], align 4
+// CHECK16-NEXT:    [[TMP158:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS51]], i32 0, i32 3
+// CHECK16-NEXT:    store i8* null, i8** [[TMP158]], align 4
+// CHECK16-NEXT:    [[TMP159:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS49]], i32 0, i32 0
+// CHECK16-NEXT:    [[TMP160:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS50]], i32 0, i32 0
+// CHECK16-NEXT:    [[TMP161:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES52]], i32 0, i32 0
+// CHECK16-NEXT:    [[TMP162:%.*]] = load i32, i32* [[N]], align 4
+// CHECK16-NEXT:    store i32 [[TMP162]], i32* [[DOTCAPTURE_EXPR_54]], align 4
+// CHECK16-NEXT:    [[TMP163:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_54]], align 4
+// CHECK16-NEXT:    [[SUB56:%.*]] = sub nsw i32 [[TMP163]], 0
 // CHECK16-NEXT:    [[DIV57:%.*]] = sdiv i32 [[SUB56]], 1
 // CHECK16-NEXT:    [[SUB58:%.*]] = sub nsw i32 [[DIV57]], 1
 // CHECK16-NEXT:    store i32 [[SUB58]], i32* [[DOTCAPTURE_EXPR_55]], align 4
-// CHECK16-NEXT:    [[TMP171:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_55]], align 4
-// CHECK16-NEXT:    [[ADD59:%.*]] = add nsw i32 [[TMP171]], 1
-// CHECK16-NEXT:    [[TMP172:%.*]] = zext i32 [[ADD59]] to i64
-// CHECK16-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP172]])
-// CHECK16-NEXT:    [[TMP173:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l155.region_id, i32 4, i8** [[TMP166]], i8** [[TMP167]], i64* [[TMP168]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK16-NEXT:    [[TMP174:%.*]] = icmp ne i32 [[TMP173]], 0
-// CHECK16-NEXT:    br i1 [[TMP174]], label [[OMP_OFFLOAD_FAILED60:%.*]], label [[OMP_OFFLOAD_CONT61:%.*]]
+// CHECK16-NEXT:    [[TMP164:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_55]], align 4
+// CHECK16-NEXT:    [[ADD59:%.*]] = add nsw i32 [[TMP164]], 1
+// CHECK16-NEXT:    [[TMP165:%.*]] = zext i32 [[ADD59]] to i64
+// CHECK16-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP165]])
+// CHECK16-NEXT:    [[TMP166:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l155.region_id, i32 4, i8** [[TMP159]], i8** [[TMP160]], i64* [[TMP161]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.17, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK16-NEXT:    [[TMP167:%.*]] = icmp ne i32 [[TMP166]], 0
+// CHECK16-NEXT:    br i1 [[TMP167]], label [[OMP_OFFLOAD_FAILED60:%.*]], label [[OMP_OFFLOAD_CONT61:%.*]]
 // CHECK16:       omp_offload.failed60:
-// CHECK16-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l155(i32 [[TMP137]], i32 [[TMP0]], i32* [[VLA]], i32 [[TMP139]]) #[[ATTR3]]
+// CHECK16-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l155(i32 [[TMP132]], i32 [[TMP0]], i32* [[VLA]], i32 [[TMP134]]) #[[ATTR3]]
 // CHECK16-NEXT:    br label [[OMP_OFFLOAD_CONT61]]
 // CHECK16:       omp_offload.cont61:
-// CHECK16-NEXT:    [[TMP175:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK16-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10EEiT_(i32 noundef [[TMP175]])
+// CHECK16-NEXT:    [[TMP168:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK16-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10EEiT_(i32 noundef [[TMP168]])
 // CHECK16-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK16-NEXT:    [[TMP176:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK16-NEXT:    call void @llvm.stackrestore(i8* [[TMP176]])
-// CHECK16-NEXT:    [[TMP177:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK16-NEXT:    ret i32 [[TMP177]]
+// CHECK16-NEXT:    [[TMP169:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK16-NEXT:    call void @llvm.stackrestore(i8* [[TMP169]])
+// CHECK16-NEXT:    [[TMP170:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK16-NEXT:    ret i32 [[TMP170]]
 //
 //
 // CHECK16-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l139
@@ -15681,11 +15625,11 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
 // CHECK16-NEXT:    store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK16-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32 [[TMP3]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP5]])
+// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32 [[TMP3]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP5]])
 // CHECK16-NEXT:    ret void
 //
 //
-// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..5
+// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..6
 // CHECK16-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK16-NEXT:  entry:
 // CHECK16-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -15766,7 +15710,7 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
 // CHECK16-NEXT:    store i32 [[TMP20]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK16-NEXT:    [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*, i32)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32 [[TMP19]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP21]])
+// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*, i32)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32 [[TMP19]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP21]])
 // CHECK16-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK16:       omp.inner.for.inc:
 // CHECK16-NEXT:    [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -15808,7 +15752,7 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    ret void
 //
 //
-// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..6
+// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..7
 // CHECK16-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK16-NEXT:  entry:
 // CHECK16-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -15926,11 +15870,11 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4
 // CHECK16-NEXT:    store i32 [[TMP2]], i32* [[N_CASTED]], align 4
 // CHECK16-NEXT:    [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4
-// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), i32 [[TMP3]], i32 [[TMP0]], i32* [[TMP1]])
+// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), i32 [[TMP3]], i32 [[TMP0]], i32* [[TMP1]])
 // CHECK16-NEXT:    ret void
 //
 //
-// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..8
+// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..10
 // CHECK16-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] {
 // CHECK16-NEXT:  entry:
 // CHECK16-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -16003,7 +15947,7 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    [[TMP17:%.*]] = load i32, i32* [[N_ADDR]], align 4
 // CHECK16-NEXT:    store i32 [[TMP17]], i32* [[N_CASTED]], align 4
 // CHECK16-NEXT:    [[TMP18:%.*]] = load i32, i32* [[N_CASTED]], align 4
-// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], i32* [[TMP1]])
+// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], i32* [[TMP1]])
 // CHECK16-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK16:       omp.inner.for.inc:
 // CHECK16-NEXT:    [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -16022,7 +15966,7 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    ret void
 //
 //
-// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..9
+// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..11
 // CHECK16-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] {
 // CHECK16-NEXT:  entry:
 // CHECK16-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -16140,11 +16084,11 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
 // CHECK16-NEXT:    store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK16-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32 [[TMP3]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP5]])
+// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i32 [[TMP3]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP5]])
 // CHECK16-NEXT:    ret void
 //
 //
-// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..11
+// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..14
 // CHECK16-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK16-NEXT:  entry:
 // CHECK16-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -16223,7 +16167,7 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
 // CHECK16-NEXT:    store i32 [[TMP19]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK16-NEXT:    [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*, i32)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP20]])
+// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*, i32)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP20]])
 // CHECK16-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK16:       omp.inner.for.inc:
 // CHECK16-NEXT:    [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -16242,7 +16186,7 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    ret void
 //
 //
-// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..12
+// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..15
 // CHECK16-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK16-NEXT:  entry:
 // CHECK16-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -16343,7 +16287,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK16-LABEL: define {{[^@]+}}@_Z5tmainIiLi10EEiT_
-// CHECK16-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK16-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK16-NEXT:  entry:
 // CHECK16-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK16-NEXT:    [[A:%.*]] = alloca [10 x i32], align 4
@@ -16385,7 +16329,7 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK16-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK16-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK16-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l112.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.16, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK16-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l112.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.20, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.21, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK16-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK16-NEXT:    br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK16:       omp_offload.failed:
@@ -16403,7 +16347,7 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK16-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
 // CHECK16-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK16-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l116.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.19, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.20, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK16-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l116.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.24, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.25, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK16-NEXT:    [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
 // CHECK16-NEXT:    br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]]
 // CHECK16:       omp_offload.failed5:
@@ -16434,7 +16378,7 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0
 // CHECK16-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS8]], i32 0, i32 0
 // CHECK16-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK16-NEXT:    [[TMP33:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l120.region_id, i32 2, i8** [[TMP31]], i8** [[TMP32]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.23, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.24, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK16-NEXT:    [[TMP33:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l120.region_id, i32 2, i8** [[TMP31]], i8** [[TMP32]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.28, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.29, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK16-NEXT:    [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0
 // CHECK16-NEXT:    br i1 [[TMP34]], label [[OMP_OFFLOAD_FAILED11:%.*]], label [[OMP_OFFLOAD_CONT12:%.*]]
 // CHECK16:       omp_offload.failed11:
@@ -16452,7 +16396,7 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0
 // CHECK16-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 0
 // CHECK16-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK16-NEXT:    [[TMP42:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l124.region_id, i32 1, i8** [[TMP40]], i8** [[TMP41]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.27, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.28, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK16-NEXT:    [[TMP42:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l124.region_id, i32 1, i8** [[TMP40]], i8** [[TMP41]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.32, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.33, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK16-NEXT:    [[TMP43:%.*]] = icmp ne i32 [[TMP42]], 0
 // CHECK16-NEXT:    br i1 [[TMP43]], label [[OMP_OFFLOAD_FAILED17:%.*]], label [[OMP_OFFLOAD_CONT18:%.*]]
 // CHECK16:       omp_offload.failed17:
@@ -16483,7 +16427,7 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 0
 // CHECK16-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 0
 // CHECK16-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK16-NEXT:    [[TMP59:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l128.region_id, i32 2, i8** [[TMP57]], i8** [[TMP58]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.31, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.32, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK16-NEXT:    [[TMP59:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l128.region_id, i32 2, i8** [[TMP57]], i8** [[TMP58]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.36, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.37, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK16-NEXT:    [[TMP60:%.*]] = icmp ne i32 [[TMP59]], 0
 // CHECK16-NEXT:    br i1 [[TMP60]], label [[OMP_OFFLOAD_FAILED25:%.*]], label [[OMP_OFFLOAD_CONT26:%.*]]
 // CHECK16:       omp_offload.failed25:
@@ -16499,11 +16443,11 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4
 // CHECK16-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4
 // CHECK16-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4
-// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK16-NEXT:    ret void
 //
 //
-// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..14
+// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..18
 // CHECK16-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK16-NEXT:  entry:
 // CHECK16-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -16549,7 +16493,7 @@ int main (int argc, char **argv) {
 // CHECK16:       omp.inner.for.body:
 // CHECK16-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
 // CHECK16-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
-// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]])
+// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]])
 // CHECK16-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK16:       omp.inner.for.inc:
 // CHECK16-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -16564,7 +16508,7 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    ret void
 //
 //
-// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..15
+// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..19
 // CHECK16-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK16-NEXT:  entry:
 // CHECK16-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -16644,11 +16588,11 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4
 // CHECK16-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4
 // CHECK16-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4
-// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..17 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..22 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK16-NEXT:    ret void
 //
 //
-// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..17
+// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..22
 // CHECK16-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK16-NEXT:  entry:
 // CHECK16-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -16694,7 +16638,7 @@ int main (int argc, char **argv) {
 // CHECK16:       omp.inner.for.body:
 // CHECK16-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
 // CHECK16-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
-// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]])
+// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..23 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]])
 // CHECK16-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK16:       omp.inner.for.inc:
 // CHECK16-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -16709,7 +16653,7 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    ret void
 //
 //
-// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..18
+// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..23
 // CHECK16-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK16-NEXT:  entry:
 // CHECK16-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -16795,11 +16739,11 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
 // CHECK16-NEXT:    store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK16-NEXT:    [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i32)* @.omp_outlined..21 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i32 [[TMP2]])
+// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i32)* @.omp_outlined..26 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i32 [[TMP2]])
 // CHECK16-NEXT:    ret void
 //
 //
-// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..21
+// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..26
 // CHECK16-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK16-NEXT:  entry:
 // CHECK16-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -16851,7 +16795,7 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
 // CHECK16-NEXT:    store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK16-NEXT:    [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..22 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]])
+// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..27 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]])
 // CHECK16-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK16:       omp.inner.for.inc:
 // CHECK16-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -16866,7 +16810,7 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    ret void
 //
 //
-// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..22
+// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..27
 // CHECK16-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK16-NEXT:  entry:
 // CHECK16-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -16968,11 +16912,11 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4
 // CHECK16-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4
 // CHECK16-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4
-// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..25 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK16-NEXT:    ret void
 //
 //
-// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..25
+// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..30
 // CHECK16-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK16-NEXT:  entry:
 // CHECK16-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -17018,7 +16962,7 @@ int main (int argc, char **argv) {
 // CHECK16:       omp.inner.for.body:
 // CHECK16-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
 // CHECK16-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
-// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]])
+// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]])
 // CHECK16-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK16:       omp.inner.for.inc:
 // CHECK16-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -17033,7 +16977,7 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    ret void
 //
 //
-// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..26
+// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..31
 // CHECK16-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK16-NEXT:  entry:
 // CHECK16-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -17117,11 +17061,11 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
 // CHECK16-NEXT:    store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK16-NEXT:    [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i32)* @.omp_outlined..29 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i32 [[TMP2]])
+// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i32)* @.omp_outlined..34 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i32 [[TMP2]])
 // CHECK16-NEXT:    ret void
 //
 //
-// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..29
+// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..34
 // CHECK16-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK16-NEXT:  entry:
 // CHECK16-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -17173,7 +17117,7 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
 // CHECK16-NEXT:    store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK16-NEXT:    [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]])
+// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]])
 // CHECK16-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK16:       omp.inner.for.inc:
 // CHECK16-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -17188,7 +17132,7 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    ret void
 //
 //
-// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..30
+// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..35
 // CHECK16-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK16-NEXT:  entry:
 // CHECK16-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -17264,7 +17208,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK16-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK16-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK16-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK16-NEXT:  entry:
 // CHECK16-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK16-NEXT:    ret void
@@ -17340,14 +17284,14 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    store i32 [[TMP3]], i32* [[CONV]], align 4
 // CHECK17-NEXT:    [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8
 // CHECK17-NEXT:    [[TMP5:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK17-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP4]], i64* [[TMP7]], align 8
-// CHECK17-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP4]], i64* [[TMP9]], align 8
-// CHECK17-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK17-NEXT:    store i64 4, i64* [[TMP10]], align 8
+// CHECK17-NEXT:    [[TMP6:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK17-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP6]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes to i8*), i64 24, i1 false)
+// CHECK17-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i64*
+// CHECK17-NEXT:    store i64 [[TMP4]], i64* [[TMP8]], align 8
+// CHECK17-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i64*
+// CHECK17-NEXT:    store i64 [[TMP4]], i64* [[TMP10]], align 8
 // CHECK17-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
 // CHECK17-NEXT:    store i8* null, i8** [[TMP11]], align 8
 // CHECK17-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -17356,304 +17300,290 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK17-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i64*
 // CHECK17-NEXT:    store i64 [[TMP1]], i64* [[TMP15]], align 8
-// CHECK17-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK17-NEXT:    store i64 8, i64* [[TMP16]], align 8
-// CHECK17-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK17-NEXT:    store i8* null, i8** [[TMP17]], align 8
-// CHECK17-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK17-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
-// CHECK17-NEXT:    store i32* [[VLA]], i32** [[TMP19]], align 8
-// CHECK17-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK17-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32**
-// CHECK17-NEXT:    store i32* [[VLA]], i32** [[TMP21]], align 8
-// CHECK17-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK17-NEXT:    store i64 [[TMP5]], i64* [[TMP22]], align 8
-// CHECK17-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK17-NEXT:    store i8* null, i8** [[TMP23]], align 8
-// CHECK17-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP27:%.*]] = load i32, i32* [[N]], align 4
-// CHECK17-NEXT:    store i32 [[TMP27]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK17-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK17-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP28]], 0
+// CHECK17-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK17-NEXT:    store i8* null, i8** [[TMP16]], align 8
+// CHECK17-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK17-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32**
+// CHECK17-NEXT:    store i32* [[VLA]], i32** [[TMP18]], align 8
+// CHECK17-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK17-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32**
+// CHECK17-NEXT:    store i32* [[VLA]], i32** [[TMP20]], align 8
+// CHECK17-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
+// CHECK17-NEXT:    store i64 [[TMP5]], i64* [[TMP21]], align 8
+// CHECK17-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK17-NEXT:    store i8* null, i8** [[TMP22]], align 8
+// CHECK17-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP26:%.*]] = load i32, i32* [[N]], align 4
+// CHECK17-NEXT:    store i32 [[TMP26]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK17-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK17-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP27]], 0
 // CHECK17-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK17-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK17-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK17-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK17-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP29]], 1
-// CHECK17-NEXT:    [[TMP30:%.*]] = zext i32 [[ADD]] to i64
-// CHECK17-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP30]])
-// CHECK17-NEXT:    [[TMP31:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l139.region_id, i32 3, i8** [[TMP24]], i8** [[TMP25]], i64* [[TMP26]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK17-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
-// CHECK17-NEXT:    br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK17-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK17-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP28]], 1
+// CHECK17-NEXT:    [[TMP29:%.*]] = zext i32 [[ADD]] to i64
+// CHECK17-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP29]])
+// CHECK17-NEXT:    [[TMP30:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l139.region_id, i32 3, i8** [[TMP23]], i8** [[TMP24]], i64* [[TMP25]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK17-NEXT:    [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
+// CHECK17-NEXT:    br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK17:       omp_offload.failed:
 // CHECK17-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l139(i64 [[TMP4]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK17-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK17:       omp_offload.cont:
-// CHECK17-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N]], align 4
+// CHECK17-NEXT:    [[TMP32:%.*]] = load i32, i32* [[N]], align 4
 // CHECK17-NEXT:    [[CONV4:%.*]] = bitcast i64* [[N_CASTED3]] to i32*
-// CHECK17-NEXT:    store i32 [[TMP33]], i32* [[CONV4]], align 4
-// CHECK17-NEXT:    [[TMP34:%.*]] = load i64, i64* [[N_CASTED3]], align 8
-// CHECK17-NEXT:    [[TMP35:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK17-NEXT:    store i32 [[TMP32]], i32* [[CONV4]], align 4
+// CHECK17-NEXT:    [[TMP33:%.*]] = load i64, i64* [[N_CASTED3]], align 8
+// CHECK17-NEXT:    [[TMP34:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK17-NEXT:    [[TMP35:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES8]] to i8*
+// CHECK17-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP35]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes.4 to i8*), i64 24, i1 false)
 // CHECK17-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
 // CHECK17-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP34]], i64* [[TMP37]], align 8
+// CHECK17-NEXT:    store i64 [[TMP33]], i64* [[TMP37]], align 8
 // CHECK17-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
 // CHECK17-NEXT:    [[TMP39:%.*]] = bitcast i8** [[TMP38]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP34]], i64* [[TMP39]], align 8
-// CHECK17-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 0
-// CHECK17-NEXT:    store i64 4, i64* [[TMP40]], align 8
-// CHECK17-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 0
-// CHECK17-NEXT:    store i8* null, i8** [[TMP41]], align 8
-// CHECK17-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 1
-// CHECK17-NEXT:    [[TMP43:%.*]] = bitcast i8** [[TMP42]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP1]], i64* [[TMP43]], align 8
-// CHECK17-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 1
-// CHECK17-NEXT:    [[TMP45:%.*]] = bitcast i8** [[TMP44]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP1]], i64* [[TMP45]], align 8
-// CHECK17-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 1
-// CHECK17-NEXT:    store i64 8, i64* [[TMP46]], align 8
-// CHECK17-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 1
-// CHECK17-NEXT:    store i8* null, i8** [[TMP47]], align 8
-// CHECK17-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 2
+// CHECK17-NEXT:    store i64 [[TMP33]], i64* [[TMP39]], align 8
+// CHECK17-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 0
+// CHECK17-NEXT:    store i8* null, i8** [[TMP40]], align 8
+// CHECK17-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 1
+// CHECK17-NEXT:    [[TMP42:%.*]] = bitcast i8** [[TMP41]] to i64*
+// CHECK17-NEXT:    store i64 [[TMP1]], i64* [[TMP42]], align 8
+// CHECK17-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 1
+// CHECK17-NEXT:    [[TMP44:%.*]] = bitcast i8** [[TMP43]] to i64*
+// CHECK17-NEXT:    store i64 [[TMP1]], i64* [[TMP44]], align 8
+// CHECK17-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 1
+// CHECK17-NEXT:    store i8* null, i8** [[TMP45]], align 8
+// CHECK17-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 2
+// CHECK17-NEXT:    [[TMP47:%.*]] = bitcast i8** [[TMP46]] to i32**
+// CHECK17-NEXT:    store i32* [[VLA]], i32** [[TMP47]], align 8
+// CHECK17-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 2
 // CHECK17-NEXT:    [[TMP49:%.*]] = bitcast i8** [[TMP48]] to i32**
 // CHECK17-NEXT:    store i32* [[VLA]], i32** [[TMP49]], align 8
-// CHECK17-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 2
-// CHECK17-NEXT:    [[TMP51:%.*]] = bitcast i8** [[TMP50]] to i32**
-// CHECK17-NEXT:    store i32* [[VLA]], i32** [[TMP51]], align 8
-// CHECK17-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 2
-// CHECK17-NEXT:    store i64 [[TMP35]], i64* [[TMP52]], align 8
-// CHECK17-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 2
-// CHECK17-NEXT:    store i8* null, i8** [[TMP53]], align 8
-// CHECK17-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP57:%.*]] = load i32, i32* [[N]], align 4
-// CHECK17-NEXT:    store i32 [[TMP57]], i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK17-NEXT:    [[TMP58:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK17-NEXT:    [[SUB12:%.*]] = sub nsw i32 [[TMP58]], 0
+// CHECK17-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 2
+// CHECK17-NEXT:    store i64 [[TMP34]], i64* [[TMP50]], align 8
+// CHECK17-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 2
+// CHECK17-NEXT:    store i8* null, i8** [[TMP51]], align 8
+// CHECK17-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP55:%.*]] = load i32, i32* [[N]], align 4
+// CHECK17-NEXT:    store i32 [[TMP55]], i32* [[DOTCAPTURE_EXPR_10]], align 4
+// CHECK17-NEXT:    [[TMP56:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
+// CHECK17-NEXT:    [[SUB12:%.*]] = sub nsw i32 [[TMP56]], 0
 // CHECK17-NEXT:    [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1
 // CHECK17-NEXT:    [[SUB14:%.*]] = sub nsw i32 [[DIV13]], 1
 // CHECK17-NEXT:    store i32 [[SUB14]], i32* [[DOTCAPTURE_EXPR_11]], align 4
-// CHECK17-NEXT:    [[TMP59:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_11]], align 4
-// CHECK17-NEXT:    [[ADD15:%.*]] = add nsw i32 [[TMP59]], 1
-// CHECK17-NEXT:    [[TMP60:%.*]] = zext i32 [[ADD15]] to i64
-// CHECK17-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP60]])
-// CHECK17-NEXT:    [[TMP61:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l143.region_id, i32 3, i8** [[TMP54]], i8** [[TMP55]], i64* [[TMP56]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK17-NEXT:    [[TMP62:%.*]] = icmp ne i32 [[TMP61]], 0
-// CHECK17-NEXT:    br i1 [[TMP62]], label [[OMP_OFFLOAD_FAILED16:%.*]], label [[OMP_OFFLOAD_CONT17:%.*]]
+// CHECK17-NEXT:    [[TMP57:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_11]], align 4
+// CHECK17-NEXT:    [[ADD15:%.*]] = add nsw i32 [[TMP57]], 1
+// CHECK17-NEXT:    [[TMP58:%.*]] = zext i32 [[ADD15]] to i64
+// CHECK17-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP58]])
+// CHECK17-NEXT:    [[TMP59:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l143.region_id, i32 3, i8** [[TMP52]], i8** [[TMP53]], i64* [[TMP54]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.5, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK17-NEXT:    [[TMP60:%.*]] = icmp ne i32 [[TMP59]], 0
+// CHECK17-NEXT:    br i1 [[TMP60]], label [[OMP_OFFLOAD_FAILED16:%.*]], label [[OMP_OFFLOAD_CONT17:%.*]]
 // CHECK17:       omp_offload.failed16:
-// CHECK17-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l143(i64 [[TMP34]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK17-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l143(i64 [[TMP33]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK17-NEXT:    br label [[OMP_OFFLOAD_CONT17]]
 // CHECK17:       omp_offload.cont17:
-// CHECK17-NEXT:    [[TMP63:%.*]] = load i32, i32* [[M]], align 4
-// CHECK17-NEXT:    store i32 [[TMP63]], i32* [[DOTCAPTURE_EXPR_18]], align 4
-// CHECK17-NEXT:    [[TMP64:%.*]] = load i32, i32* [[N]], align 4
+// CHECK17-NEXT:    [[TMP61:%.*]] = load i32, i32* [[M]], align 4
+// CHECK17-NEXT:    store i32 [[TMP61]], i32* [[DOTCAPTURE_EXPR_18]], align 4
+// CHECK17-NEXT:    [[TMP62:%.*]] = load i32, i32* [[N]], align 4
 // CHECK17-NEXT:    [[CONV20:%.*]] = bitcast i64* [[N_CASTED19]] to i32*
-// CHECK17-NEXT:    store i32 [[TMP64]], i32* [[CONV20]], align 4
-// CHECK17-NEXT:    [[TMP65:%.*]] = load i64, i64* [[N_CASTED19]], align 8
-// CHECK17-NEXT:    [[TMP66:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4
+// CHECK17-NEXT:    store i32 [[TMP62]], i32* [[CONV20]], align 4
+// CHECK17-NEXT:    [[TMP63:%.*]] = load i64, i64* [[N_CASTED19]], align 8
+// CHECK17-NEXT:    [[TMP64:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4
 // CHECK17-NEXT:    [[CONV21:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
-// CHECK17-NEXT:    store i32 [[TMP66]], i32* [[CONV21]], align 4
-// CHECK17-NEXT:    [[TMP67:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK17-NEXT:    [[TMP68:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK17-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP70:%.*]] = bitcast i8** [[TMP69]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP65]], i64* [[TMP70]], align 8
-// CHECK17-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP72:%.*]] = bitcast i8** [[TMP71]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP65]], i64* [[TMP72]], align 8
-// CHECK17-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 0
-// CHECK17-NEXT:    store i64 4, i64* [[TMP73]], align 8
-// CHECK17-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 0
-// CHECK17-NEXT:    store i8* null, i8** [[TMP74]], align 8
-// CHECK17-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 1
+// CHECK17-NEXT:    store i32 [[TMP64]], i32* [[CONV21]], align 4
+// CHECK17-NEXT:    [[TMP65:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
+// CHECK17-NEXT:    [[TMP66:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK17-NEXT:    [[TMP67:%.*]] = bitcast [4 x i64]* [[DOTOFFLOAD_SIZES25]] to i8*
+// CHECK17-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP67]], i8* align 8 bitcast ([4 x i64]* @.offload_sizes.8 to i8*), i64 32, i1 false)
+// CHECK17-NEXT:    [[TMP68:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP69:%.*]] = bitcast i8** [[TMP68]] to i64*
+// CHECK17-NEXT:    store i64 [[TMP63]], i64* [[TMP69]], align 8
+// CHECK17-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP71:%.*]] = bitcast i8** [[TMP70]] to i64*
+// CHECK17-NEXT:    store i64 [[TMP63]], i64* [[TMP71]], align 8
+// CHECK17-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 0
+// CHECK17-NEXT:    store i8* null, i8** [[TMP72]], align 8
+// CHECK17-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 1
+// CHECK17-NEXT:    [[TMP74:%.*]] = bitcast i8** [[TMP73]] to i64*
+// CHECK17-NEXT:    store i64 [[TMP1]], i64* [[TMP74]], align 8
+// CHECK17-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 1
 // CHECK17-NEXT:    [[TMP76:%.*]] = bitcast i8** [[TMP75]] to i64*
 // CHECK17-NEXT:    store i64 [[TMP1]], i64* [[TMP76]], align 8
-// CHECK17-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 1
-// CHECK17-NEXT:    [[TMP78:%.*]] = bitcast i8** [[TMP77]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP1]], i64* [[TMP78]], align 8
-// CHECK17-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 1
-// CHECK17-NEXT:    store i64 8, i64* [[TMP79]], align 8
-// CHECK17-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 1
-// CHECK17-NEXT:    store i8* null, i8** [[TMP80]], align 8
-// CHECK17-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 2
-// CHECK17-NEXT:    [[TMP82:%.*]] = bitcast i8** [[TMP81]] to i32**
-// CHECK17-NEXT:    store i32* [[VLA]], i32** [[TMP82]], align 8
-// CHECK17-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 2
-// CHECK17-NEXT:    [[TMP84:%.*]] = bitcast i8** [[TMP83]] to i32**
-// CHECK17-NEXT:    store i32* [[VLA]], i32** [[TMP84]], align 8
-// CHECK17-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 2
-// CHECK17-NEXT:    store i64 [[TMP68]], i64* [[TMP85]], align 8
-// CHECK17-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 2
-// CHECK17-NEXT:    store i8* null, i8** [[TMP86]], align 8
-// CHECK17-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 3
-// CHECK17-NEXT:    [[TMP88:%.*]] = bitcast i8** [[TMP87]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP67]], i64* [[TMP88]], align 8
-// CHECK17-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 3
-// CHECK17-NEXT:    [[TMP90:%.*]] = bitcast i8** [[TMP89]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP67]], i64* [[TMP90]], align 8
-// CHECK17-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 3
-// CHECK17-NEXT:    store i64 4, i64* [[TMP91]], align 8
-// CHECK17-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 3
-// CHECK17-NEXT:    store i8* null, i8** [[TMP92]], align 8
-// CHECK17-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP96:%.*]] = load i32, i32* [[N]], align 4
-// CHECK17-NEXT:    store i32 [[TMP96]], i32* [[DOTCAPTURE_EXPR_27]], align 4
-// CHECK17-NEXT:    [[TMP97:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_27]], align 4
-// CHECK17-NEXT:    [[SUB29:%.*]] = sub nsw i32 [[TMP97]], 0
+// CHECK17-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 1
+// CHECK17-NEXT:    store i8* null, i8** [[TMP77]], align 8
+// CHECK17-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 2
+// CHECK17-NEXT:    [[TMP79:%.*]] = bitcast i8** [[TMP78]] to i32**
+// CHECK17-NEXT:    store i32* [[VLA]], i32** [[TMP79]], align 8
+// CHECK17-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 2
+// CHECK17-NEXT:    [[TMP81:%.*]] = bitcast i8** [[TMP80]] to i32**
+// CHECK17-NEXT:    store i32* [[VLA]], i32** [[TMP81]], align 8
+// CHECK17-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 2
+// CHECK17-NEXT:    store i64 [[TMP66]], i64* [[TMP82]], align 8
+// CHECK17-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 2
+// CHECK17-NEXT:    store i8* null, i8** [[TMP83]], align 8
+// CHECK17-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 3
+// CHECK17-NEXT:    [[TMP85:%.*]] = bitcast i8** [[TMP84]] to i64*
+// CHECK17-NEXT:    store i64 [[TMP65]], i64* [[TMP85]], align 8
+// CHECK17-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 3
+// CHECK17-NEXT:    [[TMP87:%.*]] = bitcast i8** [[TMP86]] to i64*
+// CHECK17-NEXT:    store i64 [[TMP65]], i64* [[TMP87]], align 8
+// CHECK17-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 3
+// CHECK17-NEXT:    store i8* null, i8** [[TMP88]], align 8
+// CHECK17-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP92:%.*]] = load i32, i32* [[N]], align 4
+// CHECK17-NEXT:    store i32 [[TMP92]], i32* [[DOTCAPTURE_EXPR_27]], align 4
+// CHECK17-NEXT:    [[TMP93:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_27]], align 4
+// CHECK17-NEXT:    [[SUB29:%.*]] = sub nsw i32 [[TMP93]], 0
 // CHECK17-NEXT:    [[DIV30:%.*]] = sdiv i32 [[SUB29]], 1
 // CHECK17-NEXT:    [[SUB31:%.*]] = sub nsw i32 [[DIV30]], 1
 // CHECK17-NEXT:    store i32 [[SUB31]], i32* [[DOTCAPTURE_EXPR_28]], align 4
-// CHECK17-NEXT:    [[TMP98:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_28]], align 4
-// CHECK17-NEXT:    [[ADD32:%.*]] = add nsw i32 [[TMP98]], 1
-// CHECK17-NEXT:    [[TMP99:%.*]] = zext i32 [[ADD32]] to i64
-// CHECK17-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP99]])
-// CHECK17-NEXT:    [[TMP100:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l147.region_id, i32 4, i8** [[TMP93]], i8** [[TMP94]], i64* [[TMP95]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.7, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK17-NEXT:    [[TMP101:%.*]] = icmp ne i32 [[TMP100]], 0
-// CHECK17-NEXT:    br i1 [[TMP101]], label [[OMP_OFFLOAD_FAILED33:%.*]], label [[OMP_OFFLOAD_CONT34:%.*]]
+// CHECK17-NEXT:    [[TMP94:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_28]], align 4
+// CHECK17-NEXT:    [[ADD32:%.*]] = add nsw i32 [[TMP94]], 1
+// CHECK17-NEXT:    [[TMP95:%.*]] = zext i32 [[ADD32]] to i64
+// CHECK17-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP95]])
+// CHECK17-NEXT:    [[TMP96:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l147.region_id, i32 4, i8** [[TMP89]], i8** [[TMP90]], i64* [[TMP91]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK17-NEXT:    [[TMP97:%.*]] = icmp ne i32 [[TMP96]], 0
+// CHECK17-NEXT:    br i1 [[TMP97]], label [[OMP_OFFLOAD_FAILED33:%.*]], label [[OMP_OFFLOAD_CONT34:%.*]]
 // CHECK17:       omp_offload.failed33:
-// CHECK17-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l147(i64 [[TMP65]], i64 [[TMP1]], i32* [[VLA]], i64 [[TMP67]]) #[[ATTR3]]
+// CHECK17-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l147(i64 [[TMP63]], i64 [[TMP1]], i32* [[VLA]], i64 [[TMP65]]) #[[ATTR3]]
 // CHECK17-NEXT:    br label [[OMP_OFFLOAD_CONT34]]
 // CHECK17:       omp_offload.cont34:
-// CHECK17-NEXT:    [[TMP102:%.*]] = load i32, i32* [[N]], align 4
+// CHECK17-NEXT:    [[TMP98:%.*]] = load i32, i32* [[N]], align 4
 // CHECK17-NEXT:    [[CONV36:%.*]] = bitcast i64* [[N_CASTED35]] to i32*
-// CHECK17-NEXT:    store i32 [[TMP102]], i32* [[CONV36]], align 4
-// CHECK17-NEXT:    [[TMP103:%.*]] = load i64, i64* [[N_CASTED35]], align 8
-// CHECK17-NEXT:    [[TMP104:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK17-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS37]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP106:%.*]] = bitcast i8** [[TMP105]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP103]], i64* [[TMP106]], align 8
-// CHECK17-NEXT:    [[TMP107:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS38]], i32 0, i32 0
+// CHECK17-NEXT:    store i32 [[TMP98]], i32* [[CONV36]], align 4
+// CHECK17-NEXT:    [[TMP99:%.*]] = load i64, i64* [[N_CASTED35]], align 8
+// CHECK17-NEXT:    [[TMP100:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK17-NEXT:    [[TMP101:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES40]] to i8*
+// CHECK17-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP101]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes.12 to i8*), i64 24, i1 false)
+// CHECK17-NEXT:    [[TMP102:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS37]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP103:%.*]] = bitcast i8** [[TMP102]] to i64*
+// CHECK17-NEXT:    store i64 [[TMP99]], i64* [[TMP103]], align 8
+// CHECK17-NEXT:    [[TMP104:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS38]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP105:%.*]] = bitcast i8** [[TMP104]] to i64*
+// CHECK17-NEXT:    store i64 [[TMP99]], i64* [[TMP105]], align 8
+// CHECK17-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS39]], i64 0, i64 0
+// CHECK17-NEXT:    store i8* null, i8** [[TMP106]], align 8
+// CHECK17-NEXT:    [[TMP107:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS37]], i32 0, i32 1
 // CHECK17-NEXT:    [[TMP108:%.*]] = bitcast i8** [[TMP107]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP103]], i64* [[TMP108]], align 8
-// CHECK17-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES40]], i32 0, i32 0
-// CHECK17-NEXT:    store i64 4, i64* [[TMP109]], align 8
-// CHECK17-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS39]], i64 0, i64 0
-// CHECK17-NEXT:    store i8* null, i8** [[TMP110]], align 8
-// CHECK17-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS37]], i32 0, i32 1
-// CHECK17-NEXT:    [[TMP112:%.*]] = bitcast i8** [[TMP111]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP1]], i64* [[TMP112]], align 8
-// CHECK17-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS38]], i32 0, i32 1
-// CHECK17-NEXT:    [[TMP114:%.*]] = bitcast i8** [[TMP113]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP1]], i64* [[TMP114]], align 8
-// CHECK17-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES40]], i32 0, i32 1
-// CHECK17-NEXT:    store i64 8, i64* [[TMP115]], align 8
-// CHECK17-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS39]], i64 0, i64 1
-// CHECK17-NEXT:    store i8* null, i8** [[TMP116]], align 8
-// CHECK17-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS37]], i32 0, i32 2
-// CHECK17-NEXT:    [[TMP118:%.*]] = bitcast i8** [[TMP117]] to i32**
-// CHECK17-NEXT:    store i32* [[VLA]], i32** [[TMP118]], align 8
-// CHECK17-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS38]], i32 0, i32 2
-// CHECK17-NEXT:    [[TMP120:%.*]] = bitcast i8** [[TMP119]] to i32**
-// CHECK17-NEXT:    store i32* [[VLA]], i32** [[TMP120]], align 8
-// CHECK17-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES40]], i32 0, i32 2
-// CHECK17-NEXT:    store i64 [[TMP104]], i64* [[TMP121]], align 8
-// CHECK17-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS39]], i64 0, i64 2
-// CHECK17-NEXT:    store i8* null, i8** [[TMP122]], align 8
-// CHECK17-NEXT:    [[TMP123:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS37]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS38]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP125:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES40]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP126:%.*]] = load i32, i32* [[N]], align 4
-// CHECK17-NEXT:    store i32 [[TMP126]], i32* [[DOTCAPTURE_EXPR_42]], align 4
-// CHECK17-NEXT:    [[TMP127:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_42]], align 4
-// CHECK17-NEXT:    [[SUB44:%.*]] = sub nsw i32 [[TMP127]], 0
+// CHECK17-NEXT:    store i64 [[TMP1]], i64* [[TMP108]], align 8
+// CHECK17-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS38]], i32 0, i32 1
+// CHECK17-NEXT:    [[TMP110:%.*]] = bitcast i8** [[TMP109]] to i64*
+// CHECK17-NEXT:    store i64 [[TMP1]], i64* [[TMP110]], align 8
+// CHECK17-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS39]], i64 0, i64 1
+// CHECK17-NEXT:    store i8* null, i8** [[TMP111]], align 8
+// CHECK17-NEXT:    [[TMP112:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS37]], i32 0, i32 2
+// CHECK17-NEXT:    [[TMP113:%.*]] = bitcast i8** [[TMP112]] to i32**
+// CHECK17-NEXT:    store i32* [[VLA]], i32** [[TMP113]], align 8
+// CHECK17-NEXT:    [[TMP114:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS38]], i32 0, i32 2
+// CHECK17-NEXT:    [[TMP115:%.*]] = bitcast i8** [[TMP114]] to i32**
+// CHECK17-NEXT:    store i32* [[VLA]], i32** [[TMP115]], align 8
+// CHECK17-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES40]], i32 0, i32 2
+// CHECK17-NEXT:    store i64 [[TMP100]], i64* [[TMP116]], align 8
+// CHECK17-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS39]], i64 0, i64 2
+// CHECK17-NEXT:    store i8* null, i8** [[TMP117]], align 8
+// CHECK17-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS37]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS38]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP120:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES40]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP121:%.*]] = load i32, i32* [[N]], align 4
+// CHECK17-NEXT:    store i32 [[TMP121]], i32* [[DOTCAPTURE_EXPR_42]], align 4
+// CHECK17-NEXT:    [[TMP122:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_42]], align 4
+// CHECK17-NEXT:    [[SUB44:%.*]] = sub nsw i32 [[TMP122]], 0
 // CHECK17-NEXT:    [[DIV45:%.*]] = sdiv i32 [[SUB44]], 1
 // CHECK17-NEXT:    [[SUB46:%.*]] = sub nsw i32 [[DIV45]], 1
 // CHECK17-NEXT:    store i32 [[SUB46]], i32* [[DOTCAPTURE_EXPR_43]], align 4
-// CHECK17-NEXT:    [[TMP128:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_43]], align 4
-// CHECK17-NEXT:    [[ADD47:%.*]] = add nsw i32 [[TMP128]], 1
-// CHECK17-NEXT:    [[TMP129:%.*]] = zext i32 [[ADD47]] to i64
-// CHECK17-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP129]])
-// CHECK17-NEXT:    [[TMP130:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l151.region_id, i32 3, i8** [[TMP123]], i8** [[TMP124]], i64* [[TMP125]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.10, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK17-NEXT:    [[TMP131:%.*]] = icmp ne i32 [[TMP130]], 0
-// CHECK17-NEXT:    br i1 [[TMP131]], label [[OMP_OFFLOAD_FAILED48:%.*]], label [[OMP_OFFLOAD_CONT49:%.*]]
+// CHECK17-NEXT:    [[TMP123:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_43]], align 4
+// CHECK17-NEXT:    [[ADD47:%.*]] = add nsw i32 [[TMP123]], 1
+// CHECK17-NEXT:    [[TMP124:%.*]] = zext i32 [[ADD47]] to i64
+// CHECK17-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP124]])
+// CHECK17-NEXT:    [[TMP125:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l151.region_id, i32 3, i8** [[TMP118]], i8** [[TMP119]], i64* [[TMP120]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK17-NEXT:    [[TMP126:%.*]] = icmp ne i32 [[TMP125]], 0
+// CHECK17-NEXT:    br i1 [[TMP126]], label [[OMP_OFFLOAD_FAILED48:%.*]], label [[OMP_OFFLOAD_CONT49:%.*]]
 // CHECK17:       omp_offload.failed48:
-// CHECK17-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l151(i64 [[TMP103]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK17-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l151(i64 [[TMP99]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK17-NEXT:    br label [[OMP_OFFLOAD_CONT49]]
 // CHECK17:       omp_offload.cont49:
-// CHECK17-NEXT:    [[TMP132:%.*]] = load i32, i32* [[M]], align 4
-// CHECK17-NEXT:    store i32 [[TMP132]], i32* [[DOTCAPTURE_EXPR_50]], align 4
-// CHECK17-NEXT:    [[TMP133:%.*]] = load i32, i32* [[N]], align 4
+// CHECK17-NEXT:    [[TMP127:%.*]] = load i32, i32* [[M]], align 4
+// CHECK17-NEXT:    store i32 [[TMP127]], i32* [[DOTCAPTURE_EXPR_50]], align 4
+// CHECK17-NEXT:    [[TMP128:%.*]] = load i32, i32* [[N]], align 4
 // CHECK17-NEXT:    [[CONV52:%.*]] = bitcast i64* [[N_CASTED51]] to i32*
-// CHECK17-NEXT:    store i32 [[TMP133]], i32* [[CONV52]], align 4
-// CHECK17-NEXT:    [[TMP134:%.*]] = load i64, i64* [[N_CASTED51]], align 8
-// CHECK17-NEXT:    [[TMP135:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_50]], align 4
+// CHECK17-NEXT:    store i32 [[TMP128]], i32* [[CONV52]], align 4
+// CHECK17-NEXT:    [[TMP129:%.*]] = load i64, i64* [[N_CASTED51]], align 8
+// CHECK17-NEXT:    [[TMP130:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_50]], align 4
 // CHECK17-NEXT:    [[CONV54:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED53]] to i32*
-// CHECK17-NEXT:    store i32 [[TMP135]], i32* [[CONV54]], align 4
-// CHECK17-NEXT:    [[TMP136:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED53]], align 8
-// CHECK17-NEXT:    [[TMP137:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK17-NEXT:    [[TMP138:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS55]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP139:%.*]] = bitcast i8** [[TMP138]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP134]], i64* [[TMP139]], align 8
-// CHECK17-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS56]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP141:%.*]] = bitcast i8** [[TMP140]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP134]], i64* [[TMP141]], align 8
-// CHECK17-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES58]], i32 0, i32 0
-// CHECK17-NEXT:    store i64 4, i64* [[TMP142]], align 8
-// CHECK17-NEXT:    [[TMP143:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS57]], i64 0, i64 0
+// CHECK17-NEXT:    store i32 [[TMP130]], i32* [[CONV54]], align 4
+// CHECK17-NEXT:    [[TMP131:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED53]], align 8
+// CHECK17-NEXT:    [[TMP132:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK17-NEXT:    [[TMP133:%.*]] = bitcast [4 x i64]* [[DOTOFFLOAD_SIZES58]] to i8*
+// CHECK17-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP133]], i8* align 8 bitcast ([4 x i64]* @.offload_sizes.16 to i8*), i64 32, i1 false)
+// CHECK17-NEXT:    [[TMP134:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS55]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP135:%.*]] = bitcast i8** [[TMP134]] to i64*
+// CHECK17-NEXT:    store i64 [[TMP129]], i64* [[TMP135]], align 8
+// CHECK17-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS56]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP137:%.*]] = bitcast i8** [[TMP136]] to i64*
+// CHECK17-NEXT:    store i64 [[TMP129]], i64* [[TMP137]], align 8
+// CHECK17-NEXT:    [[TMP138:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS57]], i64 0, i64 0
+// CHECK17-NEXT:    store i8* null, i8** [[TMP138]], align 8
+// CHECK17-NEXT:    [[TMP139:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS55]], i32 0, i32 1
+// CHECK17-NEXT:    [[TMP140:%.*]] = bitcast i8** [[TMP139]] to i64*
+// CHECK17-NEXT:    store i64 [[TMP1]], i64* [[TMP140]], align 8
+// CHECK17-NEXT:    [[TMP141:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS56]], i32 0, i32 1
+// CHECK17-NEXT:    [[TMP142:%.*]] = bitcast i8** [[TMP141]] to i64*
+// CHECK17-NEXT:    store i64 [[TMP1]], i64* [[TMP142]], align 8
+// CHECK17-NEXT:    [[TMP143:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS57]], i64 0, i64 1
 // CHECK17-NEXT:    store i8* null, i8** [[TMP143]], align 8
-// CHECK17-NEXT:    [[TMP144:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS55]], i32 0, i32 1
-// CHECK17-NEXT:    [[TMP145:%.*]] = bitcast i8** [[TMP144]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP1]], i64* [[TMP145]], align 8
-// CHECK17-NEXT:    [[TMP146:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS56]], i32 0, i32 1
-// CHECK17-NEXT:    [[TMP147:%.*]] = bitcast i8** [[TMP146]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP1]], i64* [[TMP147]], align 8
-// CHECK17-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES58]], i32 0, i32 1
-// CHECK17-NEXT:    store i64 8, i64* [[TMP148]], align 8
-// CHECK17-NEXT:    [[TMP149:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS57]], i64 0, i64 1
+// CHECK17-NEXT:    [[TMP144:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS55]], i32 0, i32 2
+// CHECK17-NEXT:    [[TMP145:%.*]] = bitcast i8** [[TMP144]] to i32**
+// CHECK17-NEXT:    store i32* [[VLA]], i32** [[TMP145]], align 8
+// CHECK17-NEXT:    [[TMP146:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS56]], i32 0, i32 2
+// CHECK17-NEXT:    [[TMP147:%.*]] = bitcast i8** [[TMP146]] to i32**
+// CHECK17-NEXT:    store i32* [[VLA]], i32** [[TMP147]], align 8
+// CHECK17-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES58]], i32 0, i32 2
+// CHECK17-NEXT:    store i64 [[TMP132]], i64* [[TMP148]], align 8
+// CHECK17-NEXT:    [[TMP149:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS57]], i64 0, i64 2
 // CHECK17-NEXT:    store i8* null, i8** [[TMP149]], align 8
-// CHECK17-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS55]], i32 0, i32 2
-// CHECK17-NEXT:    [[TMP151:%.*]] = bitcast i8** [[TMP150]] to i32**
-// CHECK17-NEXT:    store i32* [[VLA]], i32** [[TMP151]], align 8
-// CHECK17-NEXT:    [[TMP152:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS56]], i32 0, i32 2
-// CHECK17-NEXT:    [[TMP153:%.*]] = bitcast i8** [[TMP152]] to i32**
-// CHECK17-NEXT:    store i32* [[VLA]], i32** [[TMP153]], align 8
-// CHECK17-NEXT:    [[TMP154:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES58]], i32 0, i32 2
-// CHECK17-NEXT:    store i64 [[TMP137]], i64* [[TMP154]], align 8
-// CHECK17-NEXT:    [[TMP155:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS57]], i64 0, i64 2
-// CHECK17-NEXT:    store i8* null, i8** [[TMP155]], align 8
-// CHECK17-NEXT:    [[TMP156:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS55]], i32 0, i32 3
-// CHECK17-NEXT:    [[TMP157:%.*]] = bitcast i8** [[TMP156]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP136]], i64* [[TMP157]], align 8
-// CHECK17-NEXT:    [[TMP158:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS56]], i32 0, i32 3
-// CHECK17-NEXT:    [[TMP159:%.*]] = bitcast i8** [[TMP158]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP136]], i64* [[TMP159]], align 8
-// CHECK17-NEXT:    [[TMP160:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES58]], i32 0, i32 3
-// CHECK17-NEXT:    store i64 4, i64* [[TMP160]], align 8
-// CHECK17-NEXT:    [[TMP161:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS57]], i64 0, i64 3
-// CHECK17-NEXT:    store i8* null, i8** [[TMP161]], align 8
-// CHECK17-NEXT:    [[TMP162:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS55]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP163:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS56]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP164:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES58]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP165:%.*]] = load i32, i32* [[N]], align 4
-// CHECK17-NEXT:    store i32 [[TMP165]], i32* [[DOTCAPTURE_EXPR_60]], align 4
-// CHECK17-NEXT:    [[TMP166:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_60]], align 4
-// CHECK17-NEXT:    [[SUB62:%.*]] = sub nsw i32 [[TMP166]], 0
+// CHECK17-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS55]], i32 0, i32 3
+// CHECK17-NEXT:    [[TMP151:%.*]] = bitcast i8** [[TMP150]] to i64*
+// CHECK17-NEXT:    store i64 [[TMP131]], i64* [[TMP151]], align 8
+// CHECK17-NEXT:    [[TMP152:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS56]], i32 0, i32 3
+// CHECK17-NEXT:    [[TMP153:%.*]] = bitcast i8** [[TMP152]] to i64*
+// CHECK17-NEXT:    store i64 [[TMP131]], i64* [[TMP153]], align 8
+// CHECK17-NEXT:    [[TMP154:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS57]], i64 0, i64 3
+// CHECK17-NEXT:    store i8* null, i8** [[TMP154]], align 8
+// CHECK17-NEXT:    [[TMP155:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS55]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP156:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS56]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP157:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES58]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP158:%.*]] = load i32, i32* [[N]], align 4
+// CHECK17-NEXT:    store i32 [[TMP158]], i32* [[DOTCAPTURE_EXPR_60]], align 4
+// CHECK17-NEXT:    [[TMP159:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_60]], align 4
+// CHECK17-NEXT:    [[SUB62:%.*]] = sub nsw i32 [[TMP159]], 0
 // CHECK17-NEXT:    [[DIV63:%.*]] = sdiv i32 [[SUB62]], 1
 // CHECK17-NEXT:    [[SUB64:%.*]] = sub nsw i32 [[DIV63]], 1
 // CHECK17-NEXT:    store i32 [[SUB64]], i32* [[DOTCAPTURE_EXPR_61]], align 4
-// CHECK17-NEXT:    [[TMP167:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_61]], align 4
-// CHECK17-NEXT:    [[ADD65:%.*]] = add nsw i32 [[TMP167]], 1
-// CHECK17-NEXT:    [[TMP168:%.*]] = zext i32 [[ADD65]] to i64
-// CHECK17-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP168]])
-// CHECK17-NEXT:    [[TMP169:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l155.region_id, i32 4, i8** [[TMP162]], i8** [[TMP163]], i64* [[TMP164]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK17-NEXT:    [[TMP170:%.*]] = icmp ne i32 [[TMP169]], 0
-// CHECK17-NEXT:    br i1 [[TMP170]], label [[OMP_OFFLOAD_FAILED66:%.*]], label [[OMP_OFFLOAD_CONT67:%.*]]
+// CHECK17-NEXT:    [[TMP160:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_61]], align 4
+// CHECK17-NEXT:    [[ADD65:%.*]] = add nsw i32 [[TMP160]], 1
+// CHECK17-NEXT:    [[TMP161:%.*]] = zext i32 [[ADD65]] to i64
+// CHECK17-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP161]])
+// CHECK17-NEXT:    [[TMP162:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l155.region_id, i32 4, i8** [[TMP155]], i8** [[TMP156]], i64* [[TMP157]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.17, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK17-NEXT:    [[TMP163:%.*]] = icmp ne i32 [[TMP162]], 0
+// CHECK17-NEXT:    br i1 [[TMP163]], label [[OMP_OFFLOAD_FAILED66:%.*]], label [[OMP_OFFLOAD_CONT67:%.*]]
 // CHECK17:       omp_offload.failed66:
-// CHECK17-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l155(i64 [[TMP134]], i64 [[TMP1]], i32* [[VLA]], i64 [[TMP136]]) #[[ATTR3]]
+// CHECK17-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l155(i64 [[TMP129]], i64 [[TMP1]], i32* [[VLA]], i64 [[TMP131]]) #[[ATTR3]]
 // CHECK17-NEXT:    br label [[OMP_OFFLOAD_CONT67]]
 // CHECK17:       omp_offload.cont67:
-// CHECK17-NEXT:    [[TMP171:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK17-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10EEiT_(i32 noundef signext [[TMP171]])
+// CHECK17-NEXT:    [[TMP164:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK17-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10EEiT_(i32 noundef signext [[TMP164]])
 // CHECK17-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK17-NEXT:    [[TMP172:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK17-NEXT:    call void @llvm.stackrestore(i8* [[TMP172]])
-// CHECK17-NEXT:    [[TMP173:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK17-NEXT:    ret i32 [[TMP173]]
+// CHECK17-NEXT:    [[TMP165:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK17-NEXT:    call void @llvm.stackrestore(i8* [[TMP165]])
+// CHECK17-NEXT:    [[TMP166:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK17-NEXT:    ret i32 [[TMP166]]
 //
 //
 // CHECK17-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l139
@@ -18125,11 +18055,11 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK17-NEXT:    store i32 [[TMP4]], i32* [[CONV3]], align 4
 // CHECK17-NEXT:    [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP3]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP5]])
+// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i64 [[TMP3]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP5]])
 // CHECK17-NEXT:    ret void
 //
 //
-// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..5
+// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..6
 // CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK17-NEXT:  entry:
 // CHECK17-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -18216,7 +18146,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[CONV9:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK17-NEXT:    store i32 [[TMP22]], i32* [[CONV9]], align 4
 // CHECK17-NEXT:    [[TMP23:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i64 [[TMP21]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP23]])
+// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*, i64)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i64 [[TMP21]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP23]])
 // CHECK17-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK17:       omp.inner.for.inc:
 // CHECK17-NEXT:    [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -18258,7 +18188,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    ret void
 //
 //
-// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..6
+// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..7
 // CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK17-NEXT:  entry:
 // CHECK17-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -18383,11 +18313,11 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32*
 // CHECK17-NEXT:    store i32 [[TMP2]], i32* [[CONV1]], align 4
 // CHECK17-NEXT:    [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8
-// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), i64 [[TMP3]], i64 [[TMP0]], i32* [[TMP1]])
+// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), i64 [[TMP3]], i64 [[TMP0]], i32* [[TMP1]])
 // CHECK17-NEXT:    ret void
 //
 //
-// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..8
+// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..10
 // CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] {
 // CHECK17-NEXT:  entry:
 // CHECK17-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -18464,7 +18394,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32*
 // CHECK17-NEXT:    store i32 [[TMP19]], i32* [[CONV6]], align 4
 // CHECK17-NEXT:    [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8
-// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], i32* [[TMP1]])
+// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], i32* [[TMP1]])
 // CHECK17-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK17:       omp.inner.for.inc:
 // CHECK17-NEXT:    [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -18483,7 +18413,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    ret void
 //
 //
-// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..9
+// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..11
 // CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] {
 // CHECK17-NEXT:  entry:
 // CHECK17-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -18609,11 +18539,11 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK17-NEXT:    store i32 [[TMP4]], i32* [[CONV3]], align 4
 // CHECK17-NEXT:    [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP3]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP5]])
+// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i64 [[TMP3]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP5]])
 // CHECK17-NEXT:    ret void
 //
 //
-// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..11
+// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..14
 // CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK17-NEXT:  entry:
 // CHECK17-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -18698,7 +18628,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[CONV9:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK17-NEXT:    store i32 [[TMP21]], i32* [[CONV9]], align 4
 // CHECK17-NEXT:    [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*, i64)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP22]])
+// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP22]])
 // CHECK17-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK17:       omp.inner.for.inc:
 // CHECK17-NEXT:    [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -18717,7 +18647,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    ret void
 //
 //
-// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..12
+// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..15
 // CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK17-NEXT:  entry:
 // CHECK17-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -18823,7 +18753,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK17-LABEL: define {{[^@]+}}@_Z5tmainIiLi10EEiT_
-// CHECK17-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK17-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK17-NEXT:  entry:
 // CHECK17-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK17-NEXT:    [[A:%.*]] = alloca [10 x i32], align 4
@@ -18865,7 +18795,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK17-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK17-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK17-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l112.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.16, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK17-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l112.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.20, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.21, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK17-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK17-NEXT:    br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK17:       omp_offload.failed:
@@ -18883,7 +18813,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK17-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
 // CHECK17-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK17-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l116.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.19, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.20, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK17-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l116.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.24, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.25, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK17-NEXT:    [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
 // CHECK17-NEXT:    br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]]
 // CHECK17:       omp_offload.failed5:
@@ -18915,7 +18845,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0
 // CHECK17-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS8]], i32 0, i32 0
 // CHECK17-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK17-NEXT:    [[TMP33:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l120.region_id, i32 2, i8** [[TMP31]], i8** [[TMP32]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.23, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.24, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK17-NEXT:    [[TMP33:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l120.region_id, i32 2, i8** [[TMP31]], i8** [[TMP32]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.28, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.29, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK17-NEXT:    [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0
 // CHECK17-NEXT:    br i1 [[TMP34]], label [[OMP_OFFLOAD_FAILED11:%.*]], label [[OMP_OFFLOAD_CONT12:%.*]]
 // CHECK17:       omp_offload.failed11:
@@ -18933,7 +18863,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0
 // CHECK17-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 0
 // CHECK17-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK17-NEXT:    [[TMP42:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l124.region_id, i32 1, i8** [[TMP40]], i8** [[TMP41]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.27, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.28, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK17-NEXT:    [[TMP42:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l124.region_id, i32 1, i8** [[TMP40]], i8** [[TMP41]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.32, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.33, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK17-NEXT:    [[TMP43:%.*]] = icmp ne i32 [[TMP42]], 0
 // CHECK17-NEXT:    br i1 [[TMP43]], label [[OMP_OFFLOAD_FAILED17:%.*]], label [[OMP_OFFLOAD_CONT18:%.*]]
 // CHECK17:       omp_offload.failed17:
@@ -18965,7 +18895,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 0
 // CHECK17-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 0
 // CHECK17-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK17-NEXT:    [[TMP59:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l128.region_id, i32 2, i8** [[TMP57]], i8** [[TMP58]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.31, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.32, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK17-NEXT:    [[TMP59:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l128.region_id, i32 2, i8** [[TMP57]], i8** [[TMP58]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.36, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.37, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK17-NEXT:    [[TMP60:%.*]] = icmp ne i32 [[TMP59]], 0
 // CHECK17-NEXT:    br i1 [[TMP60]], label [[OMP_OFFLOAD_FAILED26:%.*]], label [[OMP_OFFLOAD_CONT27:%.*]]
 // CHECK17:       omp_offload.failed26:
@@ -18981,11 +18911,11 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8
 // CHECK17-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8
 // CHECK17-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8
-// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK17-NEXT:    ret void
 //
 //
-// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..14
+// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..18
 // CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK17-NEXT:  entry:
 // CHECK17-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -19033,7 +18963,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
 // CHECK17-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
 // CHECK17-NEXT:    [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
-// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]])
+// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]])
 // CHECK17-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK17:       omp.inner.for.inc:
 // CHECK17-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -19048,7 +18978,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    ret void
 //
 //
-// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..15
+// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..19
 // CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK17-NEXT:  entry:
 // CHECK17-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -19131,11 +19061,11 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8
 // CHECK17-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8
 // CHECK17-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8
-// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..17 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..22 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK17-NEXT:    ret void
 //
 //
-// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..17
+// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..22
 // CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK17-NEXT:  entry:
 // CHECK17-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -19183,7 +19113,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
 // CHECK17-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
 // CHECK17-NEXT:    [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
-// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]])
+// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..23 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]])
 // CHECK17-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK17:       omp.inner.for.inc:
 // CHECK17-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -19198,7 +19128,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    ret void
 //
 //
-// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..18
+// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..23
 // CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK17-NEXT:  entry:
 // CHECK17-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -19289,11 +19219,11 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK17-NEXT:    store i32 [[TMP1]], i32* [[CONV1]], align 4
 // CHECK17-NEXT:    [[TMP2:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i64)* @.omp_outlined..21 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i64 [[TMP2]])
+// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i64)* @.omp_outlined..26 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i64 [[TMP2]])
 // CHECK17-NEXT:    ret void
 //
 //
-// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..21
+// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..26
 // CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK17-NEXT:  entry:
 // CHECK17-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -19349,7 +19279,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK17-NEXT:    store i32 [[TMP12]], i32* [[CONV2]], align 4
 // CHECK17-NEXT:    [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..22 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]])
+// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..27 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]])
 // CHECK17-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK17:       omp.inner.for.inc:
 // CHECK17-NEXT:    [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -19364,7 +19294,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    ret void
 //
 //
-// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..22
+// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..27
 // CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK17-NEXT:  entry:
 // CHECK17-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -19472,11 +19402,11 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8
 // CHECK17-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8
 // CHECK17-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8
-// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..25 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK17-NEXT:    ret void
 //
 //
-// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..25
+// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..30
 // CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK17-NEXT:  entry:
 // CHECK17-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -19524,7 +19454,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
 // CHECK17-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
 // CHECK17-NEXT:    [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
-// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]])
+// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]])
 // CHECK17-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK17:       omp.inner.for.inc:
 // CHECK17-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -19539,7 +19469,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    ret void
 //
 //
-// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..26
+// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..31
 // CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK17-NEXT:  entry:
 // CHECK17-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -19628,11 +19558,11 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK17-NEXT:    store i32 [[TMP1]], i32* [[CONV1]], align 4
 // CHECK17-NEXT:    [[TMP2:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i64)* @.omp_outlined..29 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i64 [[TMP2]])
+// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i64)* @.omp_outlined..34 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i64 [[TMP2]])
 // CHECK17-NEXT:    ret void
 //
 //
-// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..29
+// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..34
 // CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK17-NEXT:  entry:
 // CHECK17-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -19688,7 +19618,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK17-NEXT:    store i32 [[TMP12]], i32* [[CONV2]], align 4
 // CHECK17-NEXT:    [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]])
+// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]])
 // CHECK17-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK17:       omp.inner.for.inc:
 // CHECK17-NEXT:    [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -19703,7 +19633,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    ret void
 //
 //
-// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..30
+// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..35
 // CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK17-NEXT:  entry:
 // CHECK17-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -19783,7 +19713,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK17-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK17-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK17-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK17-NEXT:  entry:
 // CHECK17-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK17-NEXT:    ret void
@@ -19859,14 +19789,14 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    store i32 [[TMP3]], i32* [[CONV]], align 4
 // CHECK18-NEXT:    [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8
 // CHECK18-NEXT:    [[TMP5:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK18-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP4]], i64* [[TMP7]], align 8
-// CHECK18-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP4]], i64* [[TMP9]], align 8
-// CHECK18-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK18-NEXT:    store i64 4, i64* [[TMP10]], align 8
+// CHECK18-NEXT:    [[TMP6:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK18-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP6]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes to i8*), i64 24, i1 false)
+// CHECK18-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i64*
+// CHECK18-NEXT:    store i64 [[TMP4]], i64* [[TMP8]], align 8
+// CHECK18-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i64*
+// CHECK18-NEXT:    store i64 [[TMP4]], i64* [[TMP10]], align 8
 // CHECK18-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
 // CHECK18-NEXT:    store i8* null, i8** [[TMP11]], align 8
 // CHECK18-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -19875,304 +19805,290 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK18-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i64*
 // CHECK18-NEXT:    store i64 [[TMP1]], i64* [[TMP15]], align 8
-// CHECK18-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK18-NEXT:    store i64 8, i64* [[TMP16]], align 8
-// CHECK18-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK18-NEXT:    store i8* null, i8** [[TMP17]], align 8
-// CHECK18-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK18-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
-// CHECK18-NEXT:    store i32* [[VLA]], i32** [[TMP19]], align 8
-// CHECK18-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK18-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32**
-// CHECK18-NEXT:    store i32* [[VLA]], i32** [[TMP21]], align 8
-// CHECK18-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK18-NEXT:    store i64 [[TMP5]], i64* [[TMP22]], align 8
-// CHECK18-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK18-NEXT:    store i8* null, i8** [[TMP23]], align 8
-// CHECK18-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP27:%.*]] = load i32, i32* [[N]], align 4
-// CHECK18-NEXT:    store i32 [[TMP27]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK18-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK18-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP28]], 0
+// CHECK18-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK18-NEXT:    store i8* null, i8** [[TMP16]], align 8
+// CHECK18-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK18-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32**
+// CHECK18-NEXT:    store i32* [[VLA]], i32** [[TMP18]], align 8
+// CHECK18-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK18-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32**
+// CHECK18-NEXT:    store i32* [[VLA]], i32** [[TMP20]], align 8
+// CHECK18-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
+// CHECK18-NEXT:    store i64 [[TMP5]], i64* [[TMP21]], align 8
+// CHECK18-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK18-NEXT:    store i8* null, i8** [[TMP22]], align 8
+// CHECK18-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP26:%.*]] = load i32, i32* [[N]], align 4
+// CHECK18-NEXT:    store i32 [[TMP26]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK18-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK18-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP27]], 0
 // CHECK18-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK18-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK18-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK18-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK18-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP29]], 1
-// CHECK18-NEXT:    [[TMP30:%.*]] = zext i32 [[ADD]] to i64
-// CHECK18-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP30]])
-// CHECK18-NEXT:    [[TMP31:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l139.region_id, i32 3, i8** [[TMP24]], i8** [[TMP25]], i64* [[TMP26]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK18-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
-// CHECK18-NEXT:    br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK18-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK18-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP28]], 1
+// CHECK18-NEXT:    [[TMP29:%.*]] = zext i32 [[ADD]] to i64
+// CHECK18-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP29]])
+// CHECK18-NEXT:    [[TMP30:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l139.region_id, i32 3, i8** [[TMP23]], i8** [[TMP24]], i64* [[TMP25]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK18-NEXT:    [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
+// CHECK18-NEXT:    br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK18:       omp_offload.failed:
 // CHECK18-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l139(i64 [[TMP4]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK18-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK18:       omp_offload.cont:
-// CHECK18-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N]], align 4
+// CHECK18-NEXT:    [[TMP32:%.*]] = load i32, i32* [[N]], align 4
 // CHECK18-NEXT:    [[CONV4:%.*]] = bitcast i64* [[N_CASTED3]] to i32*
-// CHECK18-NEXT:    store i32 [[TMP33]], i32* [[CONV4]], align 4
-// CHECK18-NEXT:    [[TMP34:%.*]] = load i64, i64* [[N_CASTED3]], align 8
-// CHECK18-NEXT:    [[TMP35:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK18-NEXT:    store i32 [[TMP32]], i32* [[CONV4]], align 4
+// CHECK18-NEXT:    [[TMP33:%.*]] = load i64, i64* [[N_CASTED3]], align 8
+// CHECK18-NEXT:    [[TMP34:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK18-NEXT:    [[TMP35:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES8]] to i8*
+// CHECK18-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP35]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes.4 to i8*), i64 24, i1 false)
 // CHECK18-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
 // CHECK18-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP34]], i64* [[TMP37]], align 8
+// CHECK18-NEXT:    store i64 [[TMP33]], i64* [[TMP37]], align 8
 // CHECK18-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
 // CHECK18-NEXT:    [[TMP39:%.*]] = bitcast i8** [[TMP38]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP34]], i64* [[TMP39]], align 8
-// CHECK18-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 0
-// CHECK18-NEXT:    store i64 4, i64* [[TMP40]], align 8
-// CHECK18-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 0
-// CHECK18-NEXT:    store i8* null, i8** [[TMP41]], align 8
-// CHECK18-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 1
-// CHECK18-NEXT:    [[TMP43:%.*]] = bitcast i8** [[TMP42]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP1]], i64* [[TMP43]], align 8
-// CHECK18-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 1
-// CHECK18-NEXT:    [[TMP45:%.*]] = bitcast i8** [[TMP44]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP1]], i64* [[TMP45]], align 8
-// CHECK18-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 1
-// CHECK18-NEXT:    store i64 8, i64* [[TMP46]], align 8
-// CHECK18-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 1
-// CHECK18-NEXT:    store i8* null, i8** [[TMP47]], align 8
-// CHECK18-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 2
+// CHECK18-NEXT:    store i64 [[TMP33]], i64* [[TMP39]], align 8
+// CHECK18-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 0
+// CHECK18-NEXT:    store i8* null, i8** [[TMP40]], align 8
+// CHECK18-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 1
+// CHECK18-NEXT:    [[TMP42:%.*]] = bitcast i8** [[TMP41]] to i64*
+// CHECK18-NEXT:    store i64 [[TMP1]], i64* [[TMP42]], align 8
+// CHECK18-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 1
+// CHECK18-NEXT:    [[TMP44:%.*]] = bitcast i8** [[TMP43]] to i64*
+// CHECK18-NEXT:    store i64 [[TMP1]], i64* [[TMP44]], align 8
+// CHECK18-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 1
+// CHECK18-NEXT:    store i8* null, i8** [[TMP45]], align 8
+// CHECK18-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 2
+// CHECK18-NEXT:    [[TMP47:%.*]] = bitcast i8** [[TMP46]] to i32**
+// CHECK18-NEXT:    store i32* [[VLA]], i32** [[TMP47]], align 8
+// CHECK18-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 2
 // CHECK18-NEXT:    [[TMP49:%.*]] = bitcast i8** [[TMP48]] to i32**
 // CHECK18-NEXT:    store i32* [[VLA]], i32** [[TMP49]], align 8
-// CHECK18-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 2
-// CHECK18-NEXT:    [[TMP51:%.*]] = bitcast i8** [[TMP50]] to i32**
-// CHECK18-NEXT:    store i32* [[VLA]], i32** [[TMP51]], align 8
-// CHECK18-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 2
-// CHECK18-NEXT:    store i64 [[TMP35]], i64* [[TMP52]], align 8
-// CHECK18-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 2
-// CHECK18-NEXT:    store i8* null, i8** [[TMP53]], align 8
-// CHECK18-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP57:%.*]] = load i32, i32* [[N]], align 4
-// CHECK18-NEXT:    store i32 [[TMP57]], i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK18-NEXT:    [[TMP58:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK18-NEXT:    [[SUB12:%.*]] = sub nsw i32 [[TMP58]], 0
+// CHECK18-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 2
+// CHECK18-NEXT:    store i64 [[TMP34]], i64* [[TMP50]], align 8
+// CHECK18-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 2
+// CHECK18-NEXT:    store i8* null, i8** [[TMP51]], align 8
+// CHECK18-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP55:%.*]] = load i32, i32* [[N]], align 4
+// CHECK18-NEXT:    store i32 [[TMP55]], i32* [[DOTCAPTURE_EXPR_10]], align 4
+// CHECK18-NEXT:    [[TMP56:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
+// CHECK18-NEXT:    [[SUB12:%.*]] = sub nsw i32 [[TMP56]], 0
 // CHECK18-NEXT:    [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1
 // CHECK18-NEXT:    [[SUB14:%.*]] = sub nsw i32 [[DIV13]], 1
 // CHECK18-NEXT:    store i32 [[SUB14]], i32* [[DOTCAPTURE_EXPR_11]], align 4
-// CHECK18-NEXT:    [[TMP59:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_11]], align 4
-// CHECK18-NEXT:    [[ADD15:%.*]] = add nsw i32 [[TMP59]], 1
-// CHECK18-NEXT:    [[TMP60:%.*]] = zext i32 [[ADD15]] to i64
-// CHECK18-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP60]])
-// CHECK18-NEXT:    [[TMP61:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l143.region_id, i32 3, i8** [[TMP54]], i8** [[TMP55]], i64* [[TMP56]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK18-NEXT:    [[TMP62:%.*]] = icmp ne i32 [[TMP61]], 0
-// CHECK18-NEXT:    br i1 [[TMP62]], label [[OMP_OFFLOAD_FAILED16:%.*]], label [[OMP_OFFLOAD_CONT17:%.*]]
+// CHECK18-NEXT:    [[TMP57:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_11]], align 4
+// CHECK18-NEXT:    [[ADD15:%.*]] = add nsw i32 [[TMP57]], 1
+// CHECK18-NEXT:    [[TMP58:%.*]] = zext i32 [[ADD15]] to i64
+// CHECK18-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP58]])
+// CHECK18-NEXT:    [[TMP59:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l143.region_id, i32 3, i8** [[TMP52]], i8** [[TMP53]], i64* [[TMP54]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.5, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK18-NEXT:    [[TMP60:%.*]] = icmp ne i32 [[TMP59]], 0
+// CHECK18-NEXT:    br i1 [[TMP60]], label [[OMP_OFFLOAD_FAILED16:%.*]], label [[OMP_OFFLOAD_CONT17:%.*]]
 // CHECK18:       omp_offload.failed16:
-// CHECK18-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l143(i64 [[TMP34]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK18-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l143(i64 [[TMP33]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK18-NEXT:    br label [[OMP_OFFLOAD_CONT17]]
 // CHECK18:       omp_offload.cont17:
-// CHECK18-NEXT:    [[TMP63:%.*]] = load i32, i32* [[M]], align 4
-// CHECK18-NEXT:    store i32 [[TMP63]], i32* [[DOTCAPTURE_EXPR_18]], align 4
-// CHECK18-NEXT:    [[TMP64:%.*]] = load i32, i32* [[N]], align 4
+// CHECK18-NEXT:    [[TMP61:%.*]] = load i32, i32* [[M]], align 4
+// CHECK18-NEXT:    store i32 [[TMP61]], i32* [[DOTCAPTURE_EXPR_18]], align 4
+// CHECK18-NEXT:    [[TMP62:%.*]] = load i32, i32* [[N]], align 4
 // CHECK18-NEXT:    [[CONV20:%.*]] = bitcast i64* [[N_CASTED19]] to i32*
-// CHECK18-NEXT:    store i32 [[TMP64]], i32* [[CONV20]], align 4
-// CHECK18-NEXT:    [[TMP65:%.*]] = load i64, i64* [[N_CASTED19]], align 8
-// CHECK18-NEXT:    [[TMP66:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4
+// CHECK18-NEXT:    store i32 [[TMP62]], i32* [[CONV20]], align 4
+// CHECK18-NEXT:    [[TMP63:%.*]] = load i64, i64* [[N_CASTED19]], align 8
+// CHECK18-NEXT:    [[TMP64:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4
 // CHECK18-NEXT:    [[CONV21:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
-// CHECK18-NEXT:    store i32 [[TMP66]], i32* [[CONV21]], align 4
-// CHECK18-NEXT:    [[TMP67:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK18-NEXT:    [[TMP68:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK18-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP70:%.*]] = bitcast i8** [[TMP69]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP65]], i64* [[TMP70]], align 8
-// CHECK18-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP72:%.*]] = bitcast i8** [[TMP71]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP65]], i64* [[TMP72]], align 8
-// CHECK18-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 0
-// CHECK18-NEXT:    store i64 4, i64* [[TMP73]], align 8
-// CHECK18-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 0
-// CHECK18-NEXT:    store i8* null, i8** [[TMP74]], align 8
-// CHECK18-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 1
+// CHECK18-NEXT:    store i32 [[TMP64]], i32* [[CONV21]], align 4
+// CHECK18-NEXT:    [[TMP65:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
+// CHECK18-NEXT:    [[TMP66:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK18-NEXT:    [[TMP67:%.*]] = bitcast [4 x i64]* [[DOTOFFLOAD_SIZES25]] to i8*
+// CHECK18-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP67]], i8* align 8 bitcast ([4 x i64]* @.offload_sizes.8 to i8*), i64 32, i1 false)
+// CHECK18-NEXT:    [[TMP68:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP69:%.*]] = bitcast i8** [[TMP68]] to i64*
+// CHECK18-NEXT:    store i64 [[TMP63]], i64* [[TMP69]], align 8
+// CHECK18-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP71:%.*]] = bitcast i8** [[TMP70]] to i64*
+// CHECK18-NEXT:    store i64 [[TMP63]], i64* [[TMP71]], align 8
+// CHECK18-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 0
+// CHECK18-NEXT:    store i8* null, i8** [[TMP72]], align 8
+// CHECK18-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 1
+// CHECK18-NEXT:    [[TMP74:%.*]] = bitcast i8** [[TMP73]] to i64*
+// CHECK18-NEXT:    store i64 [[TMP1]], i64* [[TMP74]], align 8
+// CHECK18-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 1
 // CHECK18-NEXT:    [[TMP76:%.*]] = bitcast i8** [[TMP75]] to i64*
 // CHECK18-NEXT:    store i64 [[TMP1]], i64* [[TMP76]], align 8
-// CHECK18-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 1
-// CHECK18-NEXT:    [[TMP78:%.*]] = bitcast i8** [[TMP77]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP1]], i64* [[TMP78]], align 8
-// CHECK18-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 1
-// CHECK18-NEXT:    store i64 8, i64* [[TMP79]], align 8
-// CHECK18-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 1
-// CHECK18-NEXT:    store i8* null, i8** [[TMP80]], align 8
-// CHECK18-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 2
-// CHECK18-NEXT:    [[TMP82:%.*]] = bitcast i8** [[TMP81]] to i32**
-// CHECK18-NEXT:    store i32* [[VLA]], i32** [[TMP82]], align 8
-// CHECK18-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 2
-// CHECK18-NEXT:    [[TMP84:%.*]] = bitcast i8** [[TMP83]] to i32**
-// CHECK18-NEXT:    store i32* [[VLA]], i32** [[TMP84]], align 8
-// CHECK18-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 2
-// CHECK18-NEXT:    store i64 [[TMP68]], i64* [[TMP85]], align 8
-// CHECK18-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 2
-// CHECK18-NEXT:    store i8* null, i8** [[TMP86]], align 8
-// CHECK18-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 3
-// CHECK18-NEXT:    [[TMP88:%.*]] = bitcast i8** [[TMP87]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP67]], i64* [[TMP88]], align 8
-// CHECK18-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 3
-// CHECK18-NEXT:    [[TMP90:%.*]] = bitcast i8** [[TMP89]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP67]], i64* [[TMP90]], align 8
-// CHECK18-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 3
-// CHECK18-NEXT:    store i64 4, i64* [[TMP91]], align 8
-// CHECK18-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 3
-// CHECK18-NEXT:    store i8* null, i8** [[TMP92]], align 8
-// CHECK18-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP96:%.*]] = load i32, i32* [[N]], align 4
-// CHECK18-NEXT:    store i32 [[TMP96]], i32* [[DOTCAPTURE_EXPR_27]], align 4
-// CHECK18-NEXT:    [[TMP97:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_27]], align 4
-// CHECK18-NEXT:    [[SUB29:%.*]] = sub nsw i32 [[TMP97]], 0
+// CHECK18-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 1
+// CHECK18-NEXT:    store i8* null, i8** [[TMP77]], align 8
+// CHECK18-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 2
+// CHECK18-NEXT:    [[TMP79:%.*]] = bitcast i8** [[TMP78]] to i32**
+// CHECK18-NEXT:    store i32* [[VLA]], i32** [[TMP79]], align 8
+// CHECK18-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 2
+// CHECK18-NEXT:    [[TMP81:%.*]] = bitcast i8** [[TMP80]] to i32**
+// CHECK18-NEXT:    store i32* [[VLA]], i32** [[TMP81]], align 8
+// CHECK18-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 2
+// CHECK18-NEXT:    store i64 [[TMP66]], i64* [[TMP82]], align 8
+// CHECK18-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 2
+// CHECK18-NEXT:    store i8* null, i8** [[TMP83]], align 8
+// CHECK18-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 3
+// CHECK18-NEXT:    [[TMP85:%.*]] = bitcast i8** [[TMP84]] to i64*
+// CHECK18-NEXT:    store i64 [[TMP65]], i64* [[TMP85]], align 8
+// CHECK18-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 3
+// CHECK18-NEXT:    [[TMP87:%.*]] = bitcast i8** [[TMP86]] to i64*
+// CHECK18-NEXT:    store i64 [[TMP65]], i64* [[TMP87]], align 8
+// CHECK18-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 3
+// CHECK18-NEXT:    store i8* null, i8** [[TMP88]], align 8
+// CHECK18-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP92:%.*]] = load i32, i32* [[N]], align 4
+// CHECK18-NEXT:    store i32 [[TMP92]], i32* [[DOTCAPTURE_EXPR_27]], align 4
+// CHECK18-NEXT:    [[TMP93:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_27]], align 4
+// CHECK18-NEXT:    [[SUB29:%.*]] = sub nsw i32 [[TMP93]], 0
 // CHECK18-NEXT:    [[DIV30:%.*]] = sdiv i32 [[SUB29]], 1
 // CHECK18-NEXT:    [[SUB31:%.*]] = sub nsw i32 [[DIV30]], 1
 // CHECK18-NEXT:    store i32 [[SUB31]], i32* [[DOTCAPTURE_EXPR_28]], align 4
-// CHECK18-NEXT:    [[TMP98:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_28]], align 4
-// CHECK18-NEXT:    [[ADD32:%.*]] = add nsw i32 [[TMP98]], 1
-// CHECK18-NEXT:    [[TMP99:%.*]] = zext i32 [[ADD32]] to i64
-// CHECK18-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP99]])
-// CHECK18-NEXT:    [[TMP100:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l147.region_id, i32 4, i8** [[TMP93]], i8** [[TMP94]], i64* [[TMP95]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.7, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK18-NEXT:    [[TMP101:%.*]] = icmp ne i32 [[TMP100]], 0
-// CHECK18-NEXT:    br i1 [[TMP101]], label [[OMP_OFFLOAD_FAILED33:%.*]], label [[OMP_OFFLOAD_CONT34:%.*]]
+// CHECK18-NEXT:    [[TMP94:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_28]], align 4
+// CHECK18-NEXT:    [[ADD32:%.*]] = add nsw i32 [[TMP94]], 1
+// CHECK18-NEXT:    [[TMP95:%.*]] = zext i32 [[ADD32]] to i64
+// CHECK18-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP95]])
+// CHECK18-NEXT:    [[TMP96:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l147.region_id, i32 4, i8** [[TMP89]], i8** [[TMP90]], i64* [[TMP91]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK18-NEXT:    [[TMP97:%.*]] = icmp ne i32 [[TMP96]], 0
+// CHECK18-NEXT:    br i1 [[TMP97]], label [[OMP_OFFLOAD_FAILED33:%.*]], label [[OMP_OFFLOAD_CONT34:%.*]]
 // CHECK18:       omp_offload.failed33:
-// CHECK18-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l147(i64 [[TMP65]], i64 [[TMP1]], i32* [[VLA]], i64 [[TMP67]]) #[[ATTR3]]
+// CHECK18-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l147(i64 [[TMP63]], i64 [[TMP1]], i32* [[VLA]], i64 [[TMP65]]) #[[ATTR3]]
 // CHECK18-NEXT:    br label [[OMP_OFFLOAD_CONT34]]
 // CHECK18:       omp_offload.cont34:
-// CHECK18-NEXT:    [[TMP102:%.*]] = load i32, i32* [[N]], align 4
+// CHECK18-NEXT:    [[TMP98:%.*]] = load i32, i32* [[N]], align 4
 // CHECK18-NEXT:    [[CONV36:%.*]] = bitcast i64* [[N_CASTED35]] to i32*
-// CHECK18-NEXT:    store i32 [[TMP102]], i32* [[CONV36]], align 4
-// CHECK18-NEXT:    [[TMP103:%.*]] = load i64, i64* [[N_CASTED35]], align 8
-// CHECK18-NEXT:    [[TMP104:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK18-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS37]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP106:%.*]] = bitcast i8** [[TMP105]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP103]], i64* [[TMP106]], align 8
-// CHECK18-NEXT:    [[TMP107:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS38]], i32 0, i32 0
+// CHECK18-NEXT:    store i32 [[TMP98]], i32* [[CONV36]], align 4
+// CHECK18-NEXT:    [[TMP99:%.*]] = load i64, i64* [[N_CASTED35]], align 8
+// CHECK18-NEXT:    [[TMP100:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK18-NEXT:    [[TMP101:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES40]] to i8*
+// CHECK18-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP101]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes.12 to i8*), i64 24, i1 false)
+// CHECK18-NEXT:    [[TMP102:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS37]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP103:%.*]] = bitcast i8** [[TMP102]] to i64*
+// CHECK18-NEXT:    store i64 [[TMP99]], i64* [[TMP103]], align 8
+// CHECK18-NEXT:    [[TMP104:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS38]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP105:%.*]] = bitcast i8** [[TMP104]] to i64*
+// CHECK18-NEXT:    store i64 [[TMP99]], i64* [[TMP105]], align 8
+// CHECK18-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS39]], i64 0, i64 0
+// CHECK18-NEXT:    store i8* null, i8** [[TMP106]], align 8
+// CHECK18-NEXT:    [[TMP107:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS37]], i32 0, i32 1
 // CHECK18-NEXT:    [[TMP108:%.*]] = bitcast i8** [[TMP107]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP103]], i64* [[TMP108]], align 8
-// CHECK18-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES40]], i32 0, i32 0
-// CHECK18-NEXT:    store i64 4, i64* [[TMP109]], align 8
-// CHECK18-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS39]], i64 0, i64 0
-// CHECK18-NEXT:    store i8* null, i8** [[TMP110]], align 8
-// CHECK18-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS37]], i32 0, i32 1
-// CHECK18-NEXT:    [[TMP112:%.*]] = bitcast i8** [[TMP111]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP1]], i64* [[TMP112]], align 8
-// CHECK18-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS38]], i32 0, i32 1
-// CHECK18-NEXT:    [[TMP114:%.*]] = bitcast i8** [[TMP113]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP1]], i64* [[TMP114]], align 8
-// CHECK18-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES40]], i32 0, i32 1
-// CHECK18-NEXT:    store i64 8, i64* [[TMP115]], align 8
-// CHECK18-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS39]], i64 0, i64 1
-// CHECK18-NEXT:    store i8* null, i8** [[TMP116]], align 8
-// CHECK18-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS37]], i32 0, i32 2
-// CHECK18-NEXT:    [[TMP118:%.*]] = bitcast i8** [[TMP117]] to i32**
-// CHECK18-NEXT:    store i32* [[VLA]], i32** [[TMP118]], align 8
-// CHECK18-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS38]], i32 0, i32 2
-// CHECK18-NEXT:    [[TMP120:%.*]] = bitcast i8** [[TMP119]] to i32**
-// CHECK18-NEXT:    store i32* [[VLA]], i32** [[TMP120]], align 8
-// CHECK18-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES40]], i32 0, i32 2
-// CHECK18-NEXT:    store i64 [[TMP104]], i64* [[TMP121]], align 8
-// CHECK18-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS39]], i64 0, i64 2
-// CHECK18-NEXT:    store i8* null, i8** [[TMP122]], align 8
-// CHECK18-NEXT:    [[TMP123:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS37]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS38]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP125:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES40]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP126:%.*]] = load i32, i32* [[N]], align 4
-// CHECK18-NEXT:    store i32 [[TMP126]], i32* [[DOTCAPTURE_EXPR_42]], align 4
-// CHECK18-NEXT:    [[TMP127:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_42]], align 4
-// CHECK18-NEXT:    [[SUB44:%.*]] = sub nsw i32 [[TMP127]], 0
+// CHECK18-NEXT:    store i64 [[TMP1]], i64* [[TMP108]], align 8
+// CHECK18-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS38]], i32 0, i32 1
+// CHECK18-NEXT:    [[TMP110:%.*]] = bitcast i8** [[TMP109]] to i64*
+// CHECK18-NEXT:    store i64 [[TMP1]], i64* [[TMP110]], align 8
+// CHECK18-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS39]], i64 0, i64 1
+// CHECK18-NEXT:    store i8* null, i8** [[TMP111]], align 8
+// CHECK18-NEXT:    [[TMP112:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS37]], i32 0, i32 2
+// CHECK18-NEXT:    [[TMP113:%.*]] = bitcast i8** [[TMP112]] to i32**
+// CHECK18-NEXT:    store i32* [[VLA]], i32** [[TMP113]], align 8
+// CHECK18-NEXT:    [[TMP114:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS38]], i32 0, i32 2
+// CHECK18-NEXT:    [[TMP115:%.*]] = bitcast i8** [[TMP114]] to i32**
+// CHECK18-NEXT:    store i32* [[VLA]], i32** [[TMP115]], align 8
+// CHECK18-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES40]], i32 0, i32 2
+// CHECK18-NEXT:    store i64 [[TMP100]], i64* [[TMP116]], align 8
+// CHECK18-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS39]], i64 0, i64 2
+// CHECK18-NEXT:    store i8* null, i8** [[TMP117]], align 8
+// CHECK18-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS37]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS38]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP120:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES40]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP121:%.*]] = load i32, i32* [[N]], align 4
+// CHECK18-NEXT:    store i32 [[TMP121]], i32* [[DOTCAPTURE_EXPR_42]], align 4
+// CHECK18-NEXT:    [[TMP122:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_42]], align 4
+// CHECK18-NEXT:    [[SUB44:%.*]] = sub nsw i32 [[TMP122]], 0
 // CHECK18-NEXT:    [[DIV45:%.*]] = sdiv i32 [[SUB44]], 1
 // CHECK18-NEXT:    [[SUB46:%.*]] = sub nsw i32 [[DIV45]], 1
 // CHECK18-NEXT:    store i32 [[SUB46]], i32* [[DOTCAPTURE_EXPR_43]], align 4
-// CHECK18-NEXT:    [[TMP128:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_43]], align 4
-// CHECK18-NEXT:    [[ADD47:%.*]] = add nsw i32 [[TMP128]], 1
-// CHECK18-NEXT:    [[TMP129:%.*]] = zext i32 [[ADD47]] to i64
-// CHECK18-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP129]])
-// CHECK18-NEXT:    [[TMP130:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l151.region_id, i32 3, i8** [[TMP123]], i8** [[TMP124]], i64* [[TMP125]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.10, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK18-NEXT:    [[TMP131:%.*]] = icmp ne i32 [[TMP130]], 0
-// CHECK18-NEXT:    br i1 [[TMP131]], label [[OMP_OFFLOAD_FAILED48:%.*]], label [[OMP_OFFLOAD_CONT49:%.*]]
+// CHECK18-NEXT:    [[TMP123:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_43]], align 4
+// CHECK18-NEXT:    [[ADD47:%.*]] = add nsw i32 [[TMP123]], 1
+// CHECK18-NEXT:    [[TMP124:%.*]] = zext i32 [[ADD47]] to i64
+// CHECK18-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP124]])
+// CHECK18-NEXT:    [[TMP125:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l151.region_id, i32 3, i8** [[TMP118]], i8** [[TMP119]], i64* [[TMP120]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK18-NEXT:    [[TMP126:%.*]] = icmp ne i32 [[TMP125]], 0
+// CHECK18-NEXT:    br i1 [[TMP126]], label [[OMP_OFFLOAD_FAILED48:%.*]], label [[OMP_OFFLOAD_CONT49:%.*]]
 // CHECK18:       omp_offload.failed48:
-// CHECK18-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l151(i64 [[TMP103]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK18-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l151(i64 [[TMP99]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK18-NEXT:    br label [[OMP_OFFLOAD_CONT49]]
 // CHECK18:       omp_offload.cont49:
-// CHECK18-NEXT:    [[TMP132:%.*]] = load i32, i32* [[M]], align 4
-// CHECK18-NEXT:    store i32 [[TMP132]], i32* [[DOTCAPTURE_EXPR_50]], align 4
-// CHECK18-NEXT:    [[TMP133:%.*]] = load i32, i32* [[N]], align 4
+// CHECK18-NEXT:    [[TMP127:%.*]] = load i32, i32* [[M]], align 4
+// CHECK18-NEXT:    store i32 [[TMP127]], i32* [[DOTCAPTURE_EXPR_50]], align 4
+// CHECK18-NEXT:    [[TMP128:%.*]] = load i32, i32* [[N]], align 4
 // CHECK18-NEXT:    [[CONV52:%.*]] = bitcast i64* [[N_CASTED51]] to i32*
-// CHECK18-NEXT:    store i32 [[TMP133]], i32* [[CONV52]], align 4
-// CHECK18-NEXT:    [[TMP134:%.*]] = load i64, i64* [[N_CASTED51]], align 8
-// CHECK18-NEXT:    [[TMP135:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_50]], align 4
+// CHECK18-NEXT:    store i32 [[TMP128]], i32* [[CONV52]], align 4
+// CHECK18-NEXT:    [[TMP129:%.*]] = load i64, i64* [[N_CASTED51]], align 8
+// CHECK18-NEXT:    [[TMP130:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_50]], align 4
 // CHECK18-NEXT:    [[CONV54:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED53]] to i32*
-// CHECK18-NEXT:    store i32 [[TMP135]], i32* [[CONV54]], align 4
-// CHECK18-NEXT:    [[TMP136:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED53]], align 8
-// CHECK18-NEXT:    [[TMP137:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK18-NEXT:    [[TMP138:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS55]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP139:%.*]] = bitcast i8** [[TMP138]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP134]], i64* [[TMP139]], align 8
-// CHECK18-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS56]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP141:%.*]] = bitcast i8** [[TMP140]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP134]], i64* [[TMP141]], align 8
-// CHECK18-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES58]], i32 0, i32 0
-// CHECK18-NEXT:    store i64 4, i64* [[TMP142]], align 8
-// CHECK18-NEXT:    [[TMP143:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS57]], i64 0, i64 0
+// CHECK18-NEXT:    store i32 [[TMP130]], i32* [[CONV54]], align 4
+// CHECK18-NEXT:    [[TMP131:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED53]], align 8
+// CHECK18-NEXT:    [[TMP132:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK18-NEXT:    [[TMP133:%.*]] = bitcast [4 x i64]* [[DOTOFFLOAD_SIZES58]] to i8*
+// CHECK18-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP133]], i8* align 8 bitcast ([4 x i64]* @.offload_sizes.16 to i8*), i64 32, i1 false)
+// CHECK18-NEXT:    [[TMP134:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS55]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP135:%.*]] = bitcast i8** [[TMP134]] to i64*
+// CHECK18-NEXT:    store i64 [[TMP129]], i64* [[TMP135]], align 8
+// CHECK18-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS56]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP137:%.*]] = bitcast i8** [[TMP136]] to i64*
+// CHECK18-NEXT:    store i64 [[TMP129]], i64* [[TMP137]], align 8
+// CHECK18-NEXT:    [[TMP138:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS57]], i64 0, i64 0
+// CHECK18-NEXT:    store i8* null, i8** [[TMP138]], align 8
+// CHECK18-NEXT:    [[TMP139:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS55]], i32 0, i32 1
+// CHECK18-NEXT:    [[TMP140:%.*]] = bitcast i8** [[TMP139]] to i64*
+// CHECK18-NEXT:    store i64 [[TMP1]], i64* [[TMP140]], align 8
+// CHECK18-NEXT:    [[TMP141:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS56]], i32 0, i32 1
+// CHECK18-NEXT:    [[TMP142:%.*]] = bitcast i8** [[TMP141]] to i64*
+// CHECK18-NEXT:    store i64 [[TMP1]], i64* [[TMP142]], align 8
+// CHECK18-NEXT:    [[TMP143:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS57]], i64 0, i64 1
 // CHECK18-NEXT:    store i8* null, i8** [[TMP143]], align 8
-// CHECK18-NEXT:    [[TMP144:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS55]], i32 0, i32 1
-// CHECK18-NEXT:    [[TMP145:%.*]] = bitcast i8** [[TMP144]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP1]], i64* [[TMP145]], align 8
-// CHECK18-NEXT:    [[TMP146:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS56]], i32 0, i32 1
-// CHECK18-NEXT:    [[TMP147:%.*]] = bitcast i8** [[TMP146]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP1]], i64* [[TMP147]], align 8
-// CHECK18-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES58]], i32 0, i32 1
-// CHECK18-NEXT:    store i64 8, i64* [[TMP148]], align 8
-// CHECK18-NEXT:    [[TMP149:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS57]], i64 0, i64 1
+// CHECK18-NEXT:    [[TMP144:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS55]], i32 0, i32 2
+// CHECK18-NEXT:    [[TMP145:%.*]] = bitcast i8** [[TMP144]] to i32**
+// CHECK18-NEXT:    store i32* [[VLA]], i32** [[TMP145]], align 8
+// CHECK18-NEXT:    [[TMP146:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS56]], i32 0, i32 2
+// CHECK18-NEXT:    [[TMP147:%.*]] = bitcast i8** [[TMP146]] to i32**
+// CHECK18-NEXT:    store i32* [[VLA]], i32** [[TMP147]], align 8
+// CHECK18-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES58]], i32 0, i32 2
+// CHECK18-NEXT:    store i64 [[TMP132]], i64* [[TMP148]], align 8
+// CHECK18-NEXT:    [[TMP149:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS57]], i64 0, i64 2
 // CHECK18-NEXT:    store i8* null, i8** [[TMP149]], align 8
-// CHECK18-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS55]], i32 0, i32 2
-// CHECK18-NEXT:    [[TMP151:%.*]] = bitcast i8** [[TMP150]] to i32**
-// CHECK18-NEXT:    store i32* [[VLA]], i32** [[TMP151]], align 8
-// CHECK18-NEXT:    [[TMP152:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS56]], i32 0, i32 2
-// CHECK18-NEXT:    [[TMP153:%.*]] = bitcast i8** [[TMP152]] to i32**
-// CHECK18-NEXT:    store i32* [[VLA]], i32** [[TMP153]], align 8
-// CHECK18-NEXT:    [[TMP154:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES58]], i32 0, i32 2
-// CHECK18-NEXT:    store i64 [[TMP137]], i64* [[TMP154]], align 8
-// CHECK18-NEXT:    [[TMP155:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS57]], i64 0, i64 2
-// CHECK18-NEXT:    store i8* null, i8** [[TMP155]], align 8
-// CHECK18-NEXT:    [[TMP156:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS55]], i32 0, i32 3
-// CHECK18-NEXT:    [[TMP157:%.*]] = bitcast i8** [[TMP156]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP136]], i64* [[TMP157]], align 8
-// CHECK18-NEXT:    [[TMP158:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS56]], i32 0, i32 3
-// CHECK18-NEXT:    [[TMP159:%.*]] = bitcast i8** [[TMP158]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP136]], i64* [[TMP159]], align 8
-// CHECK18-NEXT:    [[TMP160:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES58]], i32 0, i32 3
-// CHECK18-NEXT:    store i64 4, i64* [[TMP160]], align 8
-// CHECK18-NEXT:    [[TMP161:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS57]], i64 0, i64 3
-// CHECK18-NEXT:    store i8* null, i8** [[TMP161]], align 8
-// CHECK18-NEXT:    [[TMP162:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS55]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP163:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS56]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP164:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES58]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP165:%.*]] = load i32, i32* [[N]], align 4
-// CHECK18-NEXT:    store i32 [[TMP165]], i32* [[DOTCAPTURE_EXPR_60]], align 4
-// CHECK18-NEXT:    [[TMP166:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_60]], align 4
-// CHECK18-NEXT:    [[SUB62:%.*]] = sub nsw i32 [[TMP166]], 0
+// CHECK18-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS55]], i32 0, i32 3
+// CHECK18-NEXT:    [[TMP151:%.*]] = bitcast i8** [[TMP150]] to i64*
+// CHECK18-NEXT:    store i64 [[TMP131]], i64* [[TMP151]], align 8
+// CHECK18-NEXT:    [[TMP152:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS56]], i32 0, i32 3
+// CHECK18-NEXT:    [[TMP153:%.*]] = bitcast i8** [[TMP152]] to i64*
+// CHECK18-NEXT:    store i64 [[TMP131]], i64* [[TMP153]], align 8
+// CHECK18-NEXT:    [[TMP154:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS57]], i64 0, i64 3
+// CHECK18-NEXT:    store i8* null, i8** [[TMP154]], align 8
+// CHECK18-NEXT:    [[TMP155:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS55]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP156:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS56]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP157:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES58]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP158:%.*]] = load i32, i32* [[N]], align 4
+// CHECK18-NEXT:    store i32 [[TMP158]], i32* [[DOTCAPTURE_EXPR_60]], align 4
+// CHECK18-NEXT:    [[TMP159:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_60]], align 4
+// CHECK18-NEXT:    [[SUB62:%.*]] = sub nsw i32 [[TMP159]], 0
 // CHECK18-NEXT:    [[DIV63:%.*]] = sdiv i32 [[SUB62]], 1
 // CHECK18-NEXT:    [[SUB64:%.*]] = sub nsw i32 [[DIV63]], 1
 // CHECK18-NEXT:    store i32 [[SUB64]], i32* [[DOTCAPTURE_EXPR_61]], align 4
-// CHECK18-NEXT:    [[TMP167:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_61]], align 4
-// CHECK18-NEXT:    [[ADD65:%.*]] = add nsw i32 [[TMP167]], 1
-// CHECK18-NEXT:    [[TMP168:%.*]] = zext i32 [[ADD65]] to i64
-// CHECK18-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP168]])
-// CHECK18-NEXT:    [[TMP169:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l155.region_id, i32 4, i8** [[TMP162]], i8** [[TMP163]], i64* [[TMP164]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK18-NEXT:    [[TMP170:%.*]] = icmp ne i32 [[TMP169]], 0
-// CHECK18-NEXT:    br i1 [[TMP170]], label [[OMP_OFFLOAD_FAILED66:%.*]], label [[OMP_OFFLOAD_CONT67:%.*]]
+// CHECK18-NEXT:    [[TMP160:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_61]], align 4
+// CHECK18-NEXT:    [[ADD65:%.*]] = add nsw i32 [[TMP160]], 1
+// CHECK18-NEXT:    [[TMP161:%.*]] = zext i32 [[ADD65]] to i64
+// CHECK18-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP161]])
+// CHECK18-NEXT:    [[TMP162:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l155.region_id, i32 4, i8** [[TMP155]], i8** [[TMP156]], i64* [[TMP157]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.17, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK18-NEXT:    [[TMP163:%.*]] = icmp ne i32 [[TMP162]], 0
+// CHECK18-NEXT:    br i1 [[TMP163]], label [[OMP_OFFLOAD_FAILED66:%.*]], label [[OMP_OFFLOAD_CONT67:%.*]]
 // CHECK18:       omp_offload.failed66:
-// CHECK18-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l155(i64 [[TMP134]], i64 [[TMP1]], i32* [[VLA]], i64 [[TMP136]]) #[[ATTR3]]
+// CHECK18-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l155(i64 [[TMP129]], i64 [[TMP1]], i32* [[VLA]], i64 [[TMP131]]) #[[ATTR3]]
 // CHECK18-NEXT:    br label [[OMP_OFFLOAD_CONT67]]
 // CHECK18:       omp_offload.cont67:
-// CHECK18-NEXT:    [[TMP171:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK18-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10EEiT_(i32 noundef signext [[TMP171]])
+// CHECK18-NEXT:    [[TMP164:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK18-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10EEiT_(i32 noundef signext [[TMP164]])
 // CHECK18-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK18-NEXT:    [[TMP172:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK18-NEXT:    call void @llvm.stackrestore(i8* [[TMP172]])
-// CHECK18-NEXT:    [[TMP173:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK18-NEXT:    ret i32 [[TMP173]]
+// CHECK18-NEXT:    [[TMP165:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK18-NEXT:    call void @llvm.stackrestore(i8* [[TMP165]])
+// CHECK18-NEXT:    [[TMP166:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK18-NEXT:    ret i32 [[TMP166]]
 //
 //
 // CHECK18-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l139
@@ -20644,11 +20560,11 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK18-NEXT:    store i32 [[TMP4]], i32* [[CONV3]], align 4
 // CHECK18-NEXT:    [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP3]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP5]])
+// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i64 [[TMP3]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP5]])
 // CHECK18-NEXT:    ret void
 //
 //
-// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..5
+// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..6
 // CHECK18-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK18-NEXT:  entry:
 // CHECK18-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -20735,7 +20651,7 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    [[CONV9:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK18-NEXT:    store i32 [[TMP22]], i32* [[CONV9]], align 4
 // CHECK18-NEXT:    [[TMP23:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i64 [[TMP21]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP23]])
+// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*, i64)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i64 [[TMP21]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP23]])
 // CHECK18-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK18:       omp.inner.for.inc:
 // CHECK18-NEXT:    [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -20777,7 +20693,7 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    ret void
 //
 //
-// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..6
+// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..7
 // CHECK18-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK18-NEXT:  entry:
 // CHECK18-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -20902,11 +20818,11 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32*
 // CHECK18-NEXT:    store i32 [[TMP2]], i32* [[CONV1]], align 4
 // CHECK18-NEXT:    [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8
-// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), i64 [[TMP3]], i64 [[TMP0]], i32* [[TMP1]])
+// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), i64 [[TMP3]], i64 [[TMP0]], i32* [[TMP1]])
 // CHECK18-NEXT:    ret void
 //
 //
-// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..8
+// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..10
 // CHECK18-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] {
 // CHECK18-NEXT:  entry:
 // CHECK18-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -20983,7 +20899,7 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32*
 // CHECK18-NEXT:    store i32 [[TMP19]], i32* [[CONV6]], align 4
 // CHECK18-NEXT:    [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8
-// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], i32* [[TMP1]])
+// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], i32* [[TMP1]])
 // CHECK18-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK18:       omp.inner.for.inc:
 // CHECK18-NEXT:    [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -21002,7 +20918,7 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    ret void
 //
 //
-// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..9
+// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..11
 // CHECK18-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] {
 // CHECK18-NEXT:  entry:
 // CHECK18-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -21128,11 +21044,11 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK18-NEXT:    store i32 [[TMP4]], i32* [[CONV3]], align 4
 // CHECK18-NEXT:    [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP3]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP5]])
+// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i64 [[TMP3]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP5]])
 // CHECK18-NEXT:    ret void
 //
 //
-// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..11
+// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..14
 // CHECK18-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK18-NEXT:  entry:
 // CHECK18-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -21217,7 +21133,7 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    [[CONV9:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK18-NEXT:    store i32 [[TMP21]], i32* [[CONV9]], align 4
 // CHECK18-NEXT:    [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*, i64)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP22]])
+// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP22]])
 // CHECK18-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK18:       omp.inner.for.inc:
 // CHECK18-NEXT:    [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -21236,7 +21152,7 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    ret void
 //
 //
-// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..12
+// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..15
 // CHECK18-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK18-NEXT:  entry:
 // CHECK18-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -21342,7 +21258,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK18-LABEL: define {{[^@]+}}@_Z5tmainIiLi10EEiT_
-// CHECK18-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK18-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK18-NEXT:  entry:
 // CHECK18-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK18-NEXT:    [[A:%.*]] = alloca [10 x i32], align 4
@@ -21384,7 +21300,7 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK18-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK18-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK18-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l112.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.16, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK18-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l112.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.20, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.21, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK18-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK18-NEXT:    br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK18:       omp_offload.failed:
@@ -21402,7 +21318,7 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK18-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
 // CHECK18-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK18-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l116.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.19, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.20, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK18-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l116.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.24, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.25, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK18-NEXT:    [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
 // CHECK18-NEXT:    br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]]
 // CHECK18:       omp_offload.failed5:
@@ -21434,7 +21350,7 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0
 // CHECK18-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS8]], i32 0, i32 0
 // CHECK18-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK18-NEXT:    [[TMP33:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l120.region_id, i32 2, i8** [[TMP31]], i8** [[TMP32]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.23, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.24, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK18-NEXT:    [[TMP33:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l120.region_id, i32 2, i8** [[TMP31]], i8** [[TMP32]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.28, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.29, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK18-NEXT:    [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0
 // CHECK18-NEXT:    br i1 [[TMP34]], label [[OMP_OFFLOAD_FAILED11:%.*]], label [[OMP_OFFLOAD_CONT12:%.*]]
 // CHECK18:       omp_offload.failed11:
@@ -21452,7 +21368,7 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0
 // CHECK18-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 0
 // CHECK18-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK18-NEXT:    [[TMP42:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l124.region_id, i32 1, i8** [[TMP40]], i8** [[TMP41]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.27, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.28, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK18-NEXT:    [[TMP42:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l124.region_id, i32 1, i8** [[TMP40]], i8** [[TMP41]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.32, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.33, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK18-NEXT:    [[TMP43:%.*]] = icmp ne i32 [[TMP42]], 0
 // CHECK18-NEXT:    br i1 [[TMP43]], label [[OMP_OFFLOAD_FAILED17:%.*]], label [[OMP_OFFLOAD_CONT18:%.*]]
 // CHECK18:       omp_offload.failed17:
@@ -21484,7 +21400,7 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 0
 // CHECK18-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 0
 // CHECK18-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK18-NEXT:    [[TMP59:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l128.region_id, i32 2, i8** [[TMP57]], i8** [[TMP58]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.31, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.32, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK18-NEXT:    [[TMP59:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l128.region_id, i32 2, i8** [[TMP57]], i8** [[TMP58]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.36, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.37, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK18-NEXT:    [[TMP60:%.*]] = icmp ne i32 [[TMP59]], 0
 // CHECK18-NEXT:    br i1 [[TMP60]], label [[OMP_OFFLOAD_FAILED26:%.*]], label [[OMP_OFFLOAD_CONT27:%.*]]
 // CHECK18:       omp_offload.failed26:
@@ -21500,11 +21416,11 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8
 // CHECK18-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8
 // CHECK18-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8
-// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK18-NEXT:    ret void
 //
 //
-// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..14
+// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..18
 // CHECK18-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK18-NEXT:  entry:
 // CHECK18-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -21552,7 +21468,7 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
 // CHECK18-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
 // CHECK18-NEXT:    [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
-// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]])
+// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]])
 // CHECK18-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK18:       omp.inner.for.inc:
 // CHECK18-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -21567,7 +21483,7 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    ret void
 //
 //
-// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..15
+// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..19
 // CHECK18-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK18-NEXT:  entry:
 // CHECK18-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -21650,11 +21566,11 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8
 // CHECK18-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8
 // CHECK18-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8
-// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..17 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..22 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK18-NEXT:    ret void
 //
 //
-// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..17
+// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..22
 // CHECK18-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK18-NEXT:  entry:
 // CHECK18-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -21702,7 +21618,7 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
 // CHECK18-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
 // CHECK18-NEXT:    [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
-// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]])
+// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..23 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]])
 // CHECK18-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK18:       omp.inner.for.inc:
 // CHECK18-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -21717,7 +21633,7 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    ret void
 //
 //
-// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..18
+// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..23
 // CHECK18-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK18-NEXT:  entry:
 // CHECK18-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -21808,11 +21724,11 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK18-NEXT:    store i32 [[TMP1]], i32* [[CONV1]], align 4
 // CHECK18-NEXT:    [[TMP2:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i64)* @.omp_outlined..21 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i64 [[TMP2]])
+// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i64)* @.omp_outlined..26 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i64 [[TMP2]])
 // CHECK18-NEXT:    ret void
 //
 //
-// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..21
+// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..26
 // CHECK18-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK18-NEXT:  entry:
 // CHECK18-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -21868,7 +21784,7 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK18-NEXT:    store i32 [[TMP12]], i32* [[CONV2]], align 4
 // CHECK18-NEXT:    [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..22 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]])
+// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..27 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]])
 // CHECK18-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK18:       omp.inner.for.inc:
 // CHECK18-NEXT:    [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -21883,7 +21799,7 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    ret void
 //
 //
-// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..22
+// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..27
 // CHECK18-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK18-NEXT:  entry:
 // CHECK18-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -21991,11 +21907,11 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8
 // CHECK18-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8
 // CHECK18-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8
-// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..25 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK18-NEXT:    ret void
 //
 //
-// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..25
+// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..30
 // CHECK18-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK18-NEXT:  entry:
 // CHECK18-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -22043,7 +21959,7 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
 // CHECK18-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
 // CHECK18-NEXT:    [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
-// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]])
+// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]])
 // CHECK18-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK18:       omp.inner.for.inc:
 // CHECK18-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -22058,7 +21974,7 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    ret void
 //
 //
-// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..26
+// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..31
 // CHECK18-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK18-NEXT:  entry:
 // CHECK18-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -22147,11 +22063,11 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK18-NEXT:    store i32 [[TMP1]], i32* [[CONV1]], align 4
 // CHECK18-NEXT:    [[TMP2:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i64)* @.omp_outlined..29 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i64 [[TMP2]])
+// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i64)* @.omp_outlined..34 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i64 [[TMP2]])
 // CHECK18-NEXT:    ret void
 //
 //
-// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..29
+// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..34
 // CHECK18-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK18-NEXT:  entry:
 // CHECK18-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -22207,7 +22123,7 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK18-NEXT:    store i32 [[TMP12]], i32* [[CONV2]], align 4
 // CHECK18-NEXT:    [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]])
+// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]])
 // CHECK18-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK18:       omp.inner.for.inc:
 // CHECK18-NEXT:    [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -22222,7 +22138,7 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    ret void
 //
 //
-// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..30
+// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..35
 // CHECK18-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK18-NEXT:  entry:
 // CHECK18-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -22302,7 +22218,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK18-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK18-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK18-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK18-NEXT:  entry:
 // CHECK18-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK18-NEXT:    ret void
@@ -22377,14 +22293,14 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4
 // CHECK19-NEXT:    [[TMP4:%.*]] = mul nuw i32 [[TMP0]], 4
 // CHECK19-NEXT:    [[TMP5:%.*]] = sext i32 [[TMP4]] to i64
-// CHECK19-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP3]], i32* [[TMP7]], align 4
-// CHECK19-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP3]], i32* [[TMP9]], align 4
-// CHECK19-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK19-NEXT:    store i64 4, i64* [[TMP10]], align 4
+// CHECK19-NEXT:    [[TMP6:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK19-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP6]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes to i8*), i32 24, i1 false)
+// CHECK19-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP3]], i32* [[TMP8]], align 4
+// CHECK19-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP3]], i32* [[TMP10]], align 4
 // CHECK19-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
 // CHECK19-NEXT:    store i8* null, i8** [[TMP11]], align 4
 // CHECK19-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -22393,302 +22309,288 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK19-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i32*
 // CHECK19-NEXT:    store i32 [[TMP0]], i32* [[TMP15]], align 4
-// CHECK19-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK19-NEXT:    store i64 4, i64* [[TMP16]], align 4
-// CHECK19-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK19-NEXT:    store i8* null, i8** [[TMP17]], align 4
-// CHECK19-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK19-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
-// CHECK19-NEXT:    store i32* [[VLA]], i32** [[TMP19]], align 4
-// CHECK19-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK19-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32**
-// CHECK19-NEXT:    store i32* [[VLA]], i32** [[TMP21]], align 4
-// CHECK19-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK19-NEXT:    store i64 [[TMP5]], i64* [[TMP22]], align 4
-// CHECK19-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK19-NEXT:    store i8* null, i8** [[TMP23]], align 4
-// CHECK19-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP27:%.*]] = load i32, i32* [[N]], align 4
-// CHECK19-NEXT:    store i32 [[TMP27]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK19-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK19-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP28]], 0
+// CHECK19-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK19-NEXT:    store i8* null, i8** [[TMP16]], align 4
+// CHECK19-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK19-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32**
+// CHECK19-NEXT:    store i32* [[VLA]], i32** [[TMP18]], align 4
+// CHECK19-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK19-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32**
+// CHECK19-NEXT:    store i32* [[VLA]], i32** [[TMP20]], align 4
+// CHECK19-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
+// CHECK19-NEXT:    store i64 [[TMP5]], i64* [[TMP21]], align 4
+// CHECK19-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK19-NEXT:    store i8* null, i8** [[TMP22]], align 4
+// CHECK19-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP26:%.*]] = load i32, i32* [[N]], align 4
+// CHECK19-NEXT:    store i32 [[TMP26]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK19-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK19-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP27]], 0
 // CHECK19-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK19-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK19-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK19-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK19-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP29]], 1
-// CHECK19-NEXT:    [[TMP30:%.*]] = zext i32 [[ADD]] to i64
-// CHECK19-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP30]])
-// CHECK19-NEXT:    [[TMP31:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l139.region_id, i32 3, i8** [[TMP24]], i8** [[TMP25]], i64* [[TMP26]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK19-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
-// CHECK19-NEXT:    br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK19-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK19-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP28]], 1
+// CHECK19-NEXT:    [[TMP29:%.*]] = zext i32 [[ADD]] to i64
+// CHECK19-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP29]])
+// CHECK19-NEXT:    [[TMP30:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l139.region_id, i32 3, i8** [[TMP23]], i8** [[TMP24]], i64* [[TMP25]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK19-NEXT:    [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
+// CHECK19-NEXT:    br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK19:       omp_offload.failed:
 // CHECK19-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l139(i32 [[TMP3]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK19-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK19:       omp_offload.cont:
-// CHECK19-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N]], align 4
-// CHECK19-NEXT:    store i32 [[TMP33]], i32* [[N_CASTED3]], align 4
-// CHECK19-NEXT:    [[TMP34:%.*]] = load i32, i32* [[N_CASTED3]], align 4
-// CHECK19-NEXT:    [[TMP35:%.*]] = mul nuw i32 [[TMP0]], 4
-// CHECK19-NEXT:    [[TMP36:%.*]] = sext i32 [[TMP35]] to i64
+// CHECK19-NEXT:    [[TMP32:%.*]] = load i32, i32* [[N]], align 4
+// CHECK19-NEXT:    store i32 [[TMP32]], i32* [[N_CASTED3]], align 4
+// CHECK19-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N_CASTED3]], align 4
+// CHECK19-NEXT:    [[TMP34:%.*]] = mul nuw i32 [[TMP0]], 4
+// CHECK19-NEXT:    [[TMP35:%.*]] = sext i32 [[TMP34]] to i64
+// CHECK19-NEXT:    [[TMP36:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES7]] to i8*
+// CHECK19-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP36]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes.4 to i8*), i32 24, i1 false)
 // CHECK19-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
 // CHECK19-NEXT:    [[TMP38:%.*]] = bitcast i8** [[TMP37]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP34]], i32* [[TMP38]], align 4
+// CHECK19-NEXT:    store i32 [[TMP33]], i32* [[TMP38]], align 4
 // CHECK19-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
 // CHECK19-NEXT:    [[TMP40:%.*]] = bitcast i8** [[TMP39]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP34]], i32* [[TMP40]], align 4
-// CHECK19-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 0
-// CHECK19-NEXT:    store i64 4, i64* [[TMP41]], align 4
-// CHECK19-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 0
-// CHECK19-NEXT:    store i8* null, i8** [[TMP42]], align 4
-// CHECK19-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 1
-// CHECK19-NEXT:    [[TMP44:%.*]] = bitcast i8** [[TMP43]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP0]], i32* [[TMP44]], align 4
-// CHECK19-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 1
-// CHECK19-NEXT:    [[TMP46:%.*]] = bitcast i8** [[TMP45]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP0]], i32* [[TMP46]], align 4
-// CHECK19-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 1
-// CHECK19-NEXT:    store i64 4, i64* [[TMP47]], align 4
-// CHECK19-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 1
-// CHECK19-NEXT:    store i8* null, i8** [[TMP48]], align 4
-// CHECK19-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 2
+// CHECK19-NEXT:    store i32 [[TMP33]], i32* [[TMP40]], align 4
+// CHECK19-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 0
+// CHECK19-NEXT:    store i8* null, i8** [[TMP41]], align 4
+// CHECK19-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 1
+// CHECK19-NEXT:    [[TMP43:%.*]] = bitcast i8** [[TMP42]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP0]], i32* [[TMP43]], align 4
+// CHECK19-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 1
+// CHECK19-NEXT:    [[TMP45:%.*]] = bitcast i8** [[TMP44]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP0]], i32* [[TMP45]], align 4
+// CHECK19-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 1
+// CHECK19-NEXT:    store i8* null, i8** [[TMP46]], align 4
+// CHECK19-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 2
+// CHECK19-NEXT:    [[TMP48:%.*]] = bitcast i8** [[TMP47]] to i32**
+// CHECK19-NEXT:    store i32* [[VLA]], i32** [[TMP48]], align 4
+// CHECK19-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 2
 // CHECK19-NEXT:    [[TMP50:%.*]] = bitcast i8** [[TMP49]] to i32**
 // CHECK19-NEXT:    store i32* [[VLA]], i32** [[TMP50]], align 4
-// CHECK19-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 2
-// CHECK19-NEXT:    [[TMP52:%.*]] = bitcast i8** [[TMP51]] to i32**
-// CHECK19-NEXT:    store i32* [[VLA]], i32** [[TMP52]], align 4
-// CHECK19-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 2
-// CHECK19-NEXT:    store i64 [[TMP36]], i64* [[TMP53]], align 4
-// CHECK19-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 2
-// CHECK19-NEXT:    store i8* null, i8** [[TMP54]], align 4
-// CHECK19-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP58:%.*]] = load i32, i32* [[N]], align 4
-// CHECK19-NEXT:    store i32 [[TMP58]], i32* [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK19-NEXT:    [[TMP59:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK19-NEXT:    [[SUB11:%.*]] = sub nsw i32 [[TMP59]], 0
+// CHECK19-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 2
+// CHECK19-NEXT:    store i64 [[TMP35]], i64* [[TMP51]], align 4
+// CHECK19-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 2
+// CHECK19-NEXT:    store i8* null, i8** [[TMP52]], align 4
+// CHECK19-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP56:%.*]] = load i32, i32* [[N]], align 4
+// CHECK19-NEXT:    store i32 [[TMP56]], i32* [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK19-NEXT:    [[TMP57:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK19-NEXT:    [[SUB11:%.*]] = sub nsw i32 [[TMP57]], 0
 // CHECK19-NEXT:    [[DIV12:%.*]] = sdiv i32 [[SUB11]], 1
 // CHECK19-NEXT:    [[SUB13:%.*]] = sub nsw i32 [[DIV12]], 1
 // CHECK19-NEXT:    store i32 [[SUB13]], i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK19-NEXT:    [[TMP60:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK19-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP60]], 1
-// CHECK19-NEXT:    [[TMP61:%.*]] = zext i32 [[ADD14]] to i64
-// CHECK19-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP61]])
-// CHECK19-NEXT:    [[TMP62:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l143.region_id, i32 3, i8** [[TMP55]], i8** [[TMP56]], i64* [[TMP57]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK19-NEXT:    [[TMP63:%.*]] = icmp ne i32 [[TMP62]], 0
-// CHECK19-NEXT:    br i1 [[TMP63]], label [[OMP_OFFLOAD_FAILED15:%.*]], label [[OMP_OFFLOAD_CONT16:%.*]]
+// CHECK19-NEXT:    [[TMP58:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
+// CHECK19-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP58]], 1
+// CHECK19-NEXT:    [[TMP59:%.*]] = zext i32 [[ADD14]] to i64
+// CHECK19-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP59]])
+// CHECK19-NEXT:    [[TMP60:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l143.region_id, i32 3, i8** [[TMP53]], i8** [[TMP54]], i64* [[TMP55]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.5, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK19-NEXT:    [[TMP61:%.*]] = icmp ne i32 [[TMP60]], 0
+// CHECK19-NEXT:    br i1 [[TMP61]], label [[OMP_OFFLOAD_FAILED15:%.*]], label [[OMP_OFFLOAD_CONT16:%.*]]
 // CHECK19:       omp_offload.failed15:
-// CHECK19-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l143(i32 [[TMP34]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK19-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l143(i32 [[TMP33]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK19-NEXT:    br label [[OMP_OFFLOAD_CONT16]]
 // CHECK19:       omp_offload.cont16:
-// CHECK19-NEXT:    [[TMP64:%.*]] = load i32, i32* [[M]], align 4
-// CHECK19-NEXT:    store i32 [[TMP64]], i32* [[DOTCAPTURE_EXPR_17]], align 4
-// CHECK19-NEXT:    [[TMP65:%.*]] = load i32, i32* [[N]], align 4
-// CHECK19-NEXT:    store i32 [[TMP65]], i32* [[N_CASTED18]], align 4
-// CHECK19-NEXT:    [[TMP66:%.*]] = load i32, i32* [[N_CASTED18]], align 4
-// CHECK19-NEXT:    [[TMP67:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_17]], align 4
-// CHECK19-NEXT:    store i32 [[TMP67]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK19-NEXT:    [[TMP68:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK19-NEXT:    [[TMP69:%.*]] = mul nuw i32 [[TMP0]], 4
-// CHECK19-NEXT:    [[TMP70:%.*]] = sext i32 [[TMP69]] to i64
-// CHECK19-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP72:%.*]] = bitcast i8** [[TMP71]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP66]], i32* [[TMP72]], align 4
-// CHECK19-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP74:%.*]] = bitcast i8** [[TMP73]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP66]], i32* [[TMP74]], align 4
-// CHECK19-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 0
-// CHECK19-NEXT:    store i64 4, i64* [[TMP75]], align 4
-// CHECK19-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 0
-// CHECK19-NEXT:    store i8* null, i8** [[TMP76]], align 4
-// CHECK19-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 1
+// CHECK19-NEXT:    [[TMP62:%.*]] = load i32, i32* [[M]], align 4
+// CHECK19-NEXT:    store i32 [[TMP62]], i32* [[DOTCAPTURE_EXPR_17]], align 4
+// CHECK19-NEXT:    [[TMP63:%.*]] = load i32, i32* [[N]], align 4
+// CHECK19-NEXT:    store i32 [[TMP63]], i32* [[N_CASTED18]], align 4
+// CHECK19-NEXT:    [[TMP64:%.*]] = load i32, i32* [[N_CASTED18]], align 4
+// CHECK19-NEXT:    [[TMP65:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_17]], align 4
+// CHECK19-NEXT:    store i32 [[TMP65]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
+// CHECK19-NEXT:    [[TMP66:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
+// CHECK19-NEXT:    [[TMP67:%.*]] = mul nuw i32 [[TMP0]], 4
+// CHECK19-NEXT:    [[TMP68:%.*]] = sext i32 [[TMP67]] to i64
+// CHECK19-NEXT:    [[TMP69:%.*]] = bitcast [4 x i64]* [[DOTOFFLOAD_SIZES22]] to i8*
+// CHECK19-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP69]], i8* align 4 bitcast ([4 x i64]* @.offload_sizes.8 to i8*), i32 32, i1 false)
+// CHECK19-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP71:%.*]] = bitcast i8** [[TMP70]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP64]], i32* [[TMP71]], align 4
+// CHECK19-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP73:%.*]] = bitcast i8** [[TMP72]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP64]], i32* [[TMP73]], align 4
+// CHECK19-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 0
+// CHECK19-NEXT:    store i8* null, i8** [[TMP74]], align 4
+// CHECK19-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 1
+// CHECK19-NEXT:    [[TMP76:%.*]] = bitcast i8** [[TMP75]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP0]], i32* [[TMP76]], align 4
+// CHECK19-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 1
 // CHECK19-NEXT:    [[TMP78:%.*]] = bitcast i8** [[TMP77]] to i32*
 // CHECK19-NEXT:    store i32 [[TMP0]], i32* [[TMP78]], align 4
-// CHECK19-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 1
-// CHECK19-NEXT:    [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP0]], i32* [[TMP80]], align 4
-// CHECK19-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 1
-// CHECK19-NEXT:    store i64 4, i64* [[TMP81]], align 4
-// CHECK19-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 1
-// CHECK19-NEXT:    store i8* null, i8** [[TMP82]], align 4
-// CHECK19-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 2
-// CHECK19-NEXT:    [[TMP84:%.*]] = bitcast i8** [[TMP83]] to i32**
-// CHECK19-NEXT:    store i32* [[VLA]], i32** [[TMP84]], align 4
-// CHECK19-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 2
-// CHECK19-NEXT:    [[TMP86:%.*]] = bitcast i8** [[TMP85]] to i32**
-// CHECK19-NEXT:    store i32* [[VLA]], i32** [[TMP86]], align 4
-// CHECK19-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 2
-// CHECK19-NEXT:    store i64 [[TMP70]], i64* [[TMP87]], align 4
-// CHECK19-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 2
-// CHECK19-NEXT:    store i8* null, i8** [[TMP88]], align 4
-// CHECK19-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 3
-// CHECK19-NEXT:    [[TMP90:%.*]] = bitcast i8** [[TMP89]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP68]], i32* [[TMP90]], align 4
-// CHECK19-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 3
-// CHECK19-NEXT:    [[TMP92:%.*]] = bitcast i8** [[TMP91]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP68]], i32* [[TMP92]], align 4
-// CHECK19-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 3
-// CHECK19-NEXT:    store i64 4, i64* [[TMP93]], align 4
-// CHECK19-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 3
-// CHECK19-NEXT:    store i8* null, i8** [[TMP94]], align 4
-// CHECK19-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP98:%.*]] = load i32, i32* [[N]], align 4
-// CHECK19-NEXT:    store i32 [[TMP98]], i32* [[DOTCAPTURE_EXPR_24]], align 4
-// CHECK19-NEXT:    [[TMP99:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_24]], align 4
-// CHECK19-NEXT:    [[SUB26:%.*]] = sub nsw i32 [[TMP99]], 0
+// CHECK19-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 1
+// CHECK19-NEXT:    store i8* null, i8** [[TMP79]], align 4
+// CHECK19-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 2
+// CHECK19-NEXT:    [[TMP81:%.*]] = bitcast i8** [[TMP80]] to i32**
+// CHECK19-NEXT:    store i32* [[VLA]], i32** [[TMP81]], align 4
+// CHECK19-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 2
+// CHECK19-NEXT:    [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i32**
+// CHECK19-NEXT:    store i32* [[VLA]], i32** [[TMP83]], align 4
+// CHECK19-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 2
+// CHECK19-NEXT:    store i64 [[TMP68]], i64* [[TMP84]], align 4
+// CHECK19-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 2
+// CHECK19-NEXT:    store i8* null, i8** [[TMP85]], align 4
+// CHECK19-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 3
+// CHECK19-NEXT:    [[TMP87:%.*]] = bitcast i8** [[TMP86]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP66]], i32* [[TMP87]], align 4
+// CHECK19-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 3
+// CHECK19-NEXT:    [[TMP89:%.*]] = bitcast i8** [[TMP88]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP66]], i32* [[TMP89]], align 4
+// CHECK19-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 3
+// CHECK19-NEXT:    store i8* null, i8** [[TMP90]], align 4
+// CHECK19-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP94:%.*]] = load i32, i32* [[N]], align 4
+// CHECK19-NEXT:    store i32 [[TMP94]], i32* [[DOTCAPTURE_EXPR_24]], align 4
+// CHECK19-NEXT:    [[TMP95:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_24]], align 4
+// CHECK19-NEXT:    [[SUB26:%.*]] = sub nsw i32 [[TMP95]], 0
 // CHECK19-NEXT:    [[DIV27:%.*]] = sdiv i32 [[SUB26]], 1
 // CHECK19-NEXT:    [[SUB28:%.*]] = sub nsw i32 [[DIV27]], 1
 // CHECK19-NEXT:    store i32 [[SUB28]], i32* [[DOTCAPTURE_EXPR_25]], align 4
-// CHECK19-NEXT:    [[TMP100:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_25]], align 4
-// CHECK19-NEXT:    [[ADD29:%.*]] = add nsw i32 [[TMP100]], 1
-// CHECK19-NEXT:    [[TMP101:%.*]] = zext i32 [[ADD29]] to i64
-// CHECK19-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP101]])
-// CHECK19-NEXT:    [[TMP102:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l147.region_id, i32 4, i8** [[TMP95]], i8** [[TMP96]], i64* [[TMP97]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.7, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK19-NEXT:    [[TMP103:%.*]] = icmp ne i32 [[TMP102]], 0
-// CHECK19-NEXT:    br i1 [[TMP103]], label [[OMP_OFFLOAD_FAILED30:%.*]], label [[OMP_OFFLOAD_CONT31:%.*]]
+// CHECK19-NEXT:    [[TMP96:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_25]], align 4
+// CHECK19-NEXT:    [[ADD29:%.*]] = add nsw i32 [[TMP96]], 1
+// CHECK19-NEXT:    [[TMP97:%.*]] = zext i32 [[ADD29]] to i64
+// CHECK19-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP97]])
+// CHECK19-NEXT:    [[TMP98:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l147.region_id, i32 4, i8** [[TMP91]], i8** [[TMP92]], i64* [[TMP93]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK19-NEXT:    [[TMP99:%.*]] = icmp ne i32 [[TMP98]], 0
+// CHECK19-NEXT:    br i1 [[TMP99]], label [[OMP_OFFLOAD_FAILED30:%.*]], label [[OMP_OFFLOAD_CONT31:%.*]]
 // CHECK19:       omp_offload.failed30:
-// CHECK19-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l147(i32 [[TMP66]], i32 [[TMP0]], i32* [[VLA]], i32 [[TMP68]]) #[[ATTR3]]
+// CHECK19-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l147(i32 [[TMP64]], i32 [[TMP0]], i32* [[VLA]], i32 [[TMP66]]) #[[ATTR3]]
 // CHECK19-NEXT:    br label [[OMP_OFFLOAD_CONT31]]
 // CHECK19:       omp_offload.cont31:
-// CHECK19-NEXT:    [[TMP104:%.*]] = load i32, i32* [[N]], align 4
-// CHECK19-NEXT:    store i32 [[TMP104]], i32* [[N_CASTED32]], align 4
-// CHECK19-NEXT:    [[TMP105:%.*]] = load i32, i32* [[N_CASTED32]], align 4
-// CHECK19-NEXT:    [[TMP106:%.*]] = mul nuw i32 [[TMP0]], 4
-// CHECK19-NEXT:    [[TMP107:%.*]] = sext i32 [[TMP106]] to i64
-// CHECK19-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS33]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP109:%.*]] = bitcast i8** [[TMP108]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP105]], i32* [[TMP109]], align 4
-// CHECK19-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS34]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP100:%.*]] = load i32, i32* [[N]], align 4
+// CHECK19-NEXT:    store i32 [[TMP100]], i32* [[N_CASTED32]], align 4
+// CHECK19-NEXT:    [[TMP101:%.*]] = load i32, i32* [[N_CASTED32]], align 4
+// CHECK19-NEXT:    [[TMP102:%.*]] = mul nuw i32 [[TMP0]], 4
+// CHECK19-NEXT:    [[TMP103:%.*]] = sext i32 [[TMP102]] to i64
+// CHECK19-NEXT:    [[TMP104:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES36]] to i8*
+// CHECK19-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP104]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes.12 to i8*), i32 24, i1 false)
+// CHECK19-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS33]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP106:%.*]] = bitcast i8** [[TMP105]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP101]], i32* [[TMP106]], align 4
+// CHECK19-NEXT:    [[TMP107:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS34]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP108:%.*]] = bitcast i8** [[TMP107]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP101]], i32* [[TMP108]], align 4
+// CHECK19-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS35]], i32 0, i32 0
+// CHECK19-NEXT:    store i8* null, i8** [[TMP109]], align 4
+// CHECK19-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS33]], i32 0, i32 1
 // CHECK19-NEXT:    [[TMP111:%.*]] = bitcast i8** [[TMP110]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP105]], i32* [[TMP111]], align 4
-// CHECK19-NEXT:    [[TMP112:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES36]], i32 0, i32 0
-// CHECK19-NEXT:    store i64 4, i64* [[TMP112]], align 4
-// CHECK19-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS35]], i32 0, i32 0
-// CHECK19-NEXT:    store i8* null, i8** [[TMP113]], align 4
-// CHECK19-NEXT:    [[TMP114:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS33]], i32 0, i32 1
-// CHECK19-NEXT:    [[TMP115:%.*]] = bitcast i8** [[TMP114]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP0]], i32* [[TMP115]], align 4
-// CHECK19-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS34]], i32 0, i32 1
-// CHECK19-NEXT:    [[TMP117:%.*]] = bitcast i8** [[TMP116]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP0]], i32* [[TMP117]], align 4
-// CHECK19-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES36]], i32 0, i32 1
-// CHECK19-NEXT:    store i64 4, i64* [[TMP118]], align 4
-// CHECK19-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS35]], i32 0, i32 1
-// CHECK19-NEXT:    store i8* null, i8** [[TMP119]], align 4
-// CHECK19-NEXT:    [[TMP120:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS33]], i32 0, i32 2
-// CHECK19-NEXT:    [[TMP121:%.*]] = bitcast i8** [[TMP120]] to i32**
-// CHECK19-NEXT:    store i32* [[VLA]], i32** [[TMP121]], align 4
-// CHECK19-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS34]], i32 0, i32 2
-// CHECK19-NEXT:    [[TMP123:%.*]] = bitcast i8** [[TMP122]] to i32**
-// CHECK19-NEXT:    store i32* [[VLA]], i32** [[TMP123]], align 4
-// CHECK19-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES36]], i32 0, i32 2
-// CHECK19-NEXT:    store i64 [[TMP107]], i64* [[TMP124]], align 4
-// CHECK19-NEXT:    [[TMP125:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS35]], i32 0, i32 2
-// CHECK19-NEXT:    store i8* null, i8** [[TMP125]], align 4
-// CHECK19-NEXT:    [[TMP126:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS33]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS34]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP128:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES36]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP129:%.*]] = load i32, i32* [[N]], align 4
-// CHECK19-NEXT:    store i32 [[TMP129]], i32* [[DOTCAPTURE_EXPR_38]], align 4
-// CHECK19-NEXT:    [[TMP130:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_38]], align 4
-// CHECK19-NEXT:    [[SUB40:%.*]] = sub nsw i32 [[TMP130]], 0
+// CHECK19-NEXT:    store i32 [[TMP0]], i32* [[TMP111]], align 4
+// CHECK19-NEXT:    [[TMP112:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS34]], i32 0, i32 1
+// CHECK19-NEXT:    [[TMP113:%.*]] = bitcast i8** [[TMP112]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP0]], i32* [[TMP113]], align 4
+// CHECK19-NEXT:    [[TMP114:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS35]], i32 0, i32 1
+// CHECK19-NEXT:    store i8* null, i8** [[TMP114]], align 4
+// CHECK19-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS33]], i32 0, i32 2
+// CHECK19-NEXT:    [[TMP116:%.*]] = bitcast i8** [[TMP115]] to i32**
+// CHECK19-NEXT:    store i32* [[VLA]], i32** [[TMP116]], align 4
+// CHECK19-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS34]], i32 0, i32 2
+// CHECK19-NEXT:    [[TMP118:%.*]] = bitcast i8** [[TMP117]] to i32**
+// CHECK19-NEXT:    store i32* [[VLA]], i32** [[TMP118]], align 4
+// CHECK19-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES36]], i32 0, i32 2
+// CHECK19-NEXT:    store i64 [[TMP103]], i64* [[TMP119]], align 4
+// CHECK19-NEXT:    [[TMP120:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS35]], i32 0, i32 2
+// CHECK19-NEXT:    store i8* null, i8** [[TMP120]], align 4
+// CHECK19-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS33]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS34]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP123:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES36]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP124:%.*]] = load i32, i32* [[N]], align 4
+// CHECK19-NEXT:    store i32 [[TMP124]], i32* [[DOTCAPTURE_EXPR_38]], align 4
+// CHECK19-NEXT:    [[TMP125:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_38]], align 4
+// CHECK19-NEXT:    [[SUB40:%.*]] = sub nsw i32 [[TMP125]], 0
 // CHECK19-NEXT:    [[DIV41:%.*]] = sdiv i32 [[SUB40]], 1
 // CHECK19-NEXT:    [[SUB42:%.*]] = sub nsw i32 [[DIV41]], 1
 // CHECK19-NEXT:    store i32 [[SUB42]], i32* [[DOTCAPTURE_EXPR_39]], align 4
-// CHECK19-NEXT:    [[TMP131:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_39]], align 4
-// CHECK19-NEXT:    [[ADD43:%.*]] = add nsw i32 [[TMP131]], 1
-// CHECK19-NEXT:    [[TMP132:%.*]] = zext i32 [[ADD43]] to i64
-// CHECK19-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP132]])
-// CHECK19-NEXT:    [[TMP133:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l151.region_id, i32 3, i8** [[TMP126]], i8** [[TMP127]], i64* [[TMP128]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.10, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK19-NEXT:    [[TMP134:%.*]] = icmp ne i32 [[TMP133]], 0
-// CHECK19-NEXT:    br i1 [[TMP134]], label [[OMP_OFFLOAD_FAILED44:%.*]], label [[OMP_OFFLOAD_CONT45:%.*]]
+// CHECK19-NEXT:    [[TMP126:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_39]], align 4
+// CHECK19-NEXT:    [[ADD43:%.*]] = add nsw i32 [[TMP126]], 1
+// CHECK19-NEXT:    [[TMP127:%.*]] = zext i32 [[ADD43]] to i64
+// CHECK19-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP127]])
+// CHECK19-NEXT:    [[TMP128:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l151.region_id, i32 3, i8** [[TMP121]], i8** [[TMP122]], i64* [[TMP123]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK19-NEXT:    [[TMP129:%.*]] = icmp ne i32 [[TMP128]], 0
+// CHECK19-NEXT:    br i1 [[TMP129]], label [[OMP_OFFLOAD_FAILED44:%.*]], label [[OMP_OFFLOAD_CONT45:%.*]]
 // CHECK19:       omp_offload.failed44:
-// CHECK19-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l151(i32 [[TMP105]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK19-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l151(i32 [[TMP101]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK19-NEXT:    br label [[OMP_OFFLOAD_CONT45]]
 // CHECK19:       omp_offload.cont45:
-// CHECK19-NEXT:    [[TMP135:%.*]] = load i32, i32* [[M]], align 4
-// CHECK19-NEXT:    store i32 [[TMP135]], i32* [[DOTCAPTURE_EXPR_46]], align 4
-// CHECK19-NEXT:    [[TMP136:%.*]] = load i32, i32* [[N]], align 4
-// CHECK19-NEXT:    store i32 [[TMP136]], i32* [[N_CASTED47]], align 4
-// CHECK19-NEXT:    [[TMP137:%.*]] = load i32, i32* [[N_CASTED47]], align 4
-// CHECK19-NEXT:    [[TMP138:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_46]], align 4
-// CHECK19-NEXT:    store i32 [[TMP138]], i32* [[DOTCAPTURE_EXPR__CASTED48]], align 4
-// CHECK19-NEXT:    [[TMP139:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED48]], align 4
-// CHECK19-NEXT:    [[TMP140:%.*]] = mul nuw i32 [[TMP0]], 4
-// CHECK19-NEXT:    [[TMP141:%.*]] = sext i32 [[TMP140]] to i64
-// CHECK19-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS49]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP143:%.*]] = bitcast i8** [[TMP142]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP137]], i32* [[TMP143]], align 4
-// CHECK19-NEXT:    [[TMP144:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS50]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP145:%.*]] = bitcast i8** [[TMP144]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP137]], i32* [[TMP145]], align 4
-// CHECK19-NEXT:    [[TMP146:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES52]], i32 0, i32 0
-// CHECK19-NEXT:    store i64 4, i64* [[TMP146]], align 4
-// CHECK19-NEXT:    [[TMP147:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS51]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP130:%.*]] = load i32, i32* [[M]], align 4
+// CHECK19-NEXT:    store i32 [[TMP130]], i32* [[DOTCAPTURE_EXPR_46]], align 4
+// CHECK19-NEXT:    [[TMP131:%.*]] = load i32, i32* [[N]], align 4
+// CHECK19-NEXT:    store i32 [[TMP131]], i32* [[N_CASTED47]], align 4
+// CHECK19-NEXT:    [[TMP132:%.*]] = load i32, i32* [[N_CASTED47]], align 4
+// CHECK19-NEXT:    [[TMP133:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_46]], align 4
+// CHECK19-NEXT:    store i32 [[TMP133]], i32* [[DOTCAPTURE_EXPR__CASTED48]], align 4
+// CHECK19-NEXT:    [[TMP134:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED48]], align 4
+// CHECK19-NEXT:    [[TMP135:%.*]] = mul nuw i32 [[TMP0]], 4
+// CHECK19-NEXT:    [[TMP136:%.*]] = sext i32 [[TMP135]] to i64
+// CHECK19-NEXT:    [[TMP137:%.*]] = bitcast [4 x i64]* [[DOTOFFLOAD_SIZES52]] to i8*
+// CHECK19-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP137]], i8* align 4 bitcast ([4 x i64]* @.offload_sizes.16 to i8*), i32 32, i1 false)
+// CHECK19-NEXT:    [[TMP138:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS49]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP139:%.*]] = bitcast i8** [[TMP138]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP132]], i32* [[TMP139]], align 4
+// CHECK19-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS50]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP141:%.*]] = bitcast i8** [[TMP140]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP132]], i32* [[TMP141]], align 4
+// CHECK19-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS51]], i32 0, i32 0
+// CHECK19-NEXT:    store i8* null, i8** [[TMP142]], align 4
+// CHECK19-NEXT:    [[TMP143:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS49]], i32 0, i32 1
+// CHECK19-NEXT:    [[TMP144:%.*]] = bitcast i8** [[TMP143]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP0]], i32* [[TMP144]], align 4
+// CHECK19-NEXT:    [[TMP145:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS50]], i32 0, i32 1
+// CHECK19-NEXT:    [[TMP146:%.*]] = bitcast i8** [[TMP145]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP0]], i32* [[TMP146]], align 4
+// CHECK19-NEXT:    [[TMP147:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS51]], i32 0, i32 1
 // CHECK19-NEXT:    store i8* null, i8** [[TMP147]], align 4
-// CHECK19-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS49]], i32 0, i32 1
-// CHECK19-NEXT:    [[TMP149:%.*]] = bitcast i8** [[TMP148]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP0]], i32* [[TMP149]], align 4
-// CHECK19-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS50]], i32 0, i32 1
-// CHECK19-NEXT:    [[TMP151:%.*]] = bitcast i8** [[TMP150]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP0]], i32* [[TMP151]], align 4
-// CHECK19-NEXT:    [[TMP152:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES52]], i32 0, i32 1
-// CHECK19-NEXT:    store i64 4, i64* [[TMP152]], align 4
-// CHECK19-NEXT:    [[TMP153:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS51]], i32 0, i32 1
+// CHECK19-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS49]], i32 0, i32 2
+// CHECK19-NEXT:    [[TMP149:%.*]] = bitcast i8** [[TMP148]] to i32**
+// CHECK19-NEXT:    store i32* [[VLA]], i32** [[TMP149]], align 4
+// CHECK19-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS50]], i32 0, i32 2
+// CHECK19-NEXT:    [[TMP151:%.*]] = bitcast i8** [[TMP150]] to i32**
+// CHECK19-NEXT:    store i32* [[VLA]], i32** [[TMP151]], align 4
+// CHECK19-NEXT:    [[TMP152:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES52]], i32 0, i32 2
+// CHECK19-NEXT:    store i64 [[TMP136]], i64* [[TMP152]], align 4
+// CHECK19-NEXT:    [[TMP153:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS51]], i32 0, i32 2
 // CHECK19-NEXT:    store i8* null, i8** [[TMP153]], align 4
-// CHECK19-NEXT:    [[TMP154:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS49]], i32 0, i32 2
-// CHECK19-NEXT:    [[TMP155:%.*]] = bitcast i8** [[TMP154]] to i32**
-// CHECK19-NEXT:    store i32* [[VLA]], i32** [[TMP155]], align 4
-// CHECK19-NEXT:    [[TMP156:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS50]], i32 0, i32 2
-// CHECK19-NEXT:    [[TMP157:%.*]] = bitcast i8** [[TMP156]] to i32**
-// CHECK19-NEXT:    store i32* [[VLA]], i32** [[TMP157]], align 4
-// CHECK19-NEXT:    [[TMP158:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES52]], i32 0, i32 2
-// CHECK19-NEXT:    store i64 [[TMP141]], i64* [[TMP158]], align 4
-// CHECK19-NEXT:    [[TMP159:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS51]], i32 0, i32 2
-// CHECK19-NEXT:    store i8* null, i8** [[TMP159]], align 4
-// CHECK19-NEXT:    [[TMP160:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS49]], i32 0, i32 3
-// CHECK19-NEXT:    [[TMP161:%.*]] = bitcast i8** [[TMP160]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP139]], i32* [[TMP161]], align 4
-// CHECK19-NEXT:    [[TMP162:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS50]], i32 0, i32 3
-// CHECK19-NEXT:    [[TMP163:%.*]] = bitcast i8** [[TMP162]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP139]], i32* [[TMP163]], align 4
-// CHECK19-NEXT:    [[TMP164:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES52]], i32 0, i32 3
-// CHECK19-NEXT:    store i64 4, i64* [[TMP164]], align 4
-// CHECK19-NEXT:    [[TMP165:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS51]], i32 0, i32 3
-// CHECK19-NEXT:    store i8* null, i8** [[TMP165]], align 4
-// CHECK19-NEXT:    [[TMP166:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS49]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP167:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS50]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP168:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES52]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP169:%.*]] = load i32, i32* [[N]], align 4
-// CHECK19-NEXT:    store i32 [[TMP169]], i32* [[DOTCAPTURE_EXPR_54]], align 4
-// CHECK19-NEXT:    [[TMP170:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_54]], align 4
-// CHECK19-NEXT:    [[SUB56:%.*]] = sub nsw i32 [[TMP170]], 0
+// CHECK19-NEXT:    [[TMP154:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS49]], i32 0, i32 3
+// CHECK19-NEXT:    [[TMP155:%.*]] = bitcast i8** [[TMP154]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP134]], i32* [[TMP155]], align 4
+// CHECK19-NEXT:    [[TMP156:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS50]], i32 0, i32 3
+// CHECK19-NEXT:    [[TMP157:%.*]] = bitcast i8** [[TMP156]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP134]], i32* [[TMP157]], align 4
+// CHECK19-NEXT:    [[TMP158:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS51]], i32 0, i32 3
+// CHECK19-NEXT:    store i8* null, i8** [[TMP158]], align 4
+// CHECK19-NEXT:    [[TMP159:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS49]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP160:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS50]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP161:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES52]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP162:%.*]] = load i32, i32* [[N]], align 4
+// CHECK19-NEXT:    store i32 [[TMP162]], i32* [[DOTCAPTURE_EXPR_54]], align 4
+// CHECK19-NEXT:    [[TMP163:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_54]], align 4
+// CHECK19-NEXT:    [[SUB56:%.*]] = sub nsw i32 [[TMP163]], 0
 // CHECK19-NEXT:    [[DIV57:%.*]] = sdiv i32 [[SUB56]], 1
 // CHECK19-NEXT:    [[SUB58:%.*]] = sub nsw i32 [[DIV57]], 1
 // CHECK19-NEXT:    store i32 [[SUB58]], i32* [[DOTCAPTURE_EXPR_55]], align 4
-// CHECK19-NEXT:    [[TMP171:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_55]], align 4
-// CHECK19-NEXT:    [[ADD59:%.*]] = add nsw i32 [[TMP171]], 1
-// CHECK19-NEXT:    [[TMP172:%.*]] = zext i32 [[ADD59]] to i64
-// CHECK19-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP172]])
-// CHECK19-NEXT:    [[TMP173:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l155.region_id, i32 4, i8** [[TMP166]], i8** [[TMP167]], i64* [[TMP168]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK19-NEXT:    [[TMP174:%.*]] = icmp ne i32 [[TMP173]], 0
-// CHECK19-NEXT:    br i1 [[TMP174]], label [[OMP_OFFLOAD_FAILED60:%.*]], label [[OMP_OFFLOAD_CONT61:%.*]]
+// CHECK19-NEXT:    [[TMP164:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_55]], align 4
+// CHECK19-NEXT:    [[ADD59:%.*]] = add nsw i32 [[TMP164]], 1
+// CHECK19-NEXT:    [[TMP165:%.*]] = zext i32 [[ADD59]] to i64
+// CHECK19-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP165]])
+// CHECK19-NEXT:    [[TMP166:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l155.region_id, i32 4, i8** [[TMP159]], i8** [[TMP160]], i64* [[TMP161]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.17, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK19-NEXT:    [[TMP167:%.*]] = icmp ne i32 [[TMP166]], 0
+// CHECK19-NEXT:    br i1 [[TMP167]], label [[OMP_OFFLOAD_FAILED60:%.*]], label [[OMP_OFFLOAD_CONT61:%.*]]
 // CHECK19:       omp_offload.failed60:
-// CHECK19-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l155(i32 [[TMP137]], i32 [[TMP0]], i32* [[VLA]], i32 [[TMP139]]) #[[ATTR3]]
+// CHECK19-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l155(i32 [[TMP132]], i32 [[TMP0]], i32* [[VLA]], i32 [[TMP134]]) #[[ATTR3]]
 // CHECK19-NEXT:    br label [[OMP_OFFLOAD_CONT61]]
 // CHECK19:       omp_offload.cont61:
-// CHECK19-NEXT:    [[TMP175:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK19-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10EEiT_(i32 noundef [[TMP175]])
+// CHECK19-NEXT:    [[TMP168:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK19-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10EEiT_(i32 noundef [[TMP168]])
 // CHECK19-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK19-NEXT:    [[TMP176:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK19-NEXT:    call void @llvm.stackrestore(i8* [[TMP176]])
-// CHECK19-NEXT:    [[TMP177:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK19-NEXT:    ret i32 [[TMP177]]
+// CHECK19-NEXT:    [[TMP169:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK19-NEXT:    call void @llvm.stackrestore(i8* [[TMP169]])
+// CHECK19-NEXT:    [[TMP170:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK19-NEXT:    ret i32 [[TMP170]]
 //
 //
 // CHECK19-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l139
@@ -23136,11 +23038,11 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
 // CHECK19-NEXT:    store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK19-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32 [[TMP3]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP5]])
+// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32 [[TMP3]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP5]])
 // CHECK19-NEXT:    ret void
 //
 //
-// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..5
+// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..6
 // CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK19-NEXT:  entry:
 // CHECK19-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -23221,7 +23123,7 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
 // CHECK19-NEXT:    store i32 [[TMP20]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK19-NEXT:    [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*, i32)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32 [[TMP19]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP21]])
+// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*, i32)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32 [[TMP19]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP21]])
 // CHECK19-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK19:       omp.inner.for.inc:
 // CHECK19-NEXT:    [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -23263,7 +23165,7 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    ret void
 //
 //
-// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..6
+// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..7
 // CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK19-NEXT:  entry:
 // CHECK19-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -23381,11 +23283,11 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4
 // CHECK19-NEXT:    store i32 [[TMP2]], i32* [[N_CASTED]], align 4
 // CHECK19-NEXT:    [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4
-// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), i32 [[TMP3]], i32 [[TMP0]], i32* [[TMP1]])
+// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), i32 [[TMP3]], i32 [[TMP0]], i32* [[TMP1]])
 // CHECK19-NEXT:    ret void
 //
 //
-// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..8
+// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..10
 // CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] {
 // CHECK19-NEXT:  entry:
 // CHECK19-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -23458,7 +23360,7 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[TMP17:%.*]] = load i32, i32* [[N_ADDR]], align 4
 // CHECK19-NEXT:    store i32 [[TMP17]], i32* [[N_CASTED]], align 4
 // CHECK19-NEXT:    [[TMP18:%.*]] = load i32, i32* [[N_CASTED]], align 4
-// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], i32* [[TMP1]])
+// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], i32* [[TMP1]])
 // CHECK19-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK19:       omp.inner.for.inc:
 // CHECK19-NEXT:    [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -23477,7 +23379,7 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    ret void
 //
 //
-// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..9
+// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..11
 // CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] {
 // CHECK19-NEXT:  entry:
 // CHECK19-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -23595,11 +23497,11 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
 // CHECK19-NEXT:    store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK19-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32 [[TMP3]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP5]])
+// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i32 [[TMP3]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP5]])
 // CHECK19-NEXT:    ret void
 //
 //
-// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..11
+// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..14
 // CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK19-NEXT:  entry:
 // CHECK19-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -23678,7 +23580,7 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
 // CHECK19-NEXT:    store i32 [[TMP19]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK19-NEXT:    [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*, i32)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP20]])
+// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*, i32)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP20]])
 // CHECK19-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK19:       omp.inner.for.inc:
 // CHECK19-NEXT:    [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -23697,7 +23599,7 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    ret void
 //
 //
-// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..12
+// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..15
 // CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK19-NEXT:  entry:
 // CHECK19-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -23798,7 +23700,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK19-LABEL: define {{[^@]+}}@_Z5tmainIiLi10EEiT_
-// CHECK19-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK19-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK19-NEXT:  entry:
 // CHECK19-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK19-NEXT:    [[A:%.*]] = alloca [10 x i32], align 4
@@ -23840,7 +23742,7 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK19-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK19-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK19-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l112.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.16, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK19-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l112.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.20, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.21, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK19-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK19-NEXT:    br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK19:       omp_offload.failed:
@@ -23858,7 +23760,7 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK19-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
 // CHECK19-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK19-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l116.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.19, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.20, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK19-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l116.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.24, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.25, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK19-NEXT:    [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
 // CHECK19-NEXT:    br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]]
 // CHECK19:       omp_offload.failed5:
@@ -23889,7 +23791,7 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0
 // CHECK19-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS8]], i32 0, i32 0
 // CHECK19-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK19-NEXT:    [[TMP33:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l120.region_id, i32 2, i8** [[TMP31]], i8** [[TMP32]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.23, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.24, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK19-NEXT:    [[TMP33:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l120.region_id, i32 2, i8** [[TMP31]], i8** [[TMP32]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.28, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.29, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK19-NEXT:    [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0
 // CHECK19-NEXT:    br i1 [[TMP34]], label [[OMP_OFFLOAD_FAILED11:%.*]], label [[OMP_OFFLOAD_CONT12:%.*]]
 // CHECK19:       omp_offload.failed11:
@@ -23907,7 +23809,7 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0
 // CHECK19-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 0
 // CHECK19-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK19-NEXT:    [[TMP42:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l124.region_id, i32 1, i8** [[TMP40]], i8** [[TMP41]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.27, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.28, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK19-NEXT:    [[TMP42:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l124.region_id, i32 1, i8** [[TMP40]], i8** [[TMP41]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.32, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.33, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK19-NEXT:    [[TMP43:%.*]] = icmp ne i32 [[TMP42]], 0
 // CHECK19-NEXT:    br i1 [[TMP43]], label [[OMP_OFFLOAD_FAILED17:%.*]], label [[OMP_OFFLOAD_CONT18:%.*]]
 // CHECK19:       omp_offload.failed17:
@@ -23938,7 +23840,7 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 0
 // CHECK19-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 0
 // CHECK19-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK19-NEXT:    [[TMP59:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l128.region_id, i32 2, i8** [[TMP57]], i8** [[TMP58]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.31, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.32, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK19-NEXT:    [[TMP59:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l128.region_id, i32 2, i8** [[TMP57]], i8** [[TMP58]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.36, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.37, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK19-NEXT:    [[TMP60:%.*]] = icmp ne i32 [[TMP59]], 0
 // CHECK19-NEXT:    br i1 [[TMP60]], label [[OMP_OFFLOAD_FAILED25:%.*]], label [[OMP_OFFLOAD_CONT26:%.*]]
 // CHECK19:       omp_offload.failed25:
@@ -23954,11 +23856,11 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4
 // CHECK19-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4
 // CHECK19-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4
-// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK19-NEXT:    ret void
 //
 //
-// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..14
+// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..18
 // CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK19-NEXT:  entry:
 // CHECK19-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -24004,7 +23906,7 @@ int main (int argc, char **argv) {
 // CHECK19:       omp.inner.for.body:
 // CHECK19-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
 // CHECK19-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
-// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]])
+// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]])
 // CHECK19-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK19:       omp.inner.for.inc:
 // CHECK19-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -24019,7 +23921,7 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    ret void
 //
 //
-// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..15
+// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..19
 // CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK19-NEXT:  entry:
 // CHECK19-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -24099,11 +24001,11 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4
 // CHECK19-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4
 // CHECK19-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4
-// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..17 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..22 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK19-NEXT:    ret void
 //
 //
-// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..17
+// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..22
 // CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK19-NEXT:  entry:
 // CHECK19-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -24149,7 +24051,7 @@ int main (int argc, char **argv) {
 // CHECK19:       omp.inner.for.body:
 // CHECK19-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
 // CHECK19-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
-// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]])
+// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..23 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]])
 // CHECK19-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK19:       omp.inner.for.inc:
 // CHECK19-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -24164,7 +24066,7 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    ret void
 //
 //
-// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..18
+// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..23
 // CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK19-NEXT:  entry:
 // CHECK19-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -24250,11 +24152,11 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
 // CHECK19-NEXT:    store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK19-NEXT:    [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i32)* @.omp_outlined..21 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i32 [[TMP2]])
+// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i32)* @.omp_outlined..26 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i32 [[TMP2]])
 // CHECK19-NEXT:    ret void
 //
 //
-// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..21
+// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..26
 // CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK19-NEXT:  entry:
 // CHECK19-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -24306,7 +24208,7 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
 // CHECK19-NEXT:    store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK19-NEXT:    [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..22 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]])
+// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..27 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]])
 // CHECK19-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK19:       omp.inner.for.inc:
 // CHECK19-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -24321,7 +24223,7 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    ret void
 //
 //
-// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..22
+// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..27
 // CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK19-NEXT:  entry:
 // CHECK19-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -24423,11 +24325,11 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4
 // CHECK19-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4
 // CHECK19-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4
-// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..25 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK19-NEXT:    ret void
 //
 //
-// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..25
+// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..30
 // CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK19-NEXT:  entry:
 // CHECK19-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -24473,7 +24375,7 @@ int main (int argc, char **argv) {
 // CHECK19:       omp.inner.for.body:
 // CHECK19-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
 // CHECK19-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
-// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]])
+// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]])
 // CHECK19-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK19:       omp.inner.for.inc:
 // CHECK19-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -24488,7 +24390,7 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    ret void
 //
 //
-// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..26
+// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..31
 // CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK19-NEXT:  entry:
 // CHECK19-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -24572,11 +24474,11 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
 // CHECK19-NEXT:    store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK19-NEXT:    [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i32)* @.omp_outlined..29 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i32 [[TMP2]])
+// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i32)* @.omp_outlined..34 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i32 [[TMP2]])
 // CHECK19-NEXT:    ret void
 //
 //
-// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..29
+// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..34
 // CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK19-NEXT:  entry:
 // CHECK19-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -24628,7 +24530,7 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
 // CHECK19-NEXT:    store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK19-NEXT:    [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]])
+// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]])
 // CHECK19-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK19:       omp.inner.for.inc:
 // CHECK19-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -24643,7 +24545,7 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    ret void
 //
 //
-// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..30
+// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..35
 // CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK19-NEXT:  entry:
 // CHECK19-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -24719,7 +24621,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK19-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK19-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK19-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK19-NEXT:  entry:
 // CHECK19-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK19-NEXT:    ret void
@@ -24794,14 +24696,14 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4
 // CHECK20-NEXT:    [[TMP4:%.*]] = mul nuw i32 [[TMP0]], 4
 // CHECK20-NEXT:    [[TMP5:%.*]] = sext i32 [[TMP4]] to i64
-// CHECK20-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP3]], i32* [[TMP7]], align 4
-// CHECK20-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP3]], i32* [[TMP9]], align 4
-// CHECK20-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK20-NEXT:    store i64 4, i64* [[TMP10]], align 4
+// CHECK20-NEXT:    [[TMP6:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK20-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP6]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes to i8*), i32 24, i1 false)
+// CHECK20-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP3]], i32* [[TMP8]], align 4
+// CHECK20-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP3]], i32* [[TMP10]], align 4
 // CHECK20-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
 // CHECK20-NEXT:    store i8* null, i8** [[TMP11]], align 4
 // CHECK20-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -24810,302 +24712,288 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK20-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i32*
 // CHECK20-NEXT:    store i32 [[TMP0]], i32* [[TMP15]], align 4
-// CHECK20-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK20-NEXT:    store i64 4, i64* [[TMP16]], align 4
-// CHECK20-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK20-NEXT:    store i8* null, i8** [[TMP17]], align 4
-// CHECK20-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK20-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
-// CHECK20-NEXT:    store i32* [[VLA]], i32** [[TMP19]], align 4
-// CHECK20-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK20-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32**
-// CHECK20-NEXT:    store i32* [[VLA]], i32** [[TMP21]], align 4
-// CHECK20-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK20-NEXT:    store i64 [[TMP5]], i64* [[TMP22]], align 4
-// CHECK20-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK20-NEXT:    store i8* null, i8** [[TMP23]], align 4
-// CHECK20-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP27:%.*]] = load i32, i32* [[N]], align 4
-// CHECK20-NEXT:    store i32 [[TMP27]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK20-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK20-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP28]], 0
+// CHECK20-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK20-NEXT:    store i8* null, i8** [[TMP16]], align 4
+// CHECK20-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK20-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32**
+// CHECK20-NEXT:    store i32* [[VLA]], i32** [[TMP18]], align 4
+// CHECK20-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK20-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32**
+// CHECK20-NEXT:    store i32* [[VLA]], i32** [[TMP20]], align 4
+// CHECK20-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
+// CHECK20-NEXT:    store i64 [[TMP5]], i64* [[TMP21]], align 4
+// CHECK20-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK20-NEXT:    store i8* null, i8** [[TMP22]], align 4
+// CHECK20-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP26:%.*]] = load i32, i32* [[N]], align 4
+// CHECK20-NEXT:    store i32 [[TMP26]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK20-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK20-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP27]], 0
 // CHECK20-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK20-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK20-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK20-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK20-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP29]], 1
-// CHECK20-NEXT:    [[TMP30:%.*]] = zext i32 [[ADD]] to i64
-// CHECK20-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP30]])
-// CHECK20-NEXT:    [[TMP31:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l139.region_id, i32 3, i8** [[TMP24]], i8** [[TMP25]], i64* [[TMP26]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK20-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
-// CHECK20-NEXT:    br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK20-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK20-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP28]], 1
+// CHECK20-NEXT:    [[TMP29:%.*]] = zext i32 [[ADD]] to i64
+// CHECK20-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP29]])
+// CHECK20-NEXT:    [[TMP30:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l139.region_id, i32 3, i8** [[TMP23]], i8** [[TMP24]], i64* [[TMP25]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK20-NEXT:    [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
+// CHECK20-NEXT:    br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK20:       omp_offload.failed:
 // CHECK20-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l139(i32 [[TMP3]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK20-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK20:       omp_offload.cont:
-// CHECK20-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N]], align 4
-// CHECK20-NEXT:    store i32 [[TMP33]], i32* [[N_CASTED3]], align 4
-// CHECK20-NEXT:    [[TMP34:%.*]] = load i32, i32* [[N_CASTED3]], align 4
-// CHECK20-NEXT:    [[TMP35:%.*]] = mul nuw i32 [[TMP0]], 4
-// CHECK20-NEXT:    [[TMP36:%.*]] = sext i32 [[TMP35]] to i64
+// CHECK20-NEXT:    [[TMP32:%.*]] = load i32, i32* [[N]], align 4
+// CHECK20-NEXT:    store i32 [[TMP32]], i32* [[N_CASTED3]], align 4
+// CHECK20-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N_CASTED3]], align 4
+// CHECK20-NEXT:    [[TMP34:%.*]] = mul nuw i32 [[TMP0]], 4
+// CHECK20-NEXT:    [[TMP35:%.*]] = sext i32 [[TMP34]] to i64
+// CHECK20-NEXT:    [[TMP36:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES7]] to i8*
+// CHECK20-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP36]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes.4 to i8*), i32 24, i1 false)
 // CHECK20-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
 // CHECK20-NEXT:    [[TMP38:%.*]] = bitcast i8** [[TMP37]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP34]], i32* [[TMP38]], align 4
+// CHECK20-NEXT:    store i32 [[TMP33]], i32* [[TMP38]], align 4
 // CHECK20-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
 // CHECK20-NEXT:    [[TMP40:%.*]] = bitcast i8** [[TMP39]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP34]], i32* [[TMP40]], align 4
-// CHECK20-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 0
-// CHECK20-NEXT:    store i64 4, i64* [[TMP41]], align 4
-// CHECK20-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 0
-// CHECK20-NEXT:    store i8* null, i8** [[TMP42]], align 4
-// CHECK20-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 1
-// CHECK20-NEXT:    [[TMP44:%.*]] = bitcast i8** [[TMP43]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP0]], i32* [[TMP44]], align 4
-// CHECK20-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 1
-// CHECK20-NEXT:    [[TMP46:%.*]] = bitcast i8** [[TMP45]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP0]], i32* [[TMP46]], align 4
-// CHECK20-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 1
-// CHECK20-NEXT:    store i64 4, i64* [[TMP47]], align 4
-// CHECK20-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 1
-// CHECK20-NEXT:    store i8* null, i8** [[TMP48]], align 4
-// CHECK20-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 2
+// CHECK20-NEXT:    store i32 [[TMP33]], i32* [[TMP40]], align 4
+// CHECK20-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 0
+// CHECK20-NEXT:    store i8* null, i8** [[TMP41]], align 4
+// CHECK20-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 1
+// CHECK20-NEXT:    [[TMP43:%.*]] = bitcast i8** [[TMP42]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP0]], i32* [[TMP43]], align 4
+// CHECK20-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 1
+// CHECK20-NEXT:    [[TMP45:%.*]] = bitcast i8** [[TMP44]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP0]], i32* [[TMP45]], align 4
+// CHECK20-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 1
+// CHECK20-NEXT:    store i8* null, i8** [[TMP46]], align 4
+// CHECK20-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 2
+// CHECK20-NEXT:    [[TMP48:%.*]] = bitcast i8** [[TMP47]] to i32**
+// CHECK20-NEXT:    store i32* [[VLA]], i32** [[TMP48]], align 4
+// CHECK20-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 2
 // CHECK20-NEXT:    [[TMP50:%.*]] = bitcast i8** [[TMP49]] to i32**
 // CHECK20-NEXT:    store i32* [[VLA]], i32** [[TMP50]], align 4
-// CHECK20-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 2
-// CHECK20-NEXT:    [[TMP52:%.*]] = bitcast i8** [[TMP51]] to i32**
-// CHECK20-NEXT:    store i32* [[VLA]], i32** [[TMP52]], align 4
-// CHECK20-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 2
-// CHECK20-NEXT:    store i64 [[TMP36]], i64* [[TMP53]], align 4
-// CHECK20-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 2
-// CHECK20-NEXT:    store i8* null, i8** [[TMP54]], align 4
-// CHECK20-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP58:%.*]] = load i32, i32* [[N]], align 4
-// CHECK20-NEXT:    store i32 [[TMP58]], i32* [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK20-NEXT:    [[TMP59:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK20-NEXT:    [[SUB11:%.*]] = sub nsw i32 [[TMP59]], 0
+// CHECK20-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 2
+// CHECK20-NEXT:    store i64 [[TMP35]], i64* [[TMP51]], align 4
+// CHECK20-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 2
+// CHECK20-NEXT:    store i8* null, i8** [[TMP52]], align 4
+// CHECK20-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP56:%.*]] = load i32, i32* [[N]], align 4
+// CHECK20-NEXT:    store i32 [[TMP56]], i32* [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK20-NEXT:    [[TMP57:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK20-NEXT:    [[SUB11:%.*]] = sub nsw i32 [[TMP57]], 0
 // CHECK20-NEXT:    [[DIV12:%.*]] = sdiv i32 [[SUB11]], 1
 // CHECK20-NEXT:    [[SUB13:%.*]] = sub nsw i32 [[DIV12]], 1
 // CHECK20-NEXT:    store i32 [[SUB13]], i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK20-NEXT:    [[TMP60:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK20-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP60]], 1
-// CHECK20-NEXT:    [[TMP61:%.*]] = zext i32 [[ADD14]] to i64
-// CHECK20-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP61]])
-// CHECK20-NEXT:    [[TMP62:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l143.region_id, i32 3, i8** [[TMP55]], i8** [[TMP56]], i64* [[TMP57]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK20-NEXT:    [[TMP63:%.*]] = icmp ne i32 [[TMP62]], 0
-// CHECK20-NEXT:    br i1 [[TMP63]], label [[OMP_OFFLOAD_FAILED15:%.*]], label [[OMP_OFFLOAD_CONT16:%.*]]
+// CHECK20-NEXT:    [[TMP58:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
+// CHECK20-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP58]], 1
+// CHECK20-NEXT:    [[TMP59:%.*]] = zext i32 [[ADD14]] to i64
+// CHECK20-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP59]])
+// CHECK20-NEXT:    [[TMP60:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l143.region_id, i32 3, i8** [[TMP53]], i8** [[TMP54]], i64* [[TMP55]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.5, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK20-NEXT:    [[TMP61:%.*]] = icmp ne i32 [[TMP60]], 0
+// CHECK20-NEXT:    br i1 [[TMP61]], label [[OMP_OFFLOAD_FAILED15:%.*]], label [[OMP_OFFLOAD_CONT16:%.*]]
 // CHECK20:       omp_offload.failed15:
-// CHECK20-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l143(i32 [[TMP34]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK20-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l143(i32 [[TMP33]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK20-NEXT:    br label [[OMP_OFFLOAD_CONT16]]
 // CHECK20:       omp_offload.cont16:
-// CHECK20-NEXT:    [[TMP64:%.*]] = load i32, i32* [[M]], align 4
-// CHECK20-NEXT:    store i32 [[TMP64]], i32* [[DOTCAPTURE_EXPR_17]], align 4
-// CHECK20-NEXT:    [[TMP65:%.*]] = load i32, i32* [[N]], align 4
-// CHECK20-NEXT:    store i32 [[TMP65]], i32* [[N_CASTED18]], align 4
-// CHECK20-NEXT:    [[TMP66:%.*]] = load i32, i32* [[N_CASTED18]], align 4
-// CHECK20-NEXT:    [[TMP67:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_17]], align 4
-// CHECK20-NEXT:    store i32 [[TMP67]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK20-NEXT:    [[TMP68:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK20-NEXT:    [[TMP69:%.*]] = mul nuw i32 [[TMP0]], 4
-// CHECK20-NEXT:    [[TMP70:%.*]] = sext i32 [[TMP69]] to i64
-// CHECK20-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP72:%.*]] = bitcast i8** [[TMP71]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP66]], i32* [[TMP72]], align 4
-// CHECK20-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP74:%.*]] = bitcast i8** [[TMP73]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP66]], i32* [[TMP74]], align 4
-// CHECK20-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 0
-// CHECK20-NEXT:    store i64 4, i64* [[TMP75]], align 4
-// CHECK20-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 0
-// CHECK20-NEXT:    store i8* null, i8** [[TMP76]], align 4
-// CHECK20-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 1
+// CHECK20-NEXT:    [[TMP62:%.*]] = load i32, i32* [[M]], align 4
+// CHECK20-NEXT:    store i32 [[TMP62]], i32* [[DOTCAPTURE_EXPR_17]], align 4
+// CHECK20-NEXT:    [[TMP63:%.*]] = load i32, i32* [[N]], align 4
+// CHECK20-NEXT:    store i32 [[TMP63]], i32* [[N_CASTED18]], align 4
+// CHECK20-NEXT:    [[TMP64:%.*]] = load i32, i32* [[N_CASTED18]], align 4
+// CHECK20-NEXT:    [[TMP65:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_17]], align 4
+// CHECK20-NEXT:    store i32 [[TMP65]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
+// CHECK20-NEXT:    [[TMP66:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
+// CHECK20-NEXT:    [[TMP67:%.*]] = mul nuw i32 [[TMP0]], 4
+// CHECK20-NEXT:    [[TMP68:%.*]] = sext i32 [[TMP67]] to i64
+// CHECK20-NEXT:    [[TMP69:%.*]] = bitcast [4 x i64]* [[DOTOFFLOAD_SIZES22]] to i8*
+// CHECK20-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP69]], i8* align 4 bitcast ([4 x i64]* @.offload_sizes.8 to i8*), i32 32, i1 false)
+// CHECK20-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP71:%.*]] = bitcast i8** [[TMP70]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP64]], i32* [[TMP71]], align 4
+// CHECK20-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP73:%.*]] = bitcast i8** [[TMP72]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP64]], i32* [[TMP73]], align 4
+// CHECK20-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 0
+// CHECK20-NEXT:    store i8* null, i8** [[TMP74]], align 4
+// CHECK20-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 1
+// CHECK20-NEXT:    [[TMP76:%.*]] = bitcast i8** [[TMP75]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP0]], i32* [[TMP76]], align 4
+// CHECK20-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 1
 // CHECK20-NEXT:    [[TMP78:%.*]] = bitcast i8** [[TMP77]] to i32*
 // CHECK20-NEXT:    store i32 [[TMP0]], i32* [[TMP78]], align 4
-// CHECK20-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 1
-// CHECK20-NEXT:    [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP0]], i32* [[TMP80]], align 4
-// CHECK20-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 1
-// CHECK20-NEXT:    store i64 4, i64* [[TMP81]], align 4
-// CHECK20-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 1
-// CHECK20-NEXT:    store i8* null, i8** [[TMP82]], align 4
-// CHECK20-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 2
-// CHECK20-NEXT:    [[TMP84:%.*]] = bitcast i8** [[TMP83]] to i32**
-// CHECK20-NEXT:    store i32* [[VLA]], i32** [[TMP84]], align 4
-// CHECK20-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 2
-// CHECK20-NEXT:    [[TMP86:%.*]] = bitcast i8** [[TMP85]] to i32**
-// CHECK20-NEXT:    store i32* [[VLA]], i32** [[TMP86]], align 4
-// CHECK20-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 2
-// CHECK20-NEXT:    store i64 [[TMP70]], i64* [[TMP87]], align 4
-// CHECK20-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 2
-// CHECK20-NEXT:    store i8* null, i8** [[TMP88]], align 4
-// CHECK20-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 3
-// CHECK20-NEXT:    [[TMP90:%.*]] = bitcast i8** [[TMP89]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP68]], i32* [[TMP90]], align 4
-// CHECK20-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 3
-// CHECK20-NEXT:    [[TMP92:%.*]] = bitcast i8** [[TMP91]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP68]], i32* [[TMP92]], align 4
-// CHECK20-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 3
-// CHECK20-NEXT:    store i64 4, i64* [[TMP93]], align 4
-// CHECK20-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 3
-// CHECK20-NEXT:    store i8* null, i8** [[TMP94]], align 4
-// CHECK20-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP98:%.*]] = load i32, i32* [[N]], align 4
-// CHECK20-NEXT:    store i32 [[TMP98]], i32* [[DOTCAPTURE_EXPR_24]], align 4
-// CHECK20-NEXT:    [[TMP99:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_24]], align 4
-// CHECK20-NEXT:    [[SUB26:%.*]] = sub nsw i32 [[TMP99]], 0
+// CHECK20-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 1
+// CHECK20-NEXT:    store i8* null, i8** [[TMP79]], align 4
+// CHECK20-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 2
+// CHECK20-NEXT:    [[TMP81:%.*]] = bitcast i8** [[TMP80]] to i32**
+// CHECK20-NEXT:    store i32* [[VLA]], i32** [[TMP81]], align 4
+// CHECK20-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 2
+// CHECK20-NEXT:    [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i32**
+// CHECK20-NEXT:    store i32* [[VLA]], i32** [[TMP83]], align 4
+// CHECK20-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 2
+// CHECK20-NEXT:    store i64 [[TMP68]], i64* [[TMP84]], align 4
+// CHECK20-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 2
+// CHECK20-NEXT:    store i8* null, i8** [[TMP85]], align 4
+// CHECK20-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 3
+// CHECK20-NEXT:    [[TMP87:%.*]] = bitcast i8** [[TMP86]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP66]], i32* [[TMP87]], align 4
+// CHECK20-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 3
+// CHECK20-NEXT:    [[TMP89:%.*]] = bitcast i8** [[TMP88]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP66]], i32* [[TMP89]], align 4
+// CHECK20-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 3
+// CHECK20-NEXT:    store i8* null, i8** [[TMP90]], align 4
+// CHECK20-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP94:%.*]] = load i32, i32* [[N]], align 4
+// CHECK20-NEXT:    store i32 [[TMP94]], i32* [[DOTCAPTURE_EXPR_24]], align 4
+// CHECK20-NEXT:    [[TMP95:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_24]], align 4
+// CHECK20-NEXT:    [[SUB26:%.*]] = sub nsw i32 [[TMP95]], 0
 // CHECK20-NEXT:    [[DIV27:%.*]] = sdiv i32 [[SUB26]], 1
 // CHECK20-NEXT:    [[SUB28:%.*]] = sub nsw i32 [[DIV27]], 1
 // CHECK20-NEXT:    store i32 [[SUB28]], i32* [[DOTCAPTURE_EXPR_25]], align 4
-// CHECK20-NEXT:    [[TMP100:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_25]], align 4
-// CHECK20-NEXT:    [[ADD29:%.*]] = add nsw i32 [[TMP100]], 1
-// CHECK20-NEXT:    [[TMP101:%.*]] = zext i32 [[ADD29]] to i64
-// CHECK20-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP101]])
-// CHECK20-NEXT:    [[TMP102:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l147.region_id, i32 4, i8** [[TMP95]], i8** [[TMP96]], i64* [[TMP97]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.7, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK20-NEXT:    [[TMP103:%.*]] = icmp ne i32 [[TMP102]], 0
-// CHECK20-NEXT:    br i1 [[TMP103]], label [[OMP_OFFLOAD_FAILED30:%.*]], label [[OMP_OFFLOAD_CONT31:%.*]]
+// CHECK20-NEXT:    [[TMP96:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_25]], align 4
+// CHECK20-NEXT:    [[ADD29:%.*]] = add nsw i32 [[TMP96]], 1
+// CHECK20-NEXT:    [[TMP97:%.*]] = zext i32 [[ADD29]] to i64
+// CHECK20-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP97]])
+// CHECK20-NEXT:    [[TMP98:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l147.region_id, i32 4, i8** [[TMP91]], i8** [[TMP92]], i64* [[TMP93]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK20-NEXT:    [[TMP99:%.*]] = icmp ne i32 [[TMP98]], 0
+// CHECK20-NEXT:    br i1 [[TMP99]], label [[OMP_OFFLOAD_FAILED30:%.*]], label [[OMP_OFFLOAD_CONT31:%.*]]
 // CHECK20:       omp_offload.failed30:
-// CHECK20-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l147(i32 [[TMP66]], i32 [[TMP0]], i32* [[VLA]], i32 [[TMP68]]) #[[ATTR3]]
+// CHECK20-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l147(i32 [[TMP64]], i32 [[TMP0]], i32* [[VLA]], i32 [[TMP66]]) #[[ATTR3]]
 // CHECK20-NEXT:    br label [[OMP_OFFLOAD_CONT31]]
 // CHECK20:       omp_offload.cont31:
-// CHECK20-NEXT:    [[TMP104:%.*]] = load i32, i32* [[N]], align 4
-// CHECK20-NEXT:    store i32 [[TMP104]], i32* [[N_CASTED32]], align 4
-// CHECK20-NEXT:    [[TMP105:%.*]] = load i32, i32* [[N_CASTED32]], align 4
-// CHECK20-NEXT:    [[TMP106:%.*]] = mul nuw i32 [[TMP0]], 4
-// CHECK20-NEXT:    [[TMP107:%.*]] = sext i32 [[TMP106]] to i64
-// CHECK20-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS33]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP109:%.*]] = bitcast i8** [[TMP108]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP105]], i32* [[TMP109]], align 4
-// CHECK20-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS34]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP100:%.*]] = load i32, i32* [[N]], align 4
+// CHECK20-NEXT:    store i32 [[TMP100]], i32* [[N_CASTED32]], align 4
+// CHECK20-NEXT:    [[TMP101:%.*]] = load i32, i32* [[N_CASTED32]], align 4
+// CHECK20-NEXT:    [[TMP102:%.*]] = mul nuw i32 [[TMP0]], 4
+// CHECK20-NEXT:    [[TMP103:%.*]] = sext i32 [[TMP102]] to i64
+// CHECK20-NEXT:    [[TMP104:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES36]] to i8*
+// CHECK20-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP104]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes.12 to i8*), i32 24, i1 false)
+// CHECK20-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS33]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP106:%.*]] = bitcast i8** [[TMP105]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP101]], i32* [[TMP106]], align 4
+// CHECK20-NEXT:    [[TMP107:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS34]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP108:%.*]] = bitcast i8** [[TMP107]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP101]], i32* [[TMP108]], align 4
+// CHECK20-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS35]], i32 0, i32 0
+// CHECK20-NEXT:    store i8* null, i8** [[TMP109]], align 4
+// CHECK20-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS33]], i32 0, i32 1
 // CHECK20-NEXT:    [[TMP111:%.*]] = bitcast i8** [[TMP110]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP105]], i32* [[TMP111]], align 4
-// CHECK20-NEXT:    [[TMP112:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES36]], i32 0, i32 0
-// CHECK20-NEXT:    store i64 4, i64* [[TMP112]], align 4
-// CHECK20-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS35]], i32 0, i32 0
-// CHECK20-NEXT:    store i8* null, i8** [[TMP113]], align 4
-// CHECK20-NEXT:    [[TMP114:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS33]], i32 0, i32 1
-// CHECK20-NEXT:    [[TMP115:%.*]] = bitcast i8** [[TMP114]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP0]], i32* [[TMP115]], align 4
-// CHECK20-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS34]], i32 0, i32 1
-// CHECK20-NEXT:    [[TMP117:%.*]] = bitcast i8** [[TMP116]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP0]], i32* [[TMP117]], align 4
-// CHECK20-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES36]], i32 0, i32 1
-// CHECK20-NEXT:    store i64 4, i64* [[TMP118]], align 4
-// CHECK20-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS35]], i32 0, i32 1
-// CHECK20-NEXT:    store i8* null, i8** [[TMP119]], align 4
-// CHECK20-NEXT:    [[TMP120:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS33]], i32 0, i32 2
-// CHECK20-NEXT:    [[TMP121:%.*]] = bitcast i8** [[TMP120]] to i32**
-// CHECK20-NEXT:    store i32* [[VLA]], i32** [[TMP121]], align 4
-// CHECK20-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS34]], i32 0, i32 2
-// CHECK20-NEXT:    [[TMP123:%.*]] = bitcast i8** [[TMP122]] to i32**
-// CHECK20-NEXT:    store i32* [[VLA]], i32** [[TMP123]], align 4
-// CHECK20-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES36]], i32 0, i32 2
-// CHECK20-NEXT:    store i64 [[TMP107]], i64* [[TMP124]], align 4
-// CHECK20-NEXT:    [[TMP125:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS35]], i32 0, i32 2
-// CHECK20-NEXT:    store i8* null, i8** [[TMP125]], align 4
-// CHECK20-NEXT:    [[TMP126:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS33]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS34]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP128:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES36]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP129:%.*]] = load i32, i32* [[N]], align 4
-// CHECK20-NEXT:    store i32 [[TMP129]], i32* [[DOTCAPTURE_EXPR_38]], align 4
-// CHECK20-NEXT:    [[TMP130:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_38]], align 4
-// CHECK20-NEXT:    [[SUB40:%.*]] = sub nsw i32 [[TMP130]], 0
+// CHECK20-NEXT:    store i32 [[TMP0]], i32* [[TMP111]], align 4
+// CHECK20-NEXT:    [[TMP112:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS34]], i32 0, i32 1
+// CHECK20-NEXT:    [[TMP113:%.*]] = bitcast i8** [[TMP112]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP0]], i32* [[TMP113]], align 4
+// CHECK20-NEXT:    [[TMP114:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS35]], i32 0, i32 1
+// CHECK20-NEXT:    store i8* null, i8** [[TMP114]], align 4
+// CHECK20-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS33]], i32 0, i32 2
+// CHECK20-NEXT:    [[TMP116:%.*]] = bitcast i8** [[TMP115]] to i32**
+// CHECK20-NEXT:    store i32* [[VLA]], i32** [[TMP116]], align 4
+// CHECK20-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS34]], i32 0, i32 2
+// CHECK20-NEXT:    [[TMP118:%.*]] = bitcast i8** [[TMP117]] to i32**
+// CHECK20-NEXT:    store i32* [[VLA]], i32** [[TMP118]], align 4
+// CHECK20-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES36]], i32 0, i32 2
+// CHECK20-NEXT:    store i64 [[TMP103]], i64* [[TMP119]], align 4
+// CHECK20-NEXT:    [[TMP120:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS35]], i32 0, i32 2
+// CHECK20-NEXT:    store i8* null, i8** [[TMP120]], align 4
+// CHECK20-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS33]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS34]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP123:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES36]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP124:%.*]] = load i32, i32* [[N]], align 4
+// CHECK20-NEXT:    store i32 [[TMP124]], i32* [[DOTCAPTURE_EXPR_38]], align 4
+// CHECK20-NEXT:    [[TMP125:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_38]], align 4
+// CHECK20-NEXT:    [[SUB40:%.*]] = sub nsw i32 [[TMP125]], 0
 // CHECK20-NEXT:    [[DIV41:%.*]] = sdiv i32 [[SUB40]], 1
 // CHECK20-NEXT:    [[SUB42:%.*]] = sub nsw i32 [[DIV41]], 1
 // CHECK20-NEXT:    store i32 [[SUB42]], i32* [[DOTCAPTURE_EXPR_39]], align 4
-// CHECK20-NEXT:    [[TMP131:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_39]], align 4
-// CHECK20-NEXT:    [[ADD43:%.*]] = add nsw i32 [[TMP131]], 1
-// CHECK20-NEXT:    [[TMP132:%.*]] = zext i32 [[ADD43]] to i64
-// CHECK20-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP132]])
-// CHECK20-NEXT:    [[TMP133:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l151.region_id, i32 3, i8** [[TMP126]], i8** [[TMP127]], i64* [[TMP128]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.10, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK20-NEXT:    [[TMP134:%.*]] = icmp ne i32 [[TMP133]], 0
-// CHECK20-NEXT:    br i1 [[TMP134]], label [[OMP_OFFLOAD_FAILED44:%.*]], label [[OMP_OFFLOAD_CONT45:%.*]]
+// CHECK20-NEXT:    [[TMP126:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_39]], align 4
+// CHECK20-NEXT:    [[ADD43:%.*]] = add nsw i32 [[TMP126]], 1
+// CHECK20-NEXT:    [[TMP127:%.*]] = zext i32 [[ADD43]] to i64
+// CHECK20-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP127]])
+// CHECK20-NEXT:    [[TMP128:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l151.region_id, i32 3, i8** [[TMP121]], i8** [[TMP122]], i64* [[TMP123]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK20-NEXT:    [[TMP129:%.*]] = icmp ne i32 [[TMP128]], 0
+// CHECK20-NEXT:    br i1 [[TMP129]], label [[OMP_OFFLOAD_FAILED44:%.*]], label [[OMP_OFFLOAD_CONT45:%.*]]
 // CHECK20:       omp_offload.failed44:
-// CHECK20-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l151(i32 [[TMP105]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK20-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l151(i32 [[TMP101]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK20-NEXT:    br label [[OMP_OFFLOAD_CONT45]]
 // CHECK20:       omp_offload.cont45:
-// CHECK20-NEXT:    [[TMP135:%.*]] = load i32, i32* [[M]], align 4
-// CHECK20-NEXT:    store i32 [[TMP135]], i32* [[DOTCAPTURE_EXPR_46]], align 4
-// CHECK20-NEXT:    [[TMP136:%.*]] = load i32, i32* [[N]], align 4
-// CHECK20-NEXT:    store i32 [[TMP136]], i32* [[N_CASTED47]], align 4
-// CHECK20-NEXT:    [[TMP137:%.*]] = load i32, i32* [[N_CASTED47]], align 4
-// CHECK20-NEXT:    [[TMP138:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_46]], align 4
-// CHECK20-NEXT:    store i32 [[TMP138]], i32* [[DOTCAPTURE_EXPR__CASTED48]], align 4
-// CHECK20-NEXT:    [[TMP139:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED48]], align 4
-// CHECK20-NEXT:    [[TMP140:%.*]] = mul nuw i32 [[TMP0]], 4
-// CHECK20-NEXT:    [[TMP141:%.*]] = sext i32 [[TMP140]] to i64
-// CHECK20-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS49]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP143:%.*]] = bitcast i8** [[TMP142]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP137]], i32* [[TMP143]], align 4
-// CHECK20-NEXT:    [[TMP144:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS50]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP145:%.*]] = bitcast i8** [[TMP144]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP137]], i32* [[TMP145]], align 4
-// CHECK20-NEXT:    [[TMP146:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES52]], i32 0, i32 0
-// CHECK20-NEXT:    store i64 4, i64* [[TMP146]], align 4
-// CHECK20-NEXT:    [[TMP147:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS51]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP130:%.*]] = load i32, i32* [[M]], align 4
+// CHECK20-NEXT:    store i32 [[TMP130]], i32* [[DOTCAPTURE_EXPR_46]], align 4
+// CHECK20-NEXT:    [[TMP131:%.*]] = load i32, i32* [[N]], align 4
+// CHECK20-NEXT:    store i32 [[TMP131]], i32* [[N_CASTED47]], align 4
+// CHECK20-NEXT:    [[TMP132:%.*]] = load i32, i32* [[N_CASTED47]], align 4
+// CHECK20-NEXT:    [[TMP133:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_46]], align 4
+// CHECK20-NEXT:    store i32 [[TMP133]], i32* [[DOTCAPTURE_EXPR__CASTED48]], align 4
+// CHECK20-NEXT:    [[TMP134:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED48]], align 4
+// CHECK20-NEXT:    [[TMP135:%.*]] = mul nuw i32 [[TMP0]], 4
+// CHECK20-NEXT:    [[TMP136:%.*]] = sext i32 [[TMP135]] to i64
+// CHECK20-NEXT:    [[TMP137:%.*]] = bitcast [4 x i64]* [[DOTOFFLOAD_SIZES52]] to i8*
+// CHECK20-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP137]], i8* align 4 bitcast ([4 x i64]* @.offload_sizes.16 to i8*), i32 32, i1 false)
+// CHECK20-NEXT:    [[TMP138:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS49]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP139:%.*]] = bitcast i8** [[TMP138]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP132]], i32* [[TMP139]], align 4
+// CHECK20-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS50]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP141:%.*]] = bitcast i8** [[TMP140]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP132]], i32* [[TMP141]], align 4
+// CHECK20-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS51]], i32 0, i32 0
+// CHECK20-NEXT:    store i8* null, i8** [[TMP142]], align 4
+// CHECK20-NEXT:    [[TMP143:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS49]], i32 0, i32 1
+// CHECK20-NEXT:    [[TMP144:%.*]] = bitcast i8** [[TMP143]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP0]], i32* [[TMP144]], align 4
+// CHECK20-NEXT:    [[TMP145:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS50]], i32 0, i32 1
+// CHECK20-NEXT:    [[TMP146:%.*]] = bitcast i8** [[TMP145]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP0]], i32* [[TMP146]], align 4
+// CHECK20-NEXT:    [[TMP147:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS51]], i32 0, i32 1
 // CHECK20-NEXT:    store i8* null, i8** [[TMP147]], align 4
-// CHECK20-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS49]], i32 0, i32 1
-// CHECK20-NEXT:    [[TMP149:%.*]] = bitcast i8** [[TMP148]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP0]], i32* [[TMP149]], align 4
-// CHECK20-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS50]], i32 0, i32 1
-// CHECK20-NEXT:    [[TMP151:%.*]] = bitcast i8** [[TMP150]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP0]], i32* [[TMP151]], align 4
-// CHECK20-NEXT:    [[TMP152:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES52]], i32 0, i32 1
-// CHECK20-NEXT:    store i64 4, i64* [[TMP152]], align 4
-// CHECK20-NEXT:    [[TMP153:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS51]], i32 0, i32 1
+// CHECK20-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS49]], i32 0, i32 2
+// CHECK20-NEXT:    [[TMP149:%.*]] = bitcast i8** [[TMP148]] to i32**
+// CHECK20-NEXT:    store i32* [[VLA]], i32** [[TMP149]], align 4
+// CHECK20-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS50]], i32 0, i32 2
+// CHECK20-NEXT:    [[TMP151:%.*]] = bitcast i8** [[TMP150]] to i32**
+// CHECK20-NEXT:    store i32* [[VLA]], i32** [[TMP151]], align 4
+// CHECK20-NEXT:    [[TMP152:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES52]], i32 0, i32 2
+// CHECK20-NEXT:    store i64 [[TMP136]], i64* [[TMP152]], align 4
+// CHECK20-NEXT:    [[TMP153:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS51]], i32 0, i32 2
 // CHECK20-NEXT:    store i8* null, i8** [[TMP153]], align 4
-// CHECK20-NEXT:    [[TMP154:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS49]], i32 0, i32 2
-// CHECK20-NEXT:    [[TMP155:%.*]] = bitcast i8** [[TMP154]] to i32**
-// CHECK20-NEXT:    store i32* [[VLA]], i32** [[TMP155]], align 4
-// CHECK20-NEXT:    [[TMP156:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS50]], i32 0, i32 2
-// CHECK20-NEXT:    [[TMP157:%.*]] = bitcast i8** [[TMP156]] to i32**
-// CHECK20-NEXT:    store i32* [[VLA]], i32** [[TMP157]], align 4
-// CHECK20-NEXT:    [[TMP158:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES52]], i32 0, i32 2
-// CHECK20-NEXT:    store i64 [[TMP141]], i64* [[TMP158]], align 4
-// CHECK20-NEXT:    [[TMP159:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS51]], i32 0, i32 2
-// CHECK20-NEXT:    store i8* null, i8** [[TMP159]], align 4
-// CHECK20-NEXT:    [[TMP160:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS49]], i32 0, i32 3
-// CHECK20-NEXT:    [[TMP161:%.*]] = bitcast i8** [[TMP160]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP139]], i32* [[TMP161]], align 4
-// CHECK20-NEXT:    [[TMP162:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS50]], i32 0, i32 3
-// CHECK20-NEXT:    [[TMP163:%.*]] = bitcast i8** [[TMP162]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP139]], i32* [[TMP163]], align 4
-// CHECK20-NEXT:    [[TMP164:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES52]], i32 0, i32 3
-// CHECK20-NEXT:    store i64 4, i64* [[TMP164]], align 4
-// CHECK20-NEXT:    [[TMP165:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS51]], i32 0, i32 3
-// CHECK20-NEXT:    store i8* null, i8** [[TMP165]], align 4
-// CHECK20-NEXT:    [[TMP166:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS49]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP167:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS50]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP168:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES52]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP169:%.*]] = load i32, i32* [[N]], align 4
-// CHECK20-NEXT:    store i32 [[TMP169]], i32* [[DOTCAPTURE_EXPR_54]], align 4
-// CHECK20-NEXT:    [[TMP170:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_54]], align 4
-// CHECK20-NEXT:    [[SUB56:%.*]] = sub nsw i32 [[TMP170]], 0
+// CHECK20-NEXT:    [[TMP154:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS49]], i32 0, i32 3
+// CHECK20-NEXT:    [[TMP155:%.*]] = bitcast i8** [[TMP154]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP134]], i32* [[TMP155]], align 4
+// CHECK20-NEXT:    [[TMP156:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS50]], i32 0, i32 3
+// CHECK20-NEXT:    [[TMP157:%.*]] = bitcast i8** [[TMP156]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP134]], i32* [[TMP157]], align 4
+// CHECK20-NEXT:    [[TMP158:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS51]], i32 0, i32 3
+// CHECK20-NEXT:    store i8* null, i8** [[TMP158]], align 4
+// CHECK20-NEXT:    [[TMP159:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS49]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP160:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS50]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP161:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES52]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP162:%.*]] = load i32, i32* [[N]], align 4
+// CHECK20-NEXT:    store i32 [[TMP162]], i32* [[DOTCAPTURE_EXPR_54]], align 4
+// CHECK20-NEXT:    [[TMP163:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_54]], align 4
+// CHECK20-NEXT:    [[SUB56:%.*]] = sub nsw i32 [[TMP163]], 0
 // CHECK20-NEXT:    [[DIV57:%.*]] = sdiv i32 [[SUB56]], 1
 // CHECK20-NEXT:    [[SUB58:%.*]] = sub nsw i32 [[DIV57]], 1
 // CHECK20-NEXT:    store i32 [[SUB58]], i32* [[DOTCAPTURE_EXPR_55]], align 4
-// CHECK20-NEXT:    [[TMP171:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_55]], align 4
-// CHECK20-NEXT:    [[ADD59:%.*]] = add nsw i32 [[TMP171]], 1
-// CHECK20-NEXT:    [[TMP172:%.*]] = zext i32 [[ADD59]] to i64
-// CHECK20-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP172]])
-// CHECK20-NEXT:    [[TMP173:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l155.region_id, i32 4, i8** [[TMP166]], i8** [[TMP167]], i64* [[TMP168]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK20-NEXT:    [[TMP174:%.*]] = icmp ne i32 [[TMP173]], 0
-// CHECK20-NEXT:    br i1 [[TMP174]], label [[OMP_OFFLOAD_FAILED60:%.*]], label [[OMP_OFFLOAD_CONT61:%.*]]
+// CHECK20-NEXT:    [[TMP164:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_55]], align 4
+// CHECK20-NEXT:    [[ADD59:%.*]] = add nsw i32 [[TMP164]], 1
+// CHECK20-NEXT:    [[TMP165:%.*]] = zext i32 [[ADD59]] to i64
+// CHECK20-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP165]])
+// CHECK20-NEXT:    [[TMP166:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l155.region_id, i32 4, i8** [[TMP159]], i8** [[TMP160]], i64* [[TMP161]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.17, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK20-NEXT:    [[TMP167:%.*]] = icmp ne i32 [[TMP166]], 0
+// CHECK20-NEXT:    br i1 [[TMP167]], label [[OMP_OFFLOAD_FAILED60:%.*]], label [[OMP_OFFLOAD_CONT61:%.*]]
 // CHECK20:       omp_offload.failed60:
-// CHECK20-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l155(i32 [[TMP137]], i32 [[TMP0]], i32* [[VLA]], i32 [[TMP139]]) #[[ATTR3]]
+// CHECK20-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l155(i32 [[TMP132]], i32 [[TMP0]], i32* [[VLA]], i32 [[TMP134]]) #[[ATTR3]]
 // CHECK20-NEXT:    br label [[OMP_OFFLOAD_CONT61]]
 // CHECK20:       omp_offload.cont61:
-// CHECK20-NEXT:    [[TMP175:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK20-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10EEiT_(i32 noundef [[TMP175]])
+// CHECK20-NEXT:    [[TMP168:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK20-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10EEiT_(i32 noundef [[TMP168]])
 // CHECK20-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK20-NEXT:    [[TMP176:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK20-NEXT:    call void @llvm.stackrestore(i8* [[TMP176]])
-// CHECK20-NEXT:    [[TMP177:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK20-NEXT:    ret i32 [[TMP177]]
+// CHECK20-NEXT:    [[TMP169:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK20-NEXT:    call void @llvm.stackrestore(i8* [[TMP169]])
+// CHECK20-NEXT:    [[TMP170:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK20-NEXT:    ret i32 [[TMP170]]
 //
 //
 // CHECK20-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l139
@@ -25553,11 +25441,11 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
 // CHECK20-NEXT:    store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK20-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32 [[TMP3]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP5]])
+// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32 [[TMP3]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP5]])
 // CHECK20-NEXT:    ret void
 //
 //
-// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..5
+// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..6
 // CHECK20-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK20-NEXT:  entry:
 // CHECK20-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -25638,7 +25526,7 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
 // CHECK20-NEXT:    store i32 [[TMP20]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK20-NEXT:    [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*, i32)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32 [[TMP19]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP21]])
+// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*, i32)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32 [[TMP19]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP21]])
 // CHECK20-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK20:       omp.inner.for.inc:
 // CHECK20-NEXT:    [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -25680,7 +25568,7 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    ret void
 //
 //
-// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..6
+// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..7
 // CHECK20-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK20-NEXT:  entry:
 // CHECK20-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -25798,11 +25686,11 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4
 // CHECK20-NEXT:    store i32 [[TMP2]], i32* [[N_CASTED]], align 4
 // CHECK20-NEXT:    [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4
-// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), i32 [[TMP3]], i32 [[TMP0]], i32* [[TMP1]])
+// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), i32 [[TMP3]], i32 [[TMP0]], i32* [[TMP1]])
 // CHECK20-NEXT:    ret void
 //
 //
-// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..8
+// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..10
 // CHECK20-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] {
 // CHECK20-NEXT:  entry:
 // CHECK20-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -25875,7 +25763,7 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    [[TMP17:%.*]] = load i32, i32* [[N_ADDR]], align 4
 // CHECK20-NEXT:    store i32 [[TMP17]], i32* [[N_CASTED]], align 4
 // CHECK20-NEXT:    [[TMP18:%.*]] = load i32, i32* [[N_CASTED]], align 4
-// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], i32* [[TMP1]])
+// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], i32* [[TMP1]])
 // CHECK20-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK20:       omp.inner.for.inc:
 // CHECK20-NEXT:    [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -25894,7 +25782,7 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    ret void
 //
 //
-// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..9
+// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..11
 // CHECK20-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] {
 // CHECK20-NEXT:  entry:
 // CHECK20-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -26012,11 +25900,11 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
 // CHECK20-NEXT:    store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK20-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32 [[TMP3]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP5]])
+// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i32 [[TMP3]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP5]])
 // CHECK20-NEXT:    ret void
 //
 //
-// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..11
+// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..14
 // CHECK20-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK20-NEXT:  entry:
 // CHECK20-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -26095,7 +25983,7 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
 // CHECK20-NEXT:    store i32 [[TMP19]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK20-NEXT:    [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*, i32)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP20]])
+// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*, i32)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP20]])
 // CHECK20-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK20:       omp.inner.for.inc:
 // CHECK20-NEXT:    [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -26114,7 +26002,7 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    ret void
 //
 //
-// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..12
+// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..15
 // CHECK20-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK20-NEXT:  entry:
 // CHECK20-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -26215,7 +26103,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK20-LABEL: define {{[^@]+}}@_Z5tmainIiLi10EEiT_
-// CHECK20-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK20-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK20-NEXT:  entry:
 // CHECK20-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK20-NEXT:    [[A:%.*]] = alloca [10 x i32], align 4
@@ -26257,7 +26145,7 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK20-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK20-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK20-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l112.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.16, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK20-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l112.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.20, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.21, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK20-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK20-NEXT:    br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK20:       omp_offload.failed:
@@ -26275,7 +26163,7 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK20-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
 // CHECK20-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK20-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l116.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.19, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.20, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK20-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l116.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.24, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.25, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK20-NEXT:    [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
 // CHECK20-NEXT:    br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]]
 // CHECK20:       omp_offload.failed5:
@@ -26306,7 +26194,7 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0
 // CHECK20-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS8]], i32 0, i32 0
 // CHECK20-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK20-NEXT:    [[TMP33:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l120.region_id, i32 2, i8** [[TMP31]], i8** [[TMP32]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.23, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.24, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK20-NEXT:    [[TMP33:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l120.region_id, i32 2, i8** [[TMP31]], i8** [[TMP32]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.28, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.29, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK20-NEXT:    [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0
 // CHECK20-NEXT:    br i1 [[TMP34]], label [[OMP_OFFLOAD_FAILED11:%.*]], label [[OMP_OFFLOAD_CONT12:%.*]]
 // CHECK20:       omp_offload.failed11:
@@ -26324,7 +26212,7 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0
 // CHECK20-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 0
 // CHECK20-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK20-NEXT:    [[TMP42:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l124.region_id, i32 1, i8** [[TMP40]], i8** [[TMP41]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.27, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.28, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK20-NEXT:    [[TMP42:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l124.region_id, i32 1, i8** [[TMP40]], i8** [[TMP41]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.32, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.33, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK20-NEXT:    [[TMP43:%.*]] = icmp ne i32 [[TMP42]], 0
 // CHECK20-NEXT:    br i1 [[TMP43]], label [[OMP_OFFLOAD_FAILED17:%.*]], label [[OMP_OFFLOAD_CONT18:%.*]]
 // CHECK20:       omp_offload.failed17:
@@ -26355,7 +26243,7 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 0
 // CHECK20-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 0
 // CHECK20-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK20-NEXT:    [[TMP59:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l128.region_id, i32 2, i8** [[TMP57]], i8** [[TMP58]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.31, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.32, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK20-NEXT:    [[TMP59:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l128.region_id, i32 2, i8** [[TMP57]], i8** [[TMP58]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.36, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.37, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK20-NEXT:    [[TMP60:%.*]] = icmp ne i32 [[TMP59]], 0
 // CHECK20-NEXT:    br i1 [[TMP60]], label [[OMP_OFFLOAD_FAILED25:%.*]], label [[OMP_OFFLOAD_CONT26:%.*]]
 // CHECK20:       omp_offload.failed25:
@@ -26371,11 +26259,11 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4
 // CHECK20-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4
 // CHECK20-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4
-// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK20-NEXT:    ret void
 //
 //
-// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..14
+// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..18
 // CHECK20-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK20-NEXT:  entry:
 // CHECK20-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -26421,7 +26309,7 @@ int main (int argc, char **argv) {
 // CHECK20:       omp.inner.for.body:
 // CHECK20-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
 // CHECK20-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
-// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]])
+// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]])
 // CHECK20-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK20:       omp.inner.for.inc:
 // CHECK20-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -26436,7 +26324,7 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    ret void
 //
 //
-// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..15
+// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..19
 // CHECK20-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK20-NEXT:  entry:
 // CHECK20-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -26516,11 +26404,11 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4
 // CHECK20-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4
 // CHECK20-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4
-// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..17 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..22 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK20-NEXT:    ret void
 //
 //
-// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..17
+// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..22
 // CHECK20-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK20-NEXT:  entry:
 // CHECK20-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -26566,7 +26454,7 @@ int main (int argc, char **argv) {
 // CHECK20:       omp.inner.for.body:
 // CHECK20-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
 // CHECK20-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
-// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]])
+// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..23 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]])
 // CHECK20-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK20:       omp.inner.for.inc:
 // CHECK20-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -26581,7 +26469,7 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    ret void
 //
 //
-// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..18
+// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..23
 // CHECK20-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK20-NEXT:  entry:
 // CHECK20-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -26667,11 +26555,11 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
 // CHECK20-NEXT:    store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK20-NEXT:    [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i32)* @.omp_outlined..21 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i32 [[TMP2]])
+// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i32)* @.omp_outlined..26 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i32 [[TMP2]])
 // CHECK20-NEXT:    ret void
 //
 //
-// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..21
+// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..26
 // CHECK20-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK20-NEXT:  entry:
 // CHECK20-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -26723,7 +26611,7 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
 // CHECK20-NEXT:    store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK20-NEXT:    [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..22 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]])
+// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..27 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]])
 // CHECK20-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK20:       omp.inner.for.inc:
 // CHECK20-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -26738,7 +26626,7 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    ret void
 //
 //
-// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..22
+// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..27
 // CHECK20-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK20-NEXT:  entry:
 // CHECK20-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -26840,11 +26728,11 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4
 // CHECK20-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4
 // CHECK20-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4
-// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..25 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK20-NEXT:    ret void
 //
 //
-// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..25
+// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..30
 // CHECK20-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK20-NEXT:  entry:
 // CHECK20-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -26890,7 +26778,7 @@ int main (int argc, char **argv) {
 // CHECK20:       omp.inner.for.body:
 // CHECK20-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
 // CHECK20-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
-// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]])
+// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]])
 // CHECK20-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK20:       omp.inner.for.inc:
 // CHECK20-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -26905,7 +26793,7 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    ret void
 //
 //
-// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..26
+// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..31
 // CHECK20-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK20-NEXT:  entry:
 // CHECK20-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -26989,11 +26877,11 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
 // CHECK20-NEXT:    store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK20-NEXT:    [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i32)* @.omp_outlined..29 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i32 [[TMP2]])
+// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i32)* @.omp_outlined..34 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i32 [[TMP2]])
 // CHECK20-NEXT:    ret void
 //
 //
-// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..29
+// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..34
 // CHECK20-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK20-NEXT:  entry:
 // CHECK20-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -27045,7 +26933,7 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
 // CHECK20-NEXT:    store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK20-NEXT:    [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]])
+// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]])
 // CHECK20-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK20:       omp.inner.for.inc:
 // CHECK20-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -27060,7 +26948,7 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    ret void
 //
 //
-// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..30
+// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..35
 // CHECK20-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK20-NEXT:  entry:
 // CHECK20-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -27136,7 +27024,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK20-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK20-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK20-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK20-NEXT:  entry:
 // CHECK20-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK20-NEXT:    ret void

diff  --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_collapse_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_collapse_codegen.cpp
index 905a621de0966..5c9017d79e2e2 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_collapse_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_collapse_codegen.cpp
@@ -1354,14 +1354,14 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP9:%.*]] = load i64, i64* [[M_CASTED]], align 8
 // CHECK9-NEXT:    [[TMP10:%.*]] = mul nuw i64 [[TMP1]], [[TMP3]]
 // CHECK9-NEXT:    [[TMP11:%.*]] = mul nuw i64 [[TMP10]], 4
-// CHECK9-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP13:%.*]] = bitcast i8** [[TMP12]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP7]], i64* [[TMP13]], align 8
-// CHECK9-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP7]], i64* [[TMP15]], align 8
-// CHECK9-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK9-NEXT:    store i64 4, i64* [[TMP16]], align 8
+// CHECK9-NEXT:    [[TMP12:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK9-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP12]], i8* align 8 bitcast ([5 x i64]* @.offload_sizes to i8*), i64 40, i1 false)
+// CHECK9-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP7]], i64* [[TMP14]], align 8
+// CHECK9-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP16:%.*]] = bitcast i8** [[TMP15]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP7]], i64* [[TMP16]], align 8
 // CHECK9-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
 // CHECK9-NEXT:    store i8* null, i8** [[TMP17]], align 8
 // CHECK9-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -1370,75 +1370,69 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK9-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i64*
 // CHECK9-NEXT:    store i64 [[TMP9]], i64* [[TMP21]], align 8
-// CHECK9-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK9-NEXT:    store i64 4, i64* [[TMP22]], align 8
-// CHECK9-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK9-NEXT:    store i8* null, i8** [[TMP23]], align 8
-// CHECK9-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP25]], align 8
-// CHECK9-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP27:%.*]] = bitcast i8** [[TMP26]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP27]], align 8
-// CHECK9-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK9-NEXT:    store i64 8, i64* [[TMP28]], align 8
-// CHECK9-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK9-NEXT:    store i8* null, i8** [[TMP29]], align 8
-// CHECK9-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK9-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK9-NEXT:    store i8* null, i8** [[TMP22]], align 8
+// CHECK9-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP24]], align 8
+// CHECK9-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP26]], align 8
+// CHECK9-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK9-NEXT:    store i8* null, i8** [[TMP27]], align 8
+// CHECK9-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK9-NEXT:    [[TMP29:%.*]] = bitcast i8** [[TMP28]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP3]], i64* [[TMP29]], align 8
+// CHECK9-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
 // CHECK9-NEXT:    [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i64*
 // CHECK9-NEXT:    store i64 [[TMP3]], i64* [[TMP31]], align 8
-// CHECK9-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK9-NEXT:    [[TMP33:%.*]] = bitcast i8** [[TMP32]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP3]], i64* [[TMP33]], align 8
-// CHECK9-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK9-NEXT:    store i64 8, i64* [[TMP34]], align 8
-// CHECK9-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
-// CHECK9-NEXT:    store i8* null, i8** [[TMP35]], align 8
-// CHECK9-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK9-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i32**
-// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP37]], align 8
-// CHECK9-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
-// CHECK9-NEXT:    [[TMP39:%.*]] = bitcast i8** [[TMP38]] to i32**
-// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP39]], align 8
-// CHECK9-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK9-NEXT:    store i64 [[TMP11]], i64* [[TMP40]], align 8
-// CHECK9-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
-// CHECK9-NEXT:    store i8* null, i8** [[TMP41]], align 8
-// CHECK9-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP45:%.*]] = load i32, i32* [[N]], align 4
-// CHECK9-NEXT:    store i32 [[TMP45]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK9-NEXT:    [[TMP46:%.*]] = load i32, i32* [[M]], align 4
-// CHECK9-NEXT:    store i32 [[TMP46]], i32* [[DOTCAPTURE_EXPR_3]], align 4
-// CHECK9-NEXT:    [[TMP47:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK9-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP47]], 0
+// CHECK9-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
+// CHECK9-NEXT:    store i8* null, i8** [[TMP32]], align 8
+// CHECK9-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK9-NEXT:    [[TMP34:%.*]] = bitcast i8** [[TMP33]] to i32**
+// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP34]], align 8
+// CHECK9-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK9-NEXT:    [[TMP36:%.*]] = bitcast i8** [[TMP35]] to i32**
+// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP36]], align 8
+// CHECK9-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
+// CHECK9-NEXT:    store i64 [[TMP11]], i64* [[TMP37]], align 8
+// CHECK9-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
+// CHECK9-NEXT:    store i8* null, i8** [[TMP38]], align 8
+// CHECK9-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP42:%.*]] = load i32, i32* [[N]], align 4
+// CHECK9-NEXT:    store i32 [[TMP42]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK9-NEXT:    [[TMP43:%.*]] = load i32, i32* [[M]], align 4
+// CHECK9-NEXT:    store i32 [[TMP43]], i32* [[DOTCAPTURE_EXPR_3]], align 4
+// CHECK9-NEXT:    [[TMP44:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK9-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP44]], 0
 // CHECK9-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK9-NEXT:    [[CONV5:%.*]] = sext i32 [[DIV]] to i64
-// CHECK9-NEXT:    [[TMP48:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4
-// CHECK9-NEXT:    [[SUB6:%.*]] = sub nsw i32 [[TMP48]], 0
+// CHECK9-NEXT:    [[TMP45:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4
+// CHECK9-NEXT:    [[SUB6:%.*]] = sub nsw i32 [[TMP45]], 0
 // CHECK9-NEXT:    [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1
 // CHECK9-NEXT:    [[CONV8:%.*]] = sext i32 [[DIV7]] to i64
 // CHECK9-NEXT:    [[MUL:%.*]] = mul nsw i64 [[CONV5]], [[CONV8]]
 // CHECK9-NEXT:    [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1
 // CHECK9-NEXT:    store i64 [[SUB9]], i64* [[DOTCAPTURE_EXPR_4]], align 8
-// CHECK9-NEXT:    [[TMP49:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_4]], align 8
-// CHECK9-NEXT:    [[ADD:%.*]] = add nsw i64 [[TMP49]], 1
+// CHECK9-NEXT:    [[TMP46:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_4]], align 8
+// CHECK9-NEXT:    [[ADD:%.*]] = add nsw i64 [[TMP46]], 1
 // CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[ADD]])
-// CHECK9-NEXT:    [[TMP50:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81.region_id, i32 5, i8** [[TMP42]], i8** [[TMP43]], i64* [[TMP44]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK9-NEXT:    [[TMP51:%.*]] = icmp ne i32 [[TMP50]], 0
-// CHECK9-NEXT:    br i1 [[TMP51]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK9-NEXT:    [[TMP47:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81.region_id, i32 5, i8** [[TMP39]], i8** [[TMP40]], i64* [[TMP41]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK9-NEXT:    [[TMP48:%.*]] = icmp ne i32 [[TMP47]], 0
+// CHECK9-NEXT:    br i1 [[TMP48]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK9:       omp_offload.failed:
 // CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81(i64 [[TMP7]], i64 [[TMP9]], i64 [[TMP1]], i64 [[TMP3]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK9:       omp_offload.cont:
-// CHECK9-NEXT:    [[TMP52:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK9-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef signext [[TMP52]])
+// CHECK9-NEXT:    [[TMP49:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK9-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef signext [[TMP49]])
 // CHECK9-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK9-NEXT:    [[TMP53:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK9-NEXT:    call void @llvm.stackrestore(i8* [[TMP53]])
-// CHECK9-NEXT:    [[TMP54:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK9-NEXT:    ret i32 [[TMP54]]
+// CHECK9-NEXT:    [[TMP50:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK9-NEXT:    call void @llvm.stackrestore(i8* [[TMP50]])
+// CHECK9-NEXT:    [[TMP51:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK9-NEXT:    ret i32 [[TMP51]]
 //
 //
 // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81
@@ -1788,7 +1782,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK9-LABEL: define {{[^@]+}}@_Z5tmainIiLi10ELi2EEiT_
-// CHECK9-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK9-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK9-NEXT:  entry:
 // CHECK9-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[A:%.*]] = alloca [10 x [2 x i32]], align 4
@@ -1809,7 +1803,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 20)
-// CHECK9-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l68.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK9-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l68.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.4, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.5, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK9-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK9-NEXT:    br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK9:       omp_offload.failed:
@@ -2002,7 +1996,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK9-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK9-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK9-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK9-NEXT:  entry:
 // CHECK9-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK9-NEXT:    ret void
@@ -2055,14 +2049,14 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[TMP9:%.*]] = load i64, i64* [[M_CASTED]], align 8
 // CHECK10-NEXT:    [[TMP10:%.*]] = mul nuw i64 [[TMP1]], [[TMP3]]
 // CHECK10-NEXT:    [[TMP11:%.*]] = mul nuw i64 [[TMP10]], 4
-// CHECK10-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP13:%.*]] = bitcast i8** [[TMP12]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP7]], i64* [[TMP13]], align 8
-// CHECK10-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP7]], i64* [[TMP15]], align 8
-// CHECK10-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK10-NEXT:    store i64 4, i64* [[TMP16]], align 8
+// CHECK10-NEXT:    [[TMP12:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK10-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP12]], i8* align 8 bitcast ([5 x i64]* @.offload_sizes to i8*), i64 40, i1 false)
+// CHECK10-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP7]], i64* [[TMP14]], align 8
+// CHECK10-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP16:%.*]] = bitcast i8** [[TMP15]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP7]], i64* [[TMP16]], align 8
 // CHECK10-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
 // CHECK10-NEXT:    store i8* null, i8** [[TMP17]], align 8
 // CHECK10-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -2071,75 +2065,69 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK10-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i64*
 // CHECK10-NEXT:    store i64 [[TMP9]], i64* [[TMP21]], align 8
-// CHECK10-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK10-NEXT:    store i64 4, i64* [[TMP22]], align 8
-// CHECK10-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK10-NEXT:    store i8* null, i8** [[TMP23]], align 8
-// CHECK10-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK10-NEXT:    [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP25]], align 8
-// CHECK10-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK10-NEXT:    [[TMP27:%.*]] = bitcast i8** [[TMP26]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP27]], align 8
-// CHECK10-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK10-NEXT:    store i64 8, i64* [[TMP28]], align 8
-// CHECK10-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK10-NEXT:    store i8* null, i8** [[TMP29]], align 8
-// CHECK10-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK10-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK10-NEXT:    store i8* null, i8** [[TMP22]], align 8
+// CHECK10-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK10-NEXT:    [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP24]], align 8
+// CHECK10-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK10-NEXT:    [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP26]], align 8
+// CHECK10-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK10-NEXT:    store i8* null, i8** [[TMP27]], align 8
+// CHECK10-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK10-NEXT:    [[TMP29:%.*]] = bitcast i8** [[TMP28]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP3]], i64* [[TMP29]], align 8
+// CHECK10-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
 // CHECK10-NEXT:    [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i64*
 // CHECK10-NEXT:    store i64 [[TMP3]], i64* [[TMP31]], align 8
-// CHECK10-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK10-NEXT:    [[TMP33:%.*]] = bitcast i8** [[TMP32]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP3]], i64* [[TMP33]], align 8
-// CHECK10-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK10-NEXT:    store i64 8, i64* [[TMP34]], align 8
-// CHECK10-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
-// CHECK10-NEXT:    store i8* null, i8** [[TMP35]], align 8
-// CHECK10-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK10-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i32**
-// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP37]], align 8
-// CHECK10-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
-// CHECK10-NEXT:    [[TMP39:%.*]] = bitcast i8** [[TMP38]] to i32**
-// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP39]], align 8
-// CHECK10-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK10-NEXT:    store i64 [[TMP11]], i64* [[TMP40]], align 8
-// CHECK10-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
-// CHECK10-NEXT:    store i8* null, i8** [[TMP41]], align 8
-// CHECK10-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP45:%.*]] = load i32, i32* [[N]], align 4
-// CHECK10-NEXT:    store i32 [[TMP45]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK10-NEXT:    [[TMP46:%.*]] = load i32, i32* [[M]], align 4
-// CHECK10-NEXT:    store i32 [[TMP46]], i32* [[DOTCAPTURE_EXPR_3]], align 4
-// CHECK10-NEXT:    [[TMP47:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK10-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP47]], 0
+// CHECK10-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
+// CHECK10-NEXT:    store i8* null, i8** [[TMP32]], align 8
+// CHECK10-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK10-NEXT:    [[TMP34:%.*]] = bitcast i8** [[TMP33]] to i32**
+// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP34]], align 8
+// CHECK10-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK10-NEXT:    [[TMP36:%.*]] = bitcast i8** [[TMP35]] to i32**
+// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP36]], align 8
+// CHECK10-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
+// CHECK10-NEXT:    store i64 [[TMP11]], i64* [[TMP37]], align 8
+// CHECK10-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
+// CHECK10-NEXT:    store i8* null, i8** [[TMP38]], align 8
+// CHECK10-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP42:%.*]] = load i32, i32* [[N]], align 4
+// CHECK10-NEXT:    store i32 [[TMP42]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK10-NEXT:    [[TMP43:%.*]] = load i32, i32* [[M]], align 4
+// CHECK10-NEXT:    store i32 [[TMP43]], i32* [[DOTCAPTURE_EXPR_3]], align 4
+// CHECK10-NEXT:    [[TMP44:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK10-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP44]], 0
 // CHECK10-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK10-NEXT:    [[CONV5:%.*]] = sext i32 [[DIV]] to i64
-// CHECK10-NEXT:    [[TMP48:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4
-// CHECK10-NEXT:    [[SUB6:%.*]] = sub nsw i32 [[TMP48]], 0
+// CHECK10-NEXT:    [[TMP45:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4
+// CHECK10-NEXT:    [[SUB6:%.*]] = sub nsw i32 [[TMP45]], 0
 // CHECK10-NEXT:    [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1
 // CHECK10-NEXT:    [[CONV8:%.*]] = sext i32 [[DIV7]] to i64
 // CHECK10-NEXT:    [[MUL:%.*]] = mul nsw i64 [[CONV5]], [[CONV8]]
 // CHECK10-NEXT:    [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1
 // CHECK10-NEXT:    store i64 [[SUB9]], i64* [[DOTCAPTURE_EXPR_4]], align 8
-// CHECK10-NEXT:    [[TMP49:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_4]], align 8
-// CHECK10-NEXT:    [[ADD:%.*]] = add nsw i64 [[TMP49]], 1
+// CHECK10-NEXT:    [[TMP46:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_4]], align 8
+// CHECK10-NEXT:    [[ADD:%.*]] = add nsw i64 [[TMP46]], 1
 // CHECK10-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[ADD]])
-// CHECK10-NEXT:    [[TMP50:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81.region_id, i32 5, i8** [[TMP42]], i8** [[TMP43]], i64* [[TMP44]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK10-NEXT:    [[TMP51:%.*]] = icmp ne i32 [[TMP50]], 0
-// CHECK10-NEXT:    br i1 [[TMP51]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK10-NEXT:    [[TMP47:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81.region_id, i32 5, i8** [[TMP39]], i8** [[TMP40]], i64* [[TMP41]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK10-NEXT:    [[TMP48:%.*]] = icmp ne i32 [[TMP47]], 0
+// CHECK10-NEXT:    br i1 [[TMP48]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK10:       omp_offload.failed:
 // CHECK10-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81(i64 [[TMP7]], i64 [[TMP9]], i64 [[TMP1]], i64 [[TMP3]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK10-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK10:       omp_offload.cont:
-// CHECK10-NEXT:    [[TMP52:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK10-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef signext [[TMP52]])
+// CHECK10-NEXT:    [[TMP49:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK10-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef signext [[TMP49]])
 // CHECK10-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK10-NEXT:    [[TMP53:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK10-NEXT:    call void @llvm.stackrestore(i8* [[TMP53]])
-// CHECK10-NEXT:    [[TMP54:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK10-NEXT:    ret i32 [[TMP54]]
+// CHECK10-NEXT:    [[TMP50:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK10-NEXT:    call void @llvm.stackrestore(i8* [[TMP50]])
+// CHECK10-NEXT:    [[TMP51:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK10-NEXT:    ret i32 [[TMP51]]
 //
 //
 // CHECK10-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81
@@ -2489,7 +2477,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK10-LABEL: define {{[^@]+}}@_Z5tmainIiLi10ELi2EEiT_
-// CHECK10-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK10-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK10-NEXT:  entry:
 // CHECK10-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK10-NEXT:    [[A:%.*]] = alloca [10 x [2 x i32]], align 4
@@ -2510,7 +2498,7 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK10-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK10-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 20)
-// CHECK10-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l68.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK10-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l68.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.4, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.5, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK10-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK10-NEXT:    br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK10:       omp_offload.failed:
@@ -2703,7 +2691,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK10-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK10-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK10-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK10-NEXT:  entry:
 // CHECK10-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK10-NEXT:    ret void
@@ -2753,14 +2741,14 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP8:%.*]] = mul nuw i32 [[TMP0]], [[TMP1]]
 // CHECK11-NEXT:    [[TMP9:%.*]] = mul nuw i32 [[TMP8]], 4
 // CHECK11-NEXT:    [[TMP10:%.*]] = sext i32 [[TMP9]] to i64
-// CHECK11-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP5]], i32* [[TMP12]], align 4
-// CHECK11-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP5]], i32* [[TMP14]], align 4
-// CHECK11-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK11-NEXT:    store i64 4, i64* [[TMP15]], align 4
+// CHECK11-NEXT:    [[TMP11:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK11-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP11]], i8* align 4 bitcast ([5 x i64]* @.offload_sizes to i8*), i32 40, i1 false)
+// CHECK11-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP13:%.*]] = bitcast i8** [[TMP12]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP5]], i32* [[TMP13]], align 4
+// CHECK11-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP5]], i32* [[TMP15]], align 4
 // CHECK11-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
 // CHECK11-NEXT:    store i8* null, i8** [[TMP16]], align 4
 // CHECK11-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -2769,75 +2757,69 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK11-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32*
 // CHECK11-NEXT:    store i32 [[TMP7]], i32* [[TMP20]], align 4
-// CHECK11-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK11-NEXT:    store i64 4, i64* [[TMP21]], align 4
-// CHECK11-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK11-NEXT:    store i8* null, i8** [[TMP22]], align 4
-// CHECK11-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP24]], align 4
-// CHECK11-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP26]], align 4
-// CHECK11-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK11-NEXT:    store i64 4, i64* [[TMP27]], align 4
-// CHECK11-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK11-NEXT:    store i8* null, i8** [[TMP28]], align 4
-// CHECK11-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK11-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK11-NEXT:    store i8* null, i8** [[TMP21]], align 4
+// CHECK11-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP23]], align 4
+// CHECK11-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP25]], align 4
+// CHECK11-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK11-NEXT:    store i8* null, i8** [[TMP26]], align 4
+// CHECK11-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK11-NEXT:    [[TMP28:%.*]] = bitcast i8** [[TMP27]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP1]], i32* [[TMP28]], align 4
+// CHECK11-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
 // CHECK11-NEXT:    [[TMP30:%.*]] = bitcast i8** [[TMP29]] to i32*
 // CHECK11-NEXT:    store i32 [[TMP1]], i32* [[TMP30]], align 4
-// CHECK11-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK11-NEXT:    [[TMP32:%.*]] = bitcast i8** [[TMP31]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP1]], i32* [[TMP32]], align 4
-// CHECK11-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK11-NEXT:    store i64 4, i64* [[TMP33]], align 4
-// CHECK11-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
-// CHECK11-NEXT:    store i8* null, i8** [[TMP34]], align 4
-// CHECK11-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK11-NEXT:    [[TMP36:%.*]] = bitcast i8** [[TMP35]] to i32**
-// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP36]], align 4
-// CHECK11-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
-// CHECK11-NEXT:    [[TMP38:%.*]] = bitcast i8** [[TMP37]] to i32**
-// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP38]], align 4
-// CHECK11-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK11-NEXT:    store i64 [[TMP10]], i64* [[TMP39]], align 4
-// CHECK11-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
-// CHECK11-NEXT:    store i8* null, i8** [[TMP40]], align 4
-// CHECK11-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP44:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP44]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK11-NEXT:    [[TMP45:%.*]] = load i32, i32* [[M]], align 4
-// CHECK11-NEXT:    store i32 [[TMP45]], i32* [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK11-NEXT:    [[TMP46:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK11-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP46]], 0
+// CHECK11-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
+// CHECK11-NEXT:    store i8* null, i8** [[TMP31]], align 4
+// CHECK11-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK11-NEXT:    [[TMP33:%.*]] = bitcast i8** [[TMP32]] to i32**
+// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP33]], align 4
+// CHECK11-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK11-NEXT:    [[TMP35:%.*]] = bitcast i8** [[TMP34]] to i32**
+// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP35]], align 4
+// CHECK11-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
+// CHECK11-NEXT:    store i64 [[TMP10]], i64* [[TMP36]], align 4
+// CHECK11-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
+// CHECK11-NEXT:    store i8* null, i8** [[TMP37]], align 4
+// CHECK11-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP41:%.*]] = load i32, i32* [[N]], align 4
+// CHECK11-NEXT:    store i32 [[TMP41]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK11-NEXT:    [[TMP42:%.*]] = load i32, i32* [[M]], align 4
+// CHECK11-NEXT:    store i32 [[TMP42]], i32* [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK11-NEXT:    [[TMP43:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK11-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP43]], 0
 // CHECK11-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK11-NEXT:    [[CONV:%.*]] = sext i32 [[DIV]] to i64
-// CHECK11-NEXT:    [[TMP47:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK11-NEXT:    [[SUB4:%.*]] = sub nsw i32 [[TMP47]], 0
+// CHECK11-NEXT:    [[TMP44:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK11-NEXT:    [[SUB4:%.*]] = sub nsw i32 [[TMP44]], 0
 // CHECK11-NEXT:    [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1
 // CHECK11-NEXT:    [[CONV6:%.*]] = sext i32 [[DIV5]] to i64
 // CHECK11-NEXT:    [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]]
 // CHECK11-NEXT:    [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1
 // CHECK11-NEXT:    store i64 [[SUB7]], i64* [[DOTCAPTURE_EXPR_3]], align 8
-// CHECK11-NEXT:    [[TMP48:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8
-// CHECK11-NEXT:    [[ADD:%.*]] = add nsw i64 [[TMP48]], 1
+// CHECK11-NEXT:    [[TMP45:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8
+// CHECK11-NEXT:    [[ADD:%.*]] = add nsw i64 [[TMP45]], 1
 // CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[ADD]])
-// CHECK11-NEXT:    [[TMP49:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81.region_id, i32 5, i8** [[TMP41]], i8** [[TMP42]], i64* [[TMP43]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK11-NEXT:    [[TMP50:%.*]] = icmp ne i32 [[TMP49]], 0
-// CHECK11-NEXT:    br i1 [[TMP50]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK11-NEXT:    [[TMP46:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81.region_id, i32 5, i8** [[TMP38]], i8** [[TMP39]], i64* [[TMP40]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK11-NEXT:    [[TMP47:%.*]] = icmp ne i32 [[TMP46]], 0
+// CHECK11-NEXT:    br i1 [[TMP47]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK11:       omp_offload.failed:
 // CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81(i32 [[TMP5]], i32 [[TMP7]], i32 [[TMP0]], i32 [[TMP1]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK11:       omp_offload.cont:
-// CHECK11-NEXT:    [[TMP51:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK11-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef [[TMP51]])
+// CHECK11-NEXT:    [[TMP48:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK11-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef [[TMP48]])
 // CHECK11-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK11-NEXT:    [[TMP52:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK11-NEXT:    call void @llvm.stackrestore(i8* [[TMP52]])
-// CHECK11-NEXT:    [[TMP53:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK11-NEXT:    ret i32 [[TMP53]]
+// CHECK11-NEXT:    [[TMP49:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK11-NEXT:    call void @llvm.stackrestore(i8* [[TMP49]])
+// CHECK11-NEXT:    [[TMP50:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK11-NEXT:    ret i32 [[TMP50]]
 //
 //
 // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81
@@ -3179,7 +3161,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK11-LABEL: define {{[^@]+}}@_Z5tmainIiLi10ELi2EEiT_
-// CHECK11-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK11-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK11-NEXT:  entry:
 // CHECK11-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[A:%.*]] = alloca [10 x [2 x i32]], align 4
@@ -3200,7 +3182,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 20)
-// CHECK11-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l68.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK11-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l68.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.4, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.5, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK11-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK11-NEXT:    br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK11:       omp_offload.failed:
@@ -3387,7 +3369,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK11-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK11-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK11-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK11-NEXT:  entry:
 // CHECK11-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK11-NEXT:    ret void
@@ -3437,14 +3419,14 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[TMP8:%.*]] = mul nuw i32 [[TMP0]], [[TMP1]]
 // CHECK12-NEXT:    [[TMP9:%.*]] = mul nuw i32 [[TMP8]], 4
 // CHECK12-NEXT:    [[TMP10:%.*]] = sext i32 [[TMP9]] to i64
-// CHECK12-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP5]], i32* [[TMP12]], align 4
-// CHECK12-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP5]], i32* [[TMP14]], align 4
-// CHECK12-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK12-NEXT:    store i64 4, i64* [[TMP15]], align 4
+// CHECK12-NEXT:    [[TMP11:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK12-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP11]], i8* align 4 bitcast ([5 x i64]* @.offload_sizes to i8*), i32 40, i1 false)
+// CHECK12-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP13:%.*]] = bitcast i8** [[TMP12]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP5]], i32* [[TMP13]], align 4
+// CHECK12-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP5]], i32* [[TMP15]], align 4
 // CHECK12-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
 // CHECK12-NEXT:    store i8* null, i8** [[TMP16]], align 4
 // CHECK12-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -3453,75 +3435,69 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK12-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32*
 // CHECK12-NEXT:    store i32 [[TMP7]], i32* [[TMP20]], align 4
-// CHECK12-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK12-NEXT:    store i64 4, i64* [[TMP21]], align 4
-// CHECK12-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK12-NEXT:    store i8* null, i8** [[TMP22]], align 4
-// CHECK12-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK12-NEXT:    [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP24]], align 4
-// CHECK12-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK12-NEXT:    [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP26]], align 4
-// CHECK12-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK12-NEXT:    store i64 4, i64* [[TMP27]], align 4
-// CHECK12-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK12-NEXT:    store i8* null, i8** [[TMP28]], align 4
-// CHECK12-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK12-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK12-NEXT:    store i8* null, i8** [[TMP21]], align 4
+// CHECK12-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK12-NEXT:    [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP23]], align 4
+// CHECK12-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK12-NEXT:    [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP25]], align 4
+// CHECK12-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK12-NEXT:    store i8* null, i8** [[TMP26]], align 4
+// CHECK12-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK12-NEXT:    [[TMP28:%.*]] = bitcast i8** [[TMP27]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP1]], i32* [[TMP28]], align 4
+// CHECK12-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
 // CHECK12-NEXT:    [[TMP30:%.*]] = bitcast i8** [[TMP29]] to i32*
 // CHECK12-NEXT:    store i32 [[TMP1]], i32* [[TMP30]], align 4
-// CHECK12-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK12-NEXT:    [[TMP32:%.*]] = bitcast i8** [[TMP31]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP1]], i32* [[TMP32]], align 4
-// CHECK12-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK12-NEXT:    store i64 4, i64* [[TMP33]], align 4
-// CHECK12-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
-// CHECK12-NEXT:    store i8* null, i8** [[TMP34]], align 4
-// CHECK12-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK12-NEXT:    [[TMP36:%.*]] = bitcast i8** [[TMP35]] to i32**
-// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP36]], align 4
-// CHECK12-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
-// CHECK12-NEXT:    [[TMP38:%.*]] = bitcast i8** [[TMP37]] to i32**
-// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP38]], align 4
-// CHECK12-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK12-NEXT:    store i64 [[TMP10]], i64* [[TMP39]], align 4
-// CHECK12-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
-// CHECK12-NEXT:    store i8* null, i8** [[TMP40]], align 4
-// CHECK12-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP44:%.*]] = load i32, i32* [[N]], align 4
-// CHECK12-NEXT:    store i32 [[TMP44]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK12-NEXT:    [[TMP45:%.*]] = load i32, i32* [[M]], align 4
-// CHECK12-NEXT:    store i32 [[TMP45]], i32* [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK12-NEXT:    [[TMP46:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK12-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP46]], 0
+// CHECK12-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
+// CHECK12-NEXT:    store i8* null, i8** [[TMP31]], align 4
+// CHECK12-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK12-NEXT:    [[TMP33:%.*]] = bitcast i8** [[TMP32]] to i32**
+// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP33]], align 4
+// CHECK12-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK12-NEXT:    [[TMP35:%.*]] = bitcast i8** [[TMP34]] to i32**
+// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP35]], align 4
+// CHECK12-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
+// CHECK12-NEXT:    store i64 [[TMP10]], i64* [[TMP36]], align 4
+// CHECK12-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
+// CHECK12-NEXT:    store i8* null, i8** [[TMP37]], align 4
+// CHECK12-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP41:%.*]] = load i32, i32* [[N]], align 4
+// CHECK12-NEXT:    store i32 [[TMP41]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK12-NEXT:    [[TMP42:%.*]] = load i32, i32* [[M]], align 4
+// CHECK12-NEXT:    store i32 [[TMP42]], i32* [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK12-NEXT:    [[TMP43:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK12-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP43]], 0
 // CHECK12-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK12-NEXT:    [[CONV:%.*]] = sext i32 [[DIV]] to i64
-// CHECK12-NEXT:    [[TMP47:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK12-NEXT:    [[SUB4:%.*]] = sub nsw i32 [[TMP47]], 0
+// CHECK12-NEXT:    [[TMP44:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK12-NEXT:    [[SUB4:%.*]] = sub nsw i32 [[TMP44]], 0
 // CHECK12-NEXT:    [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1
 // CHECK12-NEXT:    [[CONV6:%.*]] = sext i32 [[DIV5]] to i64
 // CHECK12-NEXT:    [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]]
 // CHECK12-NEXT:    [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1
 // CHECK12-NEXT:    store i64 [[SUB7]], i64* [[DOTCAPTURE_EXPR_3]], align 8
-// CHECK12-NEXT:    [[TMP48:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8
-// CHECK12-NEXT:    [[ADD:%.*]] = add nsw i64 [[TMP48]], 1
+// CHECK12-NEXT:    [[TMP45:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8
+// CHECK12-NEXT:    [[ADD:%.*]] = add nsw i64 [[TMP45]], 1
 // CHECK12-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[ADD]])
-// CHECK12-NEXT:    [[TMP49:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81.region_id, i32 5, i8** [[TMP41]], i8** [[TMP42]], i64* [[TMP43]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK12-NEXT:    [[TMP50:%.*]] = icmp ne i32 [[TMP49]], 0
-// CHECK12-NEXT:    br i1 [[TMP50]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK12-NEXT:    [[TMP46:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81.region_id, i32 5, i8** [[TMP38]], i8** [[TMP39]], i64* [[TMP40]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK12-NEXT:    [[TMP47:%.*]] = icmp ne i32 [[TMP46]], 0
+// CHECK12-NEXT:    br i1 [[TMP47]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK12:       omp_offload.failed:
 // CHECK12-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81(i32 [[TMP5]], i32 [[TMP7]], i32 [[TMP0]], i32 [[TMP1]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK12-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK12:       omp_offload.cont:
-// CHECK12-NEXT:    [[TMP51:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK12-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef [[TMP51]])
+// CHECK12-NEXT:    [[TMP48:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK12-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef [[TMP48]])
 // CHECK12-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK12-NEXT:    [[TMP52:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK12-NEXT:    call void @llvm.stackrestore(i8* [[TMP52]])
-// CHECK12-NEXT:    [[TMP53:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK12-NEXT:    ret i32 [[TMP53]]
+// CHECK12-NEXT:    [[TMP49:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK12-NEXT:    call void @llvm.stackrestore(i8* [[TMP49]])
+// CHECK12-NEXT:    [[TMP50:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK12-NEXT:    ret i32 [[TMP50]]
 //
 //
 // CHECK12-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81
@@ -3863,7 +3839,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK12-LABEL: define {{[^@]+}}@_Z5tmainIiLi10ELi2EEiT_
-// CHECK12-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK12-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK12-NEXT:  entry:
 // CHECK12-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK12-NEXT:    [[A:%.*]] = alloca [10 x [2 x i32]], align 4
@@ -3884,7 +3860,7 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK12-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK12-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 20)
-// CHECK12-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l68.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK12-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l68.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.4, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.5, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK12-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK12-NEXT:    br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK12:       omp_offload.failed:
@@ -4071,7 +4047,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK12-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK12-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK12-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK12-NEXT:  entry:
 // CHECK12-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK12-NEXT:    ret void

diff  --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp
index 7c873d97846d9..7b27d063cb8a6 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp
@@ -3094,14 +3094,14 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    store i32 [[TMP3]], i32* [[CONV]], align 4
 // CHECK9-NEXT:    [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8
 // CHECK9-NEXT:    [[TMP5:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK9-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP4]], i64* [[TMP7]], align 8
-// CHECK9-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP4]], i64* [[TMP9]], align 8
-// CHECK9-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK9-NEXT:    store i64 4, i64* [[TMP10]], align 8
+// CHECK9-NEXT:    [[TMP6:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK9-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP6]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes to i8*), i64 24, i1 false)
+// CHECK9-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP4]], i64* [[TMP8]], align 8
+// CHECK9-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP4]], i64* [[TMP10]], align 8
 // CHECK9-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
 // CHECK9-NEXT:    store i8* null, i8** [[TMP11]], align 8
 // CHECK9-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -3110,176 +3110,168 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK9-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i64*
 // CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP15]], align 8
-// CHECK9-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK9-NEXT:    store i64 8, i64* [[TMP16]], align 8
-// CHECK9-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK9-NEXT:    store i8* null, i8** [[TMP17]], align 8
-// CHECK9-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
-// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP19]], align 8
-// CHECK9-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32**
-// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP21]], align 8
-// CHECK9-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK9-NEXT:    store i64 [[TMP5]], i64* [[TMP22]], align 8
-// CHECK9-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK9-NEXT:    store i8* null, i8** [[TMP23]], align 8
-// CHECK9-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP27:%.*]] = load i32, i32* [[N]], align 4
-// CHECK9-NEXT:    store i32 [[TMP27]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK9-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK9-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP28]], 0
+// CHECK9-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK9-NEXT:    store i8* null, i8** [[TMP16]], align 8
+// CHECK9-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32**
+// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP18]], align 8
+// CHECK9-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32**
+// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP20]], align 8
+// CHECK9-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
+// CHECK9-NEXT:    store i64 [[TMP5]], i64* [[TMP21]], align 8
+// CHECK9-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK9-NEXT:    store i8* null, i8** [[TMP22]], align 8
+// CHECK9-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP26:%.*]] = load i32, i32* [[N]], align 4
+// CHECK9-NEXT:    store i32 [[TMP26]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK9-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK9-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP27]], 0
 // CHECK9-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK9-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK9-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK9-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK9-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP29]], 1
-// CHECK9-NEXT:    [[TMP30:%.*]] = zext i32 [[ADD]] to i64
-// CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP30]])
-// CHECK9-NEXT:    [[TMP31:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l100.region_id, i32 3, i8** [[TMP24]], i8** [[TMP25]], i64* [[TMP26]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK9-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
-// CHECK9-NEXT:    br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK9-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK9-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP28]], 1
+// CHECK9-NEXT:    [[TMP29:%.*]] = zext i32 [[ADD]] to i64
+// CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP29]])
+// CHECK9-NEXT:    [[TMP30:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l100.region_id, i32 3, i8** [[TMP23]], i8** [[TMP24]], i64* [[TMP25]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK9-NEXT:    [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
+// CHECK9-NEXT:    br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK9:       omp_offload.failed:
 // CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l100(i64 [[TMP4]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK9:       omp_offload.cont:
-// CHECK9-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N]], align 4
+// CHECK9-NEXT:    [[TMP32:%.*]] = load i32, i32* [[N]], align 4
 // CHECK9-NEXT:    [[CONV4:%.*]] = bitcast i64* [[N_CASTED3]] to i32*
-// CHECK9-NEXT:    store i32 [[TMP33]], i32* [[CONV4]], align 4
-// CHECK9-NEXT:    [[TMP34:%.*]] = load i64, i64* [[N_CASTED3]], align 8
-// CHECK9-NEXT:    [[TMP35:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK9-NEXT:    store i32 [[TMP32]], i32* [[CONV4]], align 4
+// CHECK9-NEXT:    [[TMP33:%.*]] = load i64, i64* [[N_CASTED3]], align 8
+// CHECK9-NEXT:    [[TMP34:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK9-NEXT:    [[TMP35:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES8]] to i8*
+// CHECK9-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP35]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes.4 to i8*), i64 24, i1 false)
 // CHECK9-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP34]], i64* [[TMP37]], align 8
+// CHECK9-NEXT:    store i64 [[TMP33]], i64* [[TMP37]], align 8
 // CHECK9-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP39:%.*]] = bitcast i8** [[TMP38]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP34]], i64* [[TMP39]], align 8
-// CHECK9-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 0
-// CHECK9-NEXT:    store i64 4, i64* [[TMP40]], align 8
-// CHECK9-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 0
-// CHECK9-NEXT:    store i8* null, i8** [[TMP41]], align 8
-// CHECK9-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 1
-// CHECK9-NEXT:    [[TMP43:%.*]] = bitcast i8** [[TMP42]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP43]], align 8
-// CHECK9-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 1
-// CHECK9-NEXT:    [[TMP45:%.*]] = bitcast i8** [[TMP44]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP45]], align 8
-// CHECK9-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 1
-// CHECK9-NEXT:    store i64 8, i64* [[TMP46]], align 8
-// CHECK9-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 1
-// CHECK9-NEXT:    store i8* null, i8** [[TMP47]], align 8
-// CHECK9-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 2
+// CHECK9-NEXT:    store i64 [[TMP33]], i64* [[TMP39]], align 8
+// CHECK9-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 0
+// CHECK9-NEXT:    store i8* null, i8** [[TMP40]], align 8
+// CHECK9-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 1
+// CHECK9-NEXT:    [[TMP42:%.*]] = bitcast i8** [[TMP41]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP42]], align 8
+// CHECK9-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 1
+// CHECK9-NEXT:    [[TMP44:%.*]] = bitcast i8** [[TMP43]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP44]], align 8
+// CHECK9-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 1
+// CHECK9-NEXT:    store i8* null, i8** [[TMP45]], align 8
+// CHECK9-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP47:%.*]] = bitcast i8** [[TMP46]] to i32**
+// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP47]], align 8
+// CHECK9-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 2
 // CHECK9-NEXT:    [[TMP49:%.*]] = bitcast i8** [[TMP48]] to i32**
 // CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP49]], align 8
-// CHECK9-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP51:%.*]] = bitcast i8** [[TMP50]] to i32**
-// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP51]], align 8
-// CHECK9-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 2
-// CHECK9-NEXT:    store i64 [[TMP35]], i64* [[TMP52]], align 8
-// CHECK9-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 2
-// CHECK9-NEXT:    store i8* null, i8** [[TMP53]], align 8
-// CHECK9-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP57:%.*]] = load i32, i32* [[N]], align 4
-// CHECK9-NEXT:    store i32 [[TMP57]], i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK9-NEXT:    [[TMP58:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK9-NEXT:    [[SUB12:%.*]] = sub nsw i32 [[TMP58]], 0
+// CHECK9-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 2
+// CHECK9-NEXT:    store i64 [[TMP34]], i64* [[TMP50]], align 8
+// CHECK9-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 2
+// CHECK9-NEXT:    store i8* null, i8** [[TMP51]], align 8
+// CHECK9-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP55:%.*]] = load i32, i32* [[N]], align 4
+// CHECK9-NEXT:    store i32 [[TMP55]], i32* [[DOTCAPTURE_EXPR_10]], align 4
+// CHECK9-NEXT:    [[TMP56:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
+// CHECK9-NEXT:    [[SUB12:%.*]] = sub nsw i32 [[TMP56]], 0
 // CHECK9-NEXT:    [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1
 // CHECK9-NEXT:    [[SUB14:%.*]] = sub nsw i32 [[DIV13]], 1
 // CHECK9-NEXT:    store i32 [[SUB14]], i32* [[DOTCAPTURE_EXPR_11]], align 4
-// CHECK9-NEXT:    [[TMP59:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_11]], align 4
-// CHECK9-NEXT:    [[ADD15:%.*]] = add nsw i32 [[TMP59]], 1
-// CHECK9-NEXT:    [[TMP60:%.*]] = zext i32 [[ADD15]] to i64
-// CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP60]])
-// CHECK9-NEXT:    [[TMP61:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l104.region_id, i32 3, i8** [[TMP54]], i8** [[TMP55]], i64* [[TMP56]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK9-NEXT:    [[TMP62:%.*]] = icmp ne i32 [[TMP61]], 0
-// CHECK9-NEXT:    br i1 [[TMP62]], label [[OMP_OFFLOAD_FAILED16:%.*]], label [[OMP_OFFLOAD_CONT17:%.*]]
+// CHECK9-NEXT:    [[TMP57:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_11]], align 4
+// CHECK9-NEXT:    [[ADD15:%.*]] = add nsw i32 [[TMP57]], 1
+// CHECK9-NEXT:    [[TMP58:%.*]] = zext i32 [[ADD15]] to i64
+// CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP58]])
+// CHECK9-NEXT:    [[TMP59:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l104.region_id, i32 3, i8** [[TMP52]], i8** [[TMP53]], i64* [[TMP54]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.5, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK9-NEXT:    [[TMP60:%.*]] = icmp ne i32 [[TMP59]], 0
+// CHECK9-NEXT:    br i1 [[TMP60]], label [[OMP_OFFLOAD_FAILED16:%.*]], label [[OMP_OFFLOAD_CONT17:%.*]]
 // CHECK9:       omp_offload.failed16:
-// CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l104(i64 [[TMP34]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l104(i64 [[TMP33]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT17]]
 // CHECK9:       omp_offload.cont17:
-// CHECK9-NEXT:    [[TMP63:%.*]] = load i32, i32* [[M]], align 4
-// CHECK9-NEXT:    store i32 [[TMP63]], i32* [[DOTCAPTURE_EXPR_18]], align 4
-// CHECK9-NEXT:    [[TMP64:%.*]] = load i32, i32* [[N]], align 4
+// CHECK9-NEXT:    [[TMP61:%.*]] = load i32, i32* [[M]], align 4
+// CHECK9-NEXT:    store i32 [[TMP61]], i32* [[DOTCAPTURE_EXPR_18]], align 4
+// CHECK9-NEXT:    [[TMP62:%.*]] = load i32, i32* [[N]], align 4
 // CHECK9-NEXT:    [[CONV20:%.*]] = bitcast i64* [[N_CASTED19]] to i32*
-// CHECK9-NEXT:    store i32 [[TMP64]], i32* [[CONV20]], align 4
-// CHECK9-NEXT:    [[TMP65:%.*]] = load i64, i64* [[N_CASTED19]], align 8
-// CHECK9-NEXT:    [[TMP66:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4
+// CHECK9-NEXT:    store i32 [[TMP62]], i32* [[CONV20]], align 4
+// CHECK9-NEXT:    [[TMP63:%.*]] = load i64, i64* [[N_CASTED19]], align 8
+// CHECK9-NEXT:    [[TMP64:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4
 // CHECK9-NEXT:    [[CONV21:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
-// CHECK9-NEXT:    store i32 [[TMP66]], i32* [[CONV21]], align 4
-// CHECK9-NEXT:    [[TMP67:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK9-NEXT:    [[TMP68:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK9-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP70:%.*]] = bitcast i8** [[TMP69]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP65]], i64* [[TMP70]], align 8
-// CHECK9-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP72:%.*]] = bitcast i8** [[TMP71]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP65]], i64* [[TMP72]], align 8
-// CHECK9-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 0
-// CHECK9-NEXT:    store i64 4, i64* [[TMP73]], align 8
-// CHECK9-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 0
-// CHECK9-NEXT:    store i8* null, i8** [[TMP74]], align 8
-// CHECK9-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 1
+// CHECK9-NEXT:    store i32 [[TMP64]], i32* [[CONV21]], align 4
+// CHECK9-NEXT:    [[TMP65:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
+// CHECK9-NEXT:    [[TMP66:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK9-NEXT:    [[TMP67:%.*]] = bitcast [4 x i64]* [[DOTOFFLOAD_SIZES25]] to i8*
+// CHECK9-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP67]], i8* align 8 bitcast ([4 x i64]* @.offload_sizes.8 to i8*), i64 32, i1 false)
+// CHECK9-NEXT:    [[TMP68:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP69:%.*]] = bitcast i8** [[TMP68]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP63]], i64* [[TMP69]], align 8
+// CHECK9-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP71:%.*]] = bitcast i8** [[TMP70]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP63]], i64* [[TMP71]], align 8
+// CHECK9-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 0
+// CHECK9-NEXT:    store i8* null, i8** [[TMP72]], align 8
+// CHECK9-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 1
+// CHECK9-NEXT:    [[TMP74:%.*]] = bitcast i8** [[TMP73]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP74]], align 8
+// CHECK9-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 1
 // CHECK9-NEXT:    [[TMP76:%.*]] = bitcast i8** [[TMP75]] to i64*
 // CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP76]], align 8
-// CHECK9-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 1
-// CHECK9-NEXT:    [[TMP78:%.*]] = bitcast i8** [[TMP77]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP78]], align 8
-// CHECK9-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 1
-// CHECK9-NEXT:    store i64 8, i64* [[TMP79]], align 8
-// CHECK9-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 1
-// CHECK9-NEXT:    store i8* null, i8** [[TMP80]], align 8
-// CHECK9-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP82:%.*]] = bitcast i8** [[TMP81]] to i32**
-// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP82]], align 8
-// CHECK9-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP84:%.*]] = bitcast i8** [[TMP83]] to i32**
-// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP84]], align 8
-// CHECK9-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 2
-// CHECK9-NEXT:    store i64 [[TMP68]], i64* [[TMP85]], align 8
-// CHECK9-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 2
-// CHECK9-NEXT:    store i8* null, i8** [[TMP86]], align 8
-// CHECK9-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 3
-// CHECK9-NEXT:    [[TMP88:%.*]] = bitcast i8** [[TMP87]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP67]], i64* [[TMP88]], align 8
-// CHECK9-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 3
-// CHECK9-NEXT:    [[TMP90:%.*]] = bitcast i8** [[TMP89]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP67]], i64* [[TMP90]], align 8
-// CHECK9-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 3
-// CHECK9-NEXT:    store i64 4, i64* [[TMP91]], align 8
-// CHECK9-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 3
-// CHECK9-NEXT:    store i8* null, i8** [[TMP92]], align 8
-// CHECK9-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP96:%.*]] = load i32, i32* [[N]], align 4
-// CHECK9-NEXT:    store i32 [[TMP96]], i32* [[DOTCAPTURE_EXPR_27]], align 4
-// CHECK9-NEXT:    [[TMP97:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_27]], align 4
-// CHECK9-NEXT:    [[SUB29:%.*]] = sub nsw i32 [[TMP97]], 0
+// CHECK9-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 1
+// CHECK9-NEXT:    store i8* null, i8** [[TMP77]], align 8
+// CHECK9-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP79:%.*]] = bitcast i8** [[TMP78]] to i32**
+// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP79]], align 8
+// CHECK9-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP81:%.*]] = bitcast i8** [[TMP80]] to i32**
+// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP81]], align 8
+// CHECK9-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 2
+// CHECK9-NEXT:    store i64 [[TMP66]], i64* [[TMP82]], align 8
+// CHECK9-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 2
+// CHECK9-NEXT:    store i8* null, i8** [[TMP83]], align 8
+// CHECK9-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 3
+// CHECK9-NEXT:    [[TMP85:%.*]] = bitcast i8** [[TMP84]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP65]], i64* [[TMP85]], align 8
+// CHECK9-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 3
+// CHECK9-NEXT:    [[TMP87:%.*]] = bitcast i8** [[TMP86]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP65]], i64* [[TMP87]], align 8
+// CHECK9-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 3
+// CHECK9-NEXT:    store i8* null, i8** [[TMP88]], align 8
+// CHECK9-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP92:%.*]] = load i32, i32* [[N]], align 4
+// CHECK9-NEXT:    store i32 [[TMP92]], i32* [[DOTCAPTURE_EXPR_27]], align 4
+// CHECK9-NEXT:    [[TMP93:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_27]], align 4
+// CHECK9-NEXT:    [[SUB29:%.*]] = sub nsw i32 [[TMP93]], 0
 // CHECK9-NEXT:    [[DIV30:%.*]] = sdiv i32 [[SUB29]], 1
 // CHECK9-NEXT:    [[SUB31:%.*]] = sub nsw i32 [[DIV30]], 1
 // CHECK9-NEXT:    store i32 [[SUB31]], i32* [[DOTCAPTURE_EXPR_28]], align 4
-// CHECK9-NEXT:    [[TMP98:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_28]], align 4
-// CHECK9-NEXT:    [[ADD32:%.*]] = add nsw i32 [[TMP98]], 1
-// CHECK9-NEXT:    [[TMP99:%.*]] = zext i32 [[ADD32]] to i64
-// CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP99]])
-// CHECK9-NEXT:    [[TMP100:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l108.region_id, i32 4, i8** [[TMP93]], i8** [[TMP94]], i64* [[TMP95]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.7, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK9-NEXT:    [[TMP101:%.*]] = icmp ne i32 [[TMP100]], 0
-// CHECK9-NEXT:    br i1 [[TMP101]], label [[OMP_OFFLOAD_FAILED33:%.*]], label [[OMP_OFFLOAD_CONT34:%.*]]
+// CHECK9-NEXT:    [[TMP94:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_28]], align 4
+// CHECK9-NEXT:    [[ADD32:%.*]] = add nsw i32 [[TMP94]], 1
+// CHECK9-NEXT:    [[TMP95:%.*]] = zext i32 [[ADD32]] to i64
+// CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP95]])
+// CHECK9-NEXT:    [[TMP96:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l108.region_id, i32 4, i8** [[TMP89]], i8** [[TMP90]], i64* [[TMP91]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK9-NEXT:    [[TMP97:%.*]] = icmp ne i32 [[TMP96]], 0
+// CHECK9-NEXT:    br i1 [[TMP97]], label [[OMP_OFFLOAD_FAILED33:%.*]], label [[OMP_OFFLOAD_CONT34:%.*]]
 // CHECK9:       omp_offload.failed33:
-// CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l108(i64 [[TMP65]], i64 [[TMP1]], i32* [[VLA]], i64 [[TMP67]]) #[[ATTR3]]
+// CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l108(i64 [[TMP63]], i64 [[TMP1]], i32* [[VLA]], i64 [[TMP65]]) #[[ATTR3]]
 // CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT34]]
 // CHECK9:       omp_offload.cont34:
-// CHECK9-NEXT:    [[TMP102:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK9-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10EEiT_(i32 noundef signext [[TMP102]])
+// CHECK9-NEXT:    [[TMP98:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK9-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10EEiT_(i32 noundef signext [[TMP98]])
 // CHECK9-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK9-NEXT:    [[TMP103:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK9-NEXT:    call void @llvm.stackrestore(i8* [[TMP103]])
-// CHECK9-NEXT:    [[TMP104:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK9-NEXT:    ret i32 [[TMP104]]
+// CHECK9-NEXT:    [[TMP99:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK9-NEXT:    call void @llvm.stackrestore(i8* [[TMP99]])
+// CHECK9-NEXT:    [[TMP100:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK9-NEXT:    ret i32 [[TMP100]]
 //
 //
 // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l100
@@ -3799,11 +3791,11 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK9-NEXT:    store i32 [[TMP4]], i32* [[CONV3]], align 4
 // CHECK9-NEXT:    [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP3]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP5]])
+// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i64 [[TMP3]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP5]])
 // CHECK9-NEXT:    ret void
 //
 //
-// CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..5
+// CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..6
 // CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK9-NEXT:  entry:
 // CHECK9-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -3890,7 +3882,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[CONV9:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK9-NEXT:    store i32 [[TMP22]], i32* [[CONV9]], align 4, !llvm.access.group !24
 // CHECK9-NEXT:    [[TMP23:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !24
-// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i64 [[TMP21]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP23]]), !llvm.access.group !24
+// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*, i64)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i64 [[TMP21]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP23]]), !llvm.access.group !24
 // CHECK9-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK9:       omp.inner.for.inc:
 // CHECK9-NEXT:    [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24
@@ -3944,7 +3936,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    ret void
 //
 //
-// CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..6
+// CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..7
 // CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK9-NEXT:  entry:
 // CHECK9-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -4065,7 +4057,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK9-LABEL: define {{[^@]+}}@_Z5tmainIiLi10EEiT_
-// CHECK9-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK9-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK9-NEXT:  entry:
 // CHECK9-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[A:%.*]] = alloca [10 x i32], align 4
@@ -4097,7 +4089,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK9-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l81.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.10, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK9-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l81.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.12, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK9-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK9-NEXT:    br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK9:       omp_offload.failed:
@@ -4115,7 +4107,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
 // CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK9-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l85.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.13, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.14, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK9-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l85.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.16, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.17, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK9-NEXT:    [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
 // CHECK9-NEXT:    br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]]
 // CHECK9:       omp_offload.failed5:
@@ -4147,7 +4139,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS8]], i32 0, i32 0
 // CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK9-NEXT:    [[TMP33:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l89.region_id, i32 2, i8** [[TMP31]], i8** [[TMP32]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.17, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.18, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK9-NEXT:    [[TMP33:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l89.region_id, i32 2, i8** [[TMP31]], i8** [[TMP32]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.20, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.21, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK9-NEXT:    [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0
 // CHECK9-NEXT:    br i1 [[TMP34]], label [[OMP_OFFLOAD_FAILED11:%.*]], label [[OMP_OFFLOAD_CONT12:%.*]]
 // CHECK9:       omp_offload.failed11:
@@ -4163,11 +4155,11 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8
 // CHECK9-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8
 // CHECK9-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8
-// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK9-NEXT:    ret void
 //
 //
-// CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..8
+// CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..10
 // CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK9-NEXT:  entry:
 // CHECK9-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -4215,7 +4207,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
 // CHECK9-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30
 // CHECK9-NEXT:    [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
-// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !30
+// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !30
 // CHECK9-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK9:       omp.inner.for.inc:
 // CHECK9-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30
@@ -4237,7 +4229,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    ret void
 //
 //
-// CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..9
+// CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..11
 // CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK9-NEXT:  entry:
 // CHECK9-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -4327,11 +4319,11 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8
 // CHECK9-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8
 // CHECK9-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8
-// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK9-NEXT:    ret void
 //
 //
-// CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..11
+// CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..14
 // CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK9-NEXT:  entry:
 // CHECK9-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -4379,7 +4371,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
 // CHECK9-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36
 // CHECK9-NEXT:    [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
-// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !36
+// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !36
 // CHECK9-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK9:       omp.inner.for.inc:
 // CHECK9-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36
@@ -4401,7 +4393,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    ret void
 //
 //
-// CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..12
+// CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..15
 // CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK9-NEXT:  entry:
 // CHECK9-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -4499,11 +4491,11 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK9-NEXT:    store i32 [[TMP1]], i32* [[CONV1]], align 4
 // CHECK9-NEXT:    [[TMP2:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i64 [[TMP2]])
+// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i64)* @.omp_outlined..18 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i64 [[TMP2]])
 // CHECK9-NEXT:    ret void
 //
 //
-// CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..15
+// CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..18
 // CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK9-NEXT:  entry:
 // CHECK9-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -4559,7 +4551,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK9-NEXT:    store i32 [[TMP12]], i32* [[CONV2]], align 4, !llvm.access.group !42
 // CHECK9-NEXT:    [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !42
-// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]), !llvm.access.group !42
+// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]), !llvm.access.group !42
 // CHECK9-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK9:       omp.inner.for.inc:
 // CHECK9-NEXT:    [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42
@@ -4602,7 +4594,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    ret void
 //
 //
-// CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..16
+// CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..19
 // CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK9-NEXT:  entry:
 // CHECK9-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -4690,7 +4682,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK9-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK9-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK9-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK9-NEXT:  entry:
 // CHECK9-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK9-NEXT:    ret void
@@ -4748,14 +4740,14 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    store i32 [[TMP3]], i32* [[CONV]], align 4
 // CHECK10-NEXT:    [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8
 // CHECK10-NEXT:    [[TMP5:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK10-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP4]], i64* [[TMP7]], align 8
-// CHECK10-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP4]], i64* [[TMP9]], align 8
-// CHECK10-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK10-NEXT:    store i64 4, i64* [[TMP10]], align 8
+// CHECK10-NEXT:    [[TMP6:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK10-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP6]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes to i8*), i64 24, i1 false)
+// CHECK10-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP4]], i64* [[TMP8]], align 8
+// CHECK10-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP4]], i64* [[TMP10]], align 8
 // CHECK10-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
 // CHECK10-NEXT:    store i8* null, i8** [[TMP11]], align 8
 // CHECK10-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -4764,176 +4756,168 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK10-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i64*
 // CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP15]], align 8
-// CHECK10-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK10-NEXT:    store i64 8, i64* [[TMP16]], align 8
-// CHECK10-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK10-NEXT:    store i8* null, i8** [[TMP17]], align 8
-// CHECK10-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK10-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
-// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP19]], align 8
-// CHECK10-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK10-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32**
-// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP21]], align 8
-// CHECK10-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK10-NEXT:    store i64 [[TMP5]], i64* [[TMP22]], align 8
-// CHECK10-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK10-NEXT:    store i8* null, i8** [[TMP23]], align 8
-// CHECK10-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP27:%.*]] = load i32, i32* [[N]], align 4
-// CHECK10-NEXT:    store i32 [[TMP27]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK10-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK10-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP28]], 0
+// CHECK10-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK10-NEXT:    store i8* null, i8** [[TMP16]], align 8
+// CHECK10-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK10-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32**
+// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP18]], align 8
+// CHECK10-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK10-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32**
+// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP20]], align 8
+// CHECK10-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
+// CHECK10-NEXT:    store i64 [[TMP5]], i64* [[TMP21]], align 8
+// CHECK10-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK10-NEXT:    store i8* null, i8** [[TMP22]], align 8
+// CHECK10-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP26:%.*]] = load i32, i32* [[N]], align 4
+// CHECK10-NEXT:    store i32 [[TMP26]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK10-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK10-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP27]], 0
 // CHECK10-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK10-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK10-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK10-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK10-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP29]], 1
-// CHECK10-NEXT:    [[TMP30:%.*]] = zext i32 [[ADD]] to i64
-// CHECK10-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP30]])
-// CHECK10-NEXT:    [[TMP31:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l100.region_id, i32 3, i8** [[TMP24]], i8** [[TMP25]], i64* [[TMP26]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK10-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
-// CHECK10-NEXT:    br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK10-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK10-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP28]], 1
+// CHECK10-NEXT:    [[TMP29:%.*]] = zext i32 [[ADD]] to i64
+// CHECK10-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP29]])
+// CHECK10-NEXT:    [[TMP30:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l100.region_id, i32 3, i8** [[TMP23]], i8** [[TMP24]], i64* [[TMP25]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK10-NEXT:    [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
+// CHECK10-NEXT:    br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK10:       omp_offload.failed:
 // CHECK10-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l100(i64 [[TMP4]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK10-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK10:       omp_offload.cont:
-// CHECK10-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N]], align 4
+// CHECK10-NEXT:    [[TMP32:%.*]] = load i32, i32* [[N]], align 4
 // CHECK10-NEXT:    [[CONV4:%.*]] = bitcast i64* [[N_CASTED3]] to i32*
-// CHECK10-NEXT:    store i32 [[TMP33]], i32* [[CONV4]], align 4
-// CHECK10-NEXT:    [[TMP34:%.*]] = load i64, i64* [[N_CASTED3]], align 8
-// CHECK10-NEXT:    [[TMP35:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK10-NEXT:    store i32 [[TMP32]], i32* [[CONV4]], align 4
+// CHECK10-NEXT:    [[TMP33:%.*]] = load i64, i64* [[N_CASTED3]], align 8
+// CHECK10-NEXT:    [[TMP34:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK10-NEXT:    [[TMP35:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES8]] to i8*
+// CHECK10-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP35]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes.4 to i8*), i64 24, i1 false)
 // CHECK10-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
 // CHECK10-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP34]], i64* [[TMP37]], align 8
+// CHECK10-NEXT:    store i64 [[TMP33]], i64* [[TMP37]], align 8
 // CHECK10-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
 // CHECK10-NEXT:    [[TMP39:%.*]] = bitcast i8** [[TMP38]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP34]], i64* [[TMP39]], align 8
-// CHECK10-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 0
-// CHECK10-NEXT:    store i64 4, i64* [[TMP40]], align 8
-// CHECK10-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 0
-// CHECK10-NEXT:    store i8* null, i8** [[TMP41]], align 8
-// CHECK10-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 1
-// CHECK10-NEXT:    [[TMP43:%.*]] = bitcast i8** [[TMP42]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP43]], align 8
-// CHECK10-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 1
-// CHECK10-NEXT:    [[TMP45:%.*]] = bitcast i8** [[TMP44]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP45]], align 8
-// CHECK10-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 1
-// CHECK10-NEXT:    store i64 8, i64* [[TMP46]], align 8
-// CHECK10-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 1
-// CHECK10-NEXT:    store i8* null, i8** [[TMP47]], align 8
-// CHECK10-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 2
+// CHECK10-NEXT:    store i64 [[TMP33]], i64* [[TMP39]], align 8
+// CHECK10-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 0
+// CHECK10-NEXT:    store i8* null, i8** [[TMP40]], align 8
+// CHECK10-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 1
+// CHECK10-NEXT:    [[TMP42:%.*]] = bitcast i8** [[TMP41]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP42]], align 8
+// CHECK10-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 1
+// CHECK10-NEXT:    [[TMP44:%.*]] = bitcast i8** [[TMP43]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP44]], align 8
+// CHECK10-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 1
+// CHECK10-NEXT:    store i8* null, i8** [[TMP45]], align 8
+// CHECK10-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 2
+// CHECK10-NEXT:    [[TMP47:%.*]] = bitcast i8** [[TMP46]] to i32**
+// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP47]], align 8
+// CHECK10-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 2
 // CHECK10-NEXT:    [[TMP49:%.*]] = bitcast i8** [[TMP48]] to i32**
 // CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP49]], align 8
-// CHECK10-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 2
-// CHECK10-NEXT:    [[TMP51:%.*]] = bitcast i8** [[TMP50]] to i32**
-// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP51]], align 8
-// CHECK10-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 2
-// CHECK10-NEXT:    store i64 [[TMP35]], i64* [[TMP52]], align 8
-// CHECK10-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 2
-// CHECK10-NEXT:    store i8* null, i8** [[TMP53]], align 8
-// CHECK10-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP57:%.*]] = load i32, i32* [[N]], align 4
-// CHECK10-NEXT:    store i32 [[TMP57]], i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK10-NEXT:    [[TMP58:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK10-NEXT:    [[SUB12:%.*]] = sub nsw i32 [[TMP58]], 0
+// CHECK10-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 2
+// CHECK10-NEXT:    store i64 [[TMP34]], i64* [[TMP50]], align 8
+// CHECK10-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 2
+// CHECK10-NEXT:    store i8* null, i8** [[TMP51]], align 8
+// CHECK10-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP55:%.*]] = load i32, i32* [[N]], align 4
+// CHECK10-NEXT:    store i32 [[TMP55]], i32* [[DOTCAPTURE_EXPR_10]], align 4
+// CHECK10-NEXT:    [[TMP56:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
+// CHECK10-NEXT:    [[SUB12:%.*]] = sub nsw i32 [[TMP56]], 0
 // CHECK10-NEXT:    [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1
 // CHECK10-NEXT:    [[SUB14:%.*]] = sub nsw i32 [[DIV13]], 1
 // CHECK10-NEXT:    store i32 [[SUB14]], i32* [[DOTCAPTURE_EXPR_11]], align 4
-// CHECK10-NEXT:    [[TMP59:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_11]], align 4
-// CHECK10-NEXT:    [[ADD15:%.*]] = add nsw i32 [[TMP59]], 1
-// CHECK10-NEXT:    [[TMP60:%.*]] = zext i32 [[ADD15]] to i64
-// CHECK10-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP60]])
-// CHECK10-NEXT:    [[TMP61:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l104.region_id, i32 3, i8** [[TMP54]], i8** [[TMP55]], i64* [[TMP56]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK10-NEXT:    [[TMP62:%.*]] = icmp ne i32 [[TMP61]], 0
-// CHECK10-NEXT:    br i1 [[TMP62]], label [[OMP_OFFLOAD_FAILED16:%.*]], label [[OMP_OFFLOAD_CONT17:%.*]]
+// CHECK10-NEXT:    [[TMP57:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_11]], align 4
+// CHECK10-NEXT:    [[ADD15:%.*]] = add nsw i32 [[TMP57]], 1
+// CHECK10-NEXT:    [[TMP58:%.*]] = zext i32 [[ADD15]] to i64
+// CHECK10-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP58]])
+// CHECK10-NEXT:    [[TMP59:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l104.region_id, i32 3, i8** [[TMP52]], i8** [[TMP53]], i64* [[TMP54]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.5, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK10-NEXT:    [[TMP60:%.*]] = icmp ne i32 [[TMP59]], 0
+// CHECK10-NEXT:    br i1 [[TMP60]], label [[OMP_OFFLOAD_FAILED16:%.*]], label [[OMP_OFFLOAD_CONT17:%.*]]
 // CHECK10:       omp_offload.failed16:
-// CHECK10-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l104(i64 [[TMP34]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK10-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l104(i64 [[TMP33]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK10-NEXT:    br label [[OMP_OFFLOAD_CONT17]]
 // CHECK10:       omp_offload.cont17:
-// CHECK10-NEXT:    [[TMP63:%.*]] = load i32, i32* [[M]], align 4
-// CHECK10-NEXT:    store i32 [[TMP63]], i32* [[DOTCAPTURE_EXPR_18]], align 4
-// CHECK10-NEXT:    [[TMP64:%.*]] = load i32, i32* [[N]], align 4
+// CHECK10-NEXT:    [[TMP61:%.*]] = load i32, i32* [[M]], align 4
+// CHECK10-NEXT:    store i32 [[TMP61]], i32* [[DOTCAPTURE_EXPR_18]], align 4
+// CHECK10-NEXT:    [[TMP62:%.*]] = load i32, i32* [[N]], align 4
 // CHECK10-NEXT:    [[CONV20:%.*]] = bitcast i64* [[N_CASTED19]] to i32*
-// CHECK10-NEXT:    store i32 [[TMP64]], i32* [[CONV20]], align 4
-// CHECK10-NEXT:    [[TMP65:%.*]] = load i64, i64* [[N_CASTED19]], align 8
-// CHECK10-NEXT:    [[TMP66:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4
+// CHECK10-NEXT:    store i32 [[TMP62]], i32* [[CONV20]], align 4
+// CHECK10-NEXT:    [[TMP63:%.*]] = load i64, i64* [[N_CASTED19]], align 8
+// CHECK10-NEXT:    [[TMP64:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4
 // CHECK10-NEXT:    [[CONV21:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
-// CHECK10-NEXT:    store i32 [[TMP66]], i32* [[CONV21]], align 4
-// CHECK10-NEXT:    [[TMP67:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK10-NEXT:    [[TMP68:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK10-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP70:%.*]] = bitcast i8** [[TMP69]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP65]], i64* [[TMP70]], align 8
-// CHECK10-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP72:%.*]] = bitcast i8** [[TMP71]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP65]], i64* [[TMP72]], align 8
-// CHECK10-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 0
-// CHECK10-NEXT:    store i64 4, i64* [[TMP73]], align 8
-// CHECK10-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 0
-// CHECK10-NEXT:    store i8* null, i8** [[TMP74]], align 8
-// CHECK10-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 1
+// CHECK10-NEXT:    store i32 [[TMP64]], i32* [[CONV21]], align 4
+// CHECK10-NEXT:    [[TMP65:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
+// CHECK10-NEXT:    [[TMP66:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK10-NEXT:    [[TMP67:%.*]] = bitcast [4 x i64]* [[DOTOFFLOAD_SIZES25]] to i8*
+// CHECK10-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP67]], i8* align 8 bitcast ([4 x i64]* @.offload_sizes.8 to i8*), i64 32, i1 false)
+// CHECK10-NEXT:    [[TMP68:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP69:%.*]] = bitcast i8** [[TMP68]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP63]], i64* [[TMP69]], align 8
+// CHECK10-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP71:%.*]] = bitcast i8** [[TMP70]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP63]], i64* [[TMP71]], align 8
+// CHECK10-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 0
+// CHECK10-NEXT:    store i8* null, i8** [[TMP72]], align 8
+// CHECK10-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 1
+// CHECK10-NEXT:    [[TMP74:%.*]] = bitcast i8** [[TMP73]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP74]], align 8
+// CHECK10-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 1
 // CHECK10-NEXT:    [[TMP76:%.*]] = bitcast i8** [[TMP75]] to i64*
 // CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP76]], align 8
-// CHECK10-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 1
-// CHECK10-NEXT:    [[TMP78:%.*]] = bitcast i8** [[TMP77]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP78]], align 8
-// CHECK10-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 1
-// CHECK10-NEXT:    store i64 8, i64* [[TMP79]], align 8
-// CHECK10-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 1
-// CHECK10-NEXT:    store i8* null, i8** [[TMP80]], align 8
-// CHECK10-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 2
-// CHECK10-NEXT:    [[TMP82:%.*]] = bitcast i8** [[TMP81]] to i32**
-// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP82]], align 8
-// CHECK10-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 2
-// CHECK10-NEXT:    [[TMP84:%.*]] = bitcast i8** [[TMP83]] to i32**
-// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP84]], align 8
-// CHECK10-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 2
-// CHECK10-NEXT:    store i64 [[TMP68]], i64* [[TMP85]], align 8
-// CHECK10-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 2
-// CHECK10-NEXT:    store i8* null, i8** [[TMP86]], align 8
-// CHECK10-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 3
-// CHECK10-NEXT:    [[TMP88:%.*]] = bitcast i8** [[TMP87]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP67]], i64* [[TMP88]], align 8
-// CHECK10-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 3
-// CHECK10-NEXT:    [[TMP90:%.*]] = bitcast i8** [[TMP89]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP67]], i64* [[TMP90]], align 8
-// CHECK10-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 3
-// CHECK10-NEXT:    store i64 4, i64* [[TMP91]], align 8
-// CHECK10-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 3
-// CHECK10-NEXT:    store i8* null, i8** [[TMP92]], align 8
-// CHECK10-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP96:%.*]] = load i32, i32* [[N]], align 4
-// CHECK10-NEXT:    store i32 [[TMP96]], i32* [[DOTCAPTURE_EXPR_27]], align 4
-// CHECK10-NEXT:    [[TMP97:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_27]], align 4
-// CHECK10-NEXT:    [[SUB29:%.*]] = sub nsw i32 [[TMP97]], 0
+// CHECK10-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 1
+// CHECK10-NEXT:    store i8* null, i8** [[TMP77]], align 8
+// CHECK10-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 2
+// CHECK10-NEXT:    [[TMP79:%.*]] = bitcast i8** [[TMP78]] to i32**
+// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP79]], align 8
+// CHECK10-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 2
+// CHECK10-NEXT:    [[TMP81:%.*]] = bitcast i8** [[TMP80]] to i32**
+// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP81]], align 8
+// CHECK10-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 2
+// CHECK10-NEXT:    store i64 [[TMP66]], i64* [[TMP82]], align 8
+// CHECK10-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 2
+// CHECK10-NEXT:    store i8* null, i8** [[TMP83]], align 8
+// CHECK10-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 3
+// CHECK10-NEXT:    [[TMP85:%.*]] = bitcast i8** [[TMP84]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP65]], i64* [[TMP85]], align 8
+// CHECK10-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 3
+// CHECK10-NEXT:    [[TMP87:%.*]] = bitcast i8** [[TMP86]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP65]], i64* [[TMP87]], align 8
+// CHECK10-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 3
+// CHECK10-NEXT:    store i8* null, i8** [[TMP88]], align 8
+// CHECK10-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP92:%.*]] = load i32, i32* [[N]], align 4
+// CHECK10-NEXT:    store i32 [[TMP92]], i32* [[DOTCAPTURE_EXPR_27]], align 4
+// CHECK10-NEXT:    [[TMP93:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_27]], align 4
+// CHECK10-NEXT:    [[SUB29:%.*]] = sub nsw i32 [[TMP93]], 0
 // CHECK10-NEXT:    [[DIV30:%.*]] = sdiv i32 [[SUB29]], 1
 // CHECK10-NEXT:    [[SUB31:%.*]] = sub nsw i32 [[DIV30]], 1
 // CHECK10-NEXT:    store i32 [[SUB31]], i32* [[DOTCAPTURE_EXPR_28]], align 4
-// CHECK10-NEXT:    [[TMP98:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_28]], align 4
-// CHECK10-NEXT:    [[ADD32:%.*]] = add nsw i32 [[TMP98]], 1
-// CHECK10-NEXT:    [[TMP99:%.*]] = zext i32 [[ADD32]] to i64
-// CHECK10-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP99]])
-// CHECK10-NEXT:    [[TMP100:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l108.region_id, i32 4, i8** [[TMP93]], i8** [[TMP94]], i64* [[TMP95]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.7, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK10-NEXT:    [[TMP101:%.*]] = icmp ne i32 [[TMP100]], 0
-// CHECK10-NEXT:    br i1 [[TMP101]], label [[OMP_OFFLOAD_FAILED33:%.*]], label [[OMP_OFFLOAD_CONT34:%.*]]
+// CHECK10-NEXT:    [[TMP94:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_28]], align 4
+// CHECK10-NEXT:    [[ADD32:%.*]] = add nsw i32 [[TMP94]], 1
+// CHECK10-NEXT:    [[TMP95:%.*]] = zext i32 [[ADD32]] to i64
+// CHECK10-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP95]])
+// CHECK10-NEXT:    [[TMP96:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l108.region_id, i32 4, i8** [[TMP89]], i8** [[TMP90]], i64* [[TMP91]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK10-NEXT:    [[TMP97:%.*]] = icmp ne i32 [[TMP96]], 0
+// CHECK10-NEXT:    br i1 [[TMP97]], label [[OMP_OFFLOAD_FAILED33:%.*]], label [[OMP_OFFLOAD_CONT34:%.*]]
 // CHECK10:       omp_offload.failed33:
-// CHECK10-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l108(i64 [[TMP65]], i64 [[TMP1]], i32* [[VLA]], i64 [[TMP67]]) #[[ATTR3]]
+// CHECK10-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l108(i64 [[TMP63]], i64 [[TMP1]], i32* [[VLA]], i64 [[TMP65]]) #[[ATTR3]]
 // CHECK10-NEXT:    br label [[OMP_OFFLOAD_CONT34]]
 // CHECK10:       omp_offload.cont34:
-// CHECK10-NEXT:    [[TMP102:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK10-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10EEiT_(i32 noundef signext [[TMP102]])
+// CHECK10-NEXT:    [[TMP98:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK10-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10EEiT_(i32 noundef signext [[TMP98]])
 // CHECK10-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK10-NEXT:    [[TMP103:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK10-NEXT:    call void @llvm.stackrestore(i8* [[TMP103]])
-// CHECK10-NEXT:    [[TMP104:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK10-NEXT:    ret i32 [[TMP104]]
+// CHECK10-NEXT:    [[TMP99:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK10-NEXT:    call void @llvm.stackrestore(i8* [[TMP99]])
+// CHECK10-NEXT:    [[TMP100:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK10-NEXT:    ret i32 [[TMP100]]
 //
 //
 // CHECK10-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l100
@@ -5453,11 +5437,11 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK10-NEXT:    store i32 [[TMP4]], i32* [[CONV3]], align 4
 // CHECK10-NEXT:    [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP3]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP5]])
+// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i64 [[TMP3]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP5]])
 // CHECK10-NEXT:    ret void
 //
 //
-// CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..5
+// CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..6
 // CHECK10-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK10-NEXT:  entry:
 // CHECK10-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -5544,7 +5528,7 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[CONV9:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK10-NEXT:    store i32 [[TMP22]], i32* [[CONV9]], align 4, !llvm.access.group !24
 // CHECK10-NEXT:    [[TMP23:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !24
-// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i64 [[TMP21]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP23]]), !llvm.access.group !24
+// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*, i64)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i64 [[TMP21]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP23]]), !llvm.access.group !24
 // CHECK10-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK10:       omp.inner.for.inc:
 // CHECK10-NEXT:    [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24
@@ -5598,7 +5582,7 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    ret void
 //
 //
-// CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..6
+// CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..7
 // CHECK10-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK10-NEXT:  entry:
 // CHECK10-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -5719,7 +5703,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK10-LABEL: define {{[^@]+}}@_Z5tmainIiLi10EEiT_
-// CHECK10-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK10-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK10-NEXT:  entry:
 // CHECK10-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK10-NEXT:    [[A:%.*]] = alloca [10 x i32], align 4
@@ -5751,7 +5735,7 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK10-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK10-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK10-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l81.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.10, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK10-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l81.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.12, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK10-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK10-NEXT:    br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK10:       omp_offload.failed:
@@ -5769,7 +5753,7 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK10-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
 // CHECK10-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK10-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l85.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.13, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.14, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK10-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l85.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.16, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.17, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK10-NEXT:    [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
 // CHECK10-NEXT:    br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]]
 // CHECK10:       omp_offload.failed5:
@@ -5801,7 +5785,7 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0
 // CHECK10-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS8]], i32 0, i32 0
 // CHECK10-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK10-NEXT:    [[TMP33:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l89.region_id, i32 2, i8** [[TMP31]], i8** [[TMP32]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.17, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.18, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK10-NEXT:    [[TMP33:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l89.region_id, i32 2, i8** [[TMP31]], i8** [[TMP32]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.20, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.21, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK10-NEXT:    [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0
 // CHECK10-NEXT:    br i1 [[TMP34]], label [[OMP_OFFLOAD_FAILED11:%.*]], label [[OMP_OFFLOAD_CONT12:%.*]]
 // CHECK10:       omp_offload.failed11:
@@ -5817,11 +5801,11 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8
 // CHECK10-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8
 // CHECK10-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8
-// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK10-NEXT:    ret void
 //
 //
-// CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..8
+// CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..10
 // CHECK10-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK10-NEXT:  entry:
 // CHECK10-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -5869,7 +5853,7 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
 // CHECK10-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30
 // CHECK10-NEXT:    [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
-// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !30
+// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !30
 // CHECK10-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK10:       omp.inner.for.inc:
 // CHECK10-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30
@@ -5891,7 +5875,7 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    ret void
 //
 //
-// CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..9
+// CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..11
 // CHECK10-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK10-NEXT:  entry:
 // CHECK10-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -5981,11 +5965,11 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8
 // CHECK10-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8
 // CHECK10-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8
-// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK10-NEXT:    ret void
 //
 //
-// CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..11
+// CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..14
 // CHECK10-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK10-NEXT:  entry:
 // CHECK10-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -6033,7 +6017,7 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
 // CHECK10-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36
 // CHECK10-NEXT:    [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
-// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !36
+// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !36
 // CHECK10-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK10:       omp.inner.for.inc:
 // CHECK10-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36
@@ -6055,7 +6039,7 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    ret void
 //
 //
-// CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..12
+// CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..15
 // CHECK10-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK10-NEXT:  entry:
 // CHECK10-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -6153,11 +6137,11 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK10-NEXT:    store i32 [[TMP1]], i32* [[CONV1]], align 4
 // CHECK10-NEXT:    [[TMP2:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i64 [[TMP2]])
+// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i64)* @.omp_outlined..18 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i64 [[TMP2]])
 // CHECK10-NEXT:    ret void
 //
 //
-// CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..15
+// CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..18
 // CHECK10-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK10-NEXT:  entry:
 // CHECK10-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -6213,7 +6197,7 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK10-NEXT:    store i32 [[TMP12]], i32* [[CONV2]], align 4, !llvm.access.group !42
 // CHECK10-NEXT:    [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !42
-// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]), !llvm.access.group !42
+// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]), !llvm.access.group !42
 // CHECK10-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK10:       omp.inner.for.inc:
 // CHECK10-NEXT:    [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42
@@ -6256,7 +6240,7 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    ret void
 //
 //
-// CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..16
+// CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..19
 // CHECK10-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK10-NEXT:  entry:
 // CHECK10-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -6344,7 +6328,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK10-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK10-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK10-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK10-NEXT:  entry:
 // CHECK10-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK10-NEXT:    ret void
@@ -6401,14 +6385,14 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4
 // CHECK11-NEXT:    [[TMP4:%.*]] = mul nuw i32 [[TMP0]], 4
 // CHECK11-NEXT:    [[TMP5:%.*]] = sext i32 [[TMP4]] to i64
-// CHECK11-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP3]], i32* [[TMP7]], align 4
-// CHECK11-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP3]], i32* [[TMP9]], align 4
-// CHECK11-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK11-NEXT:    store i64 4, i64* [[TMP10]], align 4
+// CHECK11-NEXT:    [[TMP6:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK11-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP6]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes to i8*), i32 24, i1 false)
+// CHECK11-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP3]], i32* [[TMP8]], align 4
+// CHECK11-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP3]], i32* [[TMP10]], align 4
 // CHECK11-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
 // CHECK11-NEXT:    store i8* null, i8** [[TMP11]], align 4
 // CHECK11-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -6417,175 +6401,167 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK11-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i32*
 // CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP15]], align 4
-// CHECK11-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK11-NEXT:    store i64 4, i64* [[TMP16]], align 4
-// CHECK11-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK11-NEXT:    store i8* null, i8** [[TMP17]], align 4
-// CHECK11-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
-// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP19]], align 4
-// CHECK11-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32**
-// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP21]], align 4
-// CHECK11-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK11-NEXT:    store i64 [[TMP5]], i64* [[TMP22]], align 4
-// CHECK11-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK11-NEXT:    store i8* null, i8** [[TMP23]], align 4
-// CHECK11-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP27:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP27]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK11-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK11-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP28]], 0
+// CHECK11-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK11-NEXT:    store i8* null, i8** [[TMP16]], align 4
+// CHECK11-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32**
+// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP18]], align 4
+// CHECK11-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32**
+// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP20]], align 4
+// CHECK11-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
+// CHECK11-NEXT:    store i64 [[TMP5]], i64* [[TMP21]], align 4
+// CHECK11-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK11-NEXT:    store i8* null, i8** [[TMP22]], align 4
+// CHECK11-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP26:%.*]] = load i32, i32* [[N]], align 4
+// CHECK11-NEXT:    store i32 [[TMP26]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK11-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK11-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP27]], 0
 // CHECK11-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK11-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK11-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK11-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK11-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP29]], 1
-// CHECK11-NEXT:    [[TMP30:%.*]] = zext i32 [[ADD]] to i64
-// CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP30]])
-// CHECK11-NEXT:    [[TMP31:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l100.region_id, i32 3, i8** [[TMP24]], i8** [[TMP25]], i64* [[TMP26]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK11-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
-// CHECK11-NEXT:    br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK11-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK11-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP28]], 1
+// CHECK11-NEXT:    [[TMP29:%.*]] = zext i32 [[ADD]] to i64
+// CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP29]])
+// CHECK11-NEXT:    [[TMP30:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l100.region_id, i32 3, i8** [[TMP23]], i8** [[TMP24]], i64* [[TMP25]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK11-NEXT:    [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
+// CHECK11-NEXT:    br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK11:       omp_offload.failed:
 // CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l100(i32 [[TMP3]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK11:       omp_offload.cont:
-// CHECK11-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP33]], i32* [[N_CASTED3]], align 4
-// CHECK11-NEXT:    [[TMP34:%.*]] = load i32, i32* [[N_CASTED3]], align 4
-// CHECK11-NEXT:    [[TMP35:%.*]] = mul nuw i32 [[TMP0]], 4
-// CHECK11-NEXT:    [[TMP36:%.*]] = sext i32 [[TMP35]] to i64
+// CHECK11-NEXT:    [[TMP32:%.*]] = load i32, i32* [[N]], align 4
+// CHECK11-NEXT:    store i32 [[TMP32]], i32* [[N_CASTED3]], align 4
+// CHECK11-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N_CASTED3]], align 4
+// CHECK11-NEXT:    [[TMP34:%.*]] = mul nuw i32 [[TMP0]], 4
+// CHECK11-NEXT:    [[TMP35:%.*]] = sext i32 [[TMP34]] to i64
+// CHECK11-NEXT:    [[TMP36:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES7]] to i8*
+// CHECK11-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP36]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes.4 to i8*), i32 24, i1 false)
 // CHECK11-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP38:%.*]] = bitcast i8** [[TMP37]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP34]], i32* [[TMP38]], align 4
+// CHECK11-NEXT:    store i32 [[TMP33]], i32* [[TMP38]], align 4
 // CHECK11-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP40:%.*]] = bitcast i8** [[TMP39]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP34]], i32* [[TMP40]], align 4
-// CHECK11-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 0
-// CHECK11-NEXT:    store i64 4, i64* [[TMP41]], align 4
-// CHECK11-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 0
-// CHECK11-NEXT:    store i8* null, i8** [[TMP42]], align 4
-// CHECK11-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 1
-// CHECK11-NEXT:    [[TMP44:%.*]] = bitcast i8** [[TMP43]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP44]], align 4
-// CHECK11-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 1
-// CHECK11-NEXT:    [[TMP46:%.*]] = bitcast i8** [[TMP45]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP46]], align 4
-// CHECK11-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 1
-// CHECK11-NEXT:    store i64 4, i64* [[TMP47]], align 4
-// CHECK11-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 1
-// CHECK11-NEXT:    store i8* null, i8** [[TMP48]], align 4
-// CHECK11-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 2
+// CHECK11-NEXT:    store i32 [[TMP33]], i32* [[TMP40]], align 4
+// CHECK11-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 0
+// CHECK11-NEXT:    store i8* null, i8** [[TMP41]], align 4
+// CHECK11-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 1
+// CHECK11-NEXT:    [[TMP43:%.*]] = bitcast i8** [[TMP42]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP43]], align 4
+// CHECK11-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 1
+// CHECK11-NEXT:    [[TMP45:%.*]] = bitcast i8** [[TMP44]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP45]], align 4
+// CHECK11-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 1
+// CHECK11-NEXT:    store i8* null, i8** [[TMP46]], align 4
+// CHECK11-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP48:%.*]] = bitcast i8** [[TMP47]] to i32**
+// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP48]], align 4
+// CHECK11-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 2
 // CHECK11-NEXT:    [[TMP50:%.*]] = bitcast i8** [[TMP49]] to i32**
 // CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP50]], align 4
-// CHECK11-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP52:%.*]] = bitcast i8** [[TMP51]] to i32**
-// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP52]], align 4
-// CHECK11-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 2
-// CHECK11-NEXT:    store i64 [[TMP36]], i64* [[TMP53]], align 4
-// CHECK11-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 2
-// CHECK11-NEXT:    store i8* null, i8** [[TMP54]], align 4
-// CHECK11-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP58:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP58]], i32* [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK11-NEXT:    [[TMP59:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK11-NEXT:    [[SUB11:%.*]] = sub nsw i32 [[TMP59]], 0
+// CHECK11-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 2
+// CHECK11-NEXT:    store i64 [[TMP35]], i64* [[TMP51]], align 4
+// CHECK11-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 2
+// CHECK11-NEXT:    store i8* null, i8** [[TMP52]], align 4
+// CHECK11-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP56:%.*]] = load i32, i32* [[N]], align 4
+// CHECK11-NEXT:    store i32 [[TMP56]], i32* [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK11-NEXT:    [[TMP57:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK11-NEXT:    [[SUB11:%.*]] = sub nsw i32 [[TMP57]], 0
 // CHECK11-NEXT:    [[DIV12:%.*]] = sdiv i32 [[SUB11]], 1
 // CHECK11-NEXT:    [[SUB13:%.*]] = sub nsw i32 [[DIV12]], 1
 // CHECK11-NEXT:    store i32 [[SUB13]], i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK11-NEXT:    [[TMP60:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK11-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP60]], 1
-// CHECK11-NEXT:    [[TMP61:%.*]] = zext i32 [[ADD14]] to i64
-// CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP61]])
-// CHECK11-NEXT:    [[TMP62:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l104.region_id, i32 3, i8** [[TMP55]], i8** [[TMP56]], i64* [[TMP57]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK11-NEXT:    [[TMP63:%.*]] = icmp ne i32 [[TMP62]], 0
-// CHECK11-NEXT:    br i1 [[TMP63]], label [[OMP_OFFLOAD_FAILED15:%.*]], label [[OMP_OFFLOAD_CONT16:%.*]]
+// CHECK11-NEXT:    [[TMP58:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
+// CHECK11-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP58]], 1
+// CHECK11-NEXT:    [[TMP59:%.*]] = zext i32 [[ADD14]] to i64
+// CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP59]])
+// CHECK11-NEXT:    [[TMP60:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l104.region_id, i32 3, i8** [[TMP53]], i8** [[TMP54]], i64* [[TMP55]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.5, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK11-NEXT:    [[TMP61:%.*]] = icmp ne i32 [[TMP60]], 0
+// CHECK11-NEXT:    br i1 [[TMP61]], label [[OMP_OFFLOAD_FAILED15:%.*]], label [[OMP_OFFLOAD_CONT16:%.*]]
 // CHECK11:       omp_offload.failed15:
-// CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l104(i32 [[TMP34]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l104(i32 [[TMP33]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT16]]
 // CHECK11:       omp_offload.cont16:
-// CHECK11-NEXT:    [[TMP64:%.*]] = load i32, i32* [[M]], align 4
-// CHECK11-NEXT:    store i32 [[TMP64]], i32* [[DOTCAPTURE_EXPR_17]], align 4
-// CHECK11-NEXT:    [[TMP65:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP65]], i32* [[N_CASTED18]], align 4
-// CHECK11-NEXT:    [[TMP66:%.*]] = load i32, i32* [[N_CASTED18]], align 4
-// CHECK11-NEXT:    [[TMP67:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_17]], align 4
-// CHECK11-NEXT:    store i32 [[TMP67]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK11-NEXT:    [[TMP68:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK11-NEXT:    [[TMP69:%.*]] = mul nuw i32 [[TMP0]], 4
-// CHECK11-NEXT:    [[TMP70:%.*]] = sext i32 [[TMP69]] to i64
-// CHECK11-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP72:%.*]] = bitcast i8** [[TMP71]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP66]], i32* [[TMP72]], align 4
-// CHECK11-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP74:%.*]] = bitcast i8** [[TMP73]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP66]], i32* [[TMP74]], align 4
-// CHECK11-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 0
-// CHECK11-NEXT:    store i64 4, i64* [[TMP75]], align 4
-// CHECK11-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 0
-// CHECK11-NEXT:    store i8* null, i8** [[TMP76]], align 4
-// CHECK11-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 1
+// CHECK11-NEXT:    [[TMP62:%.*]] = load i32, i32* [[M]], align 4
+// CHECK11-NEXT:    store i32 [[TMP62]], i32* [[DOTCAPTURE_EXPR_17]], align 4
+// CHECK11-NEXT:    [[TMP63:%.*]] = load i32, i32* [[N]], align 4
+// CHECK11-NEXT:    store i32 [[TMP63]], i32* [[N_CASTED18]], align 4
+// CHECK11-NEXT:    [[TMP64:%.*]] = load i32, i32* [[N_CASTED18]], align 4
+// CHECK11-NEXT:    [[TMP65:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_17]], align 4
+// CHECK11-NEXT:    store i32 [[TMP65]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
+// CHECK11-NEXT:    [[TMP66:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
+// CHECK11-NEXT:    [[TMP67:%.*]] = mul nuw i32 [[TMP0]], 4
+// CHECK11-NEXT:    [[TMP68:%.*]] = sext i32 [[TMP67]] to i64
+// CHECK11-NEXT:    [[TMP69:%.*]] = bitcast [4 x i64]* [[DOTOFFLOAD_SIZES22]] to i8*
+// CHECK11-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP69]], i8* align 4 bitcast ([4 x i64]* @.offload_sizes.8 to i8*), i32 32, i1 false)
+// CHECK11-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP71:%.*]] = bitcast i8** [[TMP70]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP64]], i32* [[TMP71]], align 4
+// CHECK11-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP73:%.*]] = bitcast i8** [[TMP72]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP64]], i32* [[TMP73]], align 4
+// CHECK11-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 0
+// CHECK11-NEXT:    store i8* null, i8** [[TMP74]], align 4
+// CHECK11-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 1
+// CHECK11-NEXT:    [[TMP76:%.*]] = bitcast i8** [[TMP75]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP76]], align 4
+// CHECK11-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 1
 // CHECK11-NEXT:    [[TMP78:%.*]] = bitcast i8** [[TMP77]] to i32*
 // CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP78]], align 4
-// CHECK11-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 1
-// CHECK11-NEXT:    [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP80]], align 4
-// CHECK11-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 1
-// CHECK11-NEXT:    store i64 4, i64* [[TMP81]], align 4
-// CHECK11-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 1
-// CHECK11-NEXT:    store i8* null, i8** [[TMP82]], align 4
-// CHECK11-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP84:%.*]] = bitcast i8** [[TMP83]] to i32**
-// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP84]], align 4
-// CHECK11-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP86:%.*]] = bitcast i8** [[TMP85]] to i32**
-// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP86]], align 4
-// CHECK11-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 2
-// CHECK11-NEXT:    store i64 [[TMP70]], i64* [[TMP87]], align 4
-// CHECK11-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 2
-// CHECK11-NEXT:    store i8* null, i8** [[TMP88]], align 4
-// CHECK11-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 3
-// CHECK11-NEXT:    [[TMP90:%.*]] = bitcast i8** [[TMP89]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP68]], i32* [[TMP90]], align 4
-// CHECK11-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 3
-// CHECK11-NEXT:    [[TMP92:%.*]] = bitcast i8** [[TMP91]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP68]], i32* [[TMP92]], align 4
-// CHECK11-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 3
-// CHECK11-NEXT:    store i64 4, i64* [[TMP93]], align 4
-// CHECK11-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 3
-// CHECK11-NEXT:    store i8* null, i8** [[TMP94]], align 4
-// CHECK11-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP98:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP98]], i32* [[DOTCAPTURE_EXPR_24]], align 4
-// CHECK11-NEXT:    [[TMP99:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_24]], align 4
-// CHECK11-NEXT:    [[SUB26:%.*]] = sub nsw i32 [[TMP99]], 0
+// CHECK11-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 1
+// CHECK11-NEXT:    store i8* null, i8** [[TMP79]], align 4
+// CHECK11-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP81:%.*]] = bitcast i8** [[TMP80]] to i32**
+// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP81]], align 4
+// CHECK11-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i32**
+// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP83]], align 4
+// CHECK11-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 2
+// CHECK11-NEXT:    store i64 [[TMP68]], i64* [[TMP84]], align 4
+// CHECK11-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 2
+// CHECK11-NEXT:    store i8* null, i8** [[TMP85]], align 4
+// CHECK11-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 3
+// CHECK11-NEXT:    [[TMP87:%.*]] = bitcast i8** [[TMP86]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP66]], i32* [[TMP87]], align 4
+// CHECK11-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 3
+// CHECK11-NEXT:    [[TMP89:%.*]] = bitcast i8** [[TMP88]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP66]], i32* [[TMP89]], align 4
+// CHECK11-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 3
+// CHECK11-NEXT:    store i8* null, i8** [[TMP90]], align 4
+// CHECK11-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP94:%.*]] = load i32, i32* [[N]], align 4
+// CHECK11-NEXT:    store i32 [[TMP94]], i32* [[DOTCAPTURE_EXPR_24]], align 4
+// CHECK11-NEXT:    [[TMP95:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_24]], align 4
+// CHECK11-NEXT:    [[SUB26:%.*]] = sub nsw i32 [[TMP95]], 0
 // CHECK11-NEXT:    [[DIV27:%.*]] = sdiv i32 [[SUB26]], 1
 // CHECK11-NEXT:    [[SUB28:%.*]] = sub nsw i32 [[DIV27]], 1
 // CHECK11-NEXT:    store i32 [[SUB28]], i32* [[DOTCAPTURE_EXPR_25]], align 4
-// CHECK11-NEXT:    [[TMP100:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_25]], align 4
-// CHECK11-NEXT:    [[ADD29:%.*]] = add nsw i32 [[TMP100]], 1
-// CHECK11-NEXT:    [[TMP101:%.*]] = zext i32 [[ADD29]] to i64
-// CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP101]])
-// CHECK11-NEXT:    [[TMP102:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l108.region_id, i32 4, i8** [[TMP95]], i8** [[TMP96]], i64* [[TMP97]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.7, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK11-NEXT:    [[TMP103:%.*]] = icmp ne i32 [[TMP102]], 0
-// CHECK11-NEXT:    br i1 [[TMP103]], label [[OMP_OFFLOAD_FAILED30:%.*]], label [[OMP_OFFLOAD_CONT31:%.*]]
+// CHECK11-NEXT:    [[TMP96:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_25]], align 4
+// CHECK11-NEXT:    [[ADD29:%.*]] = add nsw i32 [[TMP96]], 1
+// CHECK11-NEXT:    [[TMP97:%.*]] = zext i32 [[ADD29]] to i64
+// CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP97]])
+// CHECK11-NEXT:    [[TMP98:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l108.region_id, i32 4, i8** [[TMP91]], i8** [[TMP92]], i64* [[TMP93]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK11-NEXT:    [[TMP99:%.*]] = icmp ne i32 [[TMP98]], 0
+// CHECK11-NEXT:    br i1 [[TMP99]], label [[OMP_OFFLOAD_FAILED30:%.*]], label [[OMP_OFFLOAD_CONT31:%.*]]
 // CHECK11:       omp_offload.failed30:
-// CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l108(i32 [[TMP66]], i32 [[TMP0]], i32* [[VLA]], i32 [[TMP68]]) #[[ATTR3]]
+// CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l108(i32 [[TMP64]], i32 [[TMP0]], i32* [[VLA]], i32 [[TMP66]]) #[[ATTR3]]
 // CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT31]]
 // CHECK11:       omp_offload.cont31:
-// CHECK11-NEXT:    [[TMP104:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK11-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10EEiT_(i32 noundef [[TMP104]])
+// CHECK11-NEXT:    [[TMP100:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK11-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10EEiT_(i32 noundef [[TMP100]])
 // CHECK11-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK11-NEXT:    [[TMP105:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK11-NEXT:    call void @llvm.stackrestore(i8* [[TMP105]])
-// CHECK11-NEXT:    [[TMP106:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK11-NEXT:    ret i32 [[TMP106]]
+// CHECK11-NEXT:    [[TMP101:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK11-NEXT:    call void @llvm.stackrestore(i8* [[TMP101]])
+// CHECK11-NEXT:    [[TMP102:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK11-NEXT:    ret i32 [[TMP102]]
 //
 //
 // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l100
@@ -7081,11 +7057,11 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
 // CHECK11-NEXT:    store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK11-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK11-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32 [[TMP3]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP5]])
+// CHECK11-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32 [[TMP3]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP5]])
 // CHECK11-NEXT:    ret void
 //
 //
-// CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..5
+// CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..6
 // CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK11-NEXT:  entry:
 // CHECK11-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -7166,7 +7142,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !25
 // CHECK11-NEXT:    store i32 [[TMP20]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !25
 // CHECK11-NEXT:    [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !25
-// CHECK11-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*, i32)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32 [[TMP19]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP21]]), !llvm.access.group !25
+// CHECK11-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*, i32)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32 [[TMP19]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP21]]), !llvm.access.group !25
 // CHECK11-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK11:       omp.inner.for.inc:
 // CHECK11-NEXT:    [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25
@@ -7220,7 +7196,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    ret void
 //
 //
-// CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..6
+// CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..7
 // CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK11-NEXT:  entry:
 // CHECK11-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -7336,7 +7312,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK11-LABEL: define {{[^@]+}}@_Z5tmainIiLi10EEiT_
-// CHECK11-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK11-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK11-NEXT:  entry:
 // CHECK11-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[A:%.*]] = alloca [10 x i32], align 4
@@ -7368,7 +7344,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK11-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l81.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.10, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK11-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l81.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.12, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK11-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK11-NEXT:    br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK11:       omp_offload.failed:
@@ -7386,7 +7362,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
 // CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK11-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l85.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.13, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.14, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK11-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l85.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.16, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.17, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK11-NEXT:    [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
 // CHECK11-NEXT:    br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]]
 // CHECK11:       omp_offload.failed5:
@@ -7417,7 +7393,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS8]], i32 0, i32 0
 // CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK11-NEXT:    [[TMP33:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l89.region_id, i32 2, i8** [[TMP31]], i8** [[TMP32]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.17, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.18, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK11-NEXT:    [[TMP33:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l89.region_id, i32 2, i8** [[TMP31]], i8** [[TMP32]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.20, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.21, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK11-NEXT:    [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0
 // CHECK11-NEXT:    br i1 [[TMP34]], label [[OMP_OFFLOAD_FAILED11:%.*]], label [[OMP_OFFLOAD_CONT12:%.*]]
 // CHECK11:       omp_offload.failed11:
@@ -7433,11 +7409,11 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4
 // CHECK11-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4
 // CHECK11-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4
-// CHECK11-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK11-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK11-NEXT:    ret void
 //
 //
-// CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..8
+// CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..10
 // CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK11-NEXT:  entry:
 // CHECK11-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -7483,7 +7459,7 @@ int main (int argc, char **argv) {
 // CHECK11:       omp.inner.for.body:
 // CHECK11-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !31
 // CHECK11-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31
-// CHECK11-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !31
+// CHECK11-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !31
 // CHECK11-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK11:       omp.inner.for.inc:
 // CHECK11-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31
@@ -7505,7 +7481,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    ret void
 //
 //
-// CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..9
+// CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..11
 // CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK11-NEXT:  entry:
 // CHECK11-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -7592,11 +7568,11 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4
 // CHECK11-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4
 // CHECK11-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4
-// CHECK11-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK11-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK11-NEXT:    ret void
 //
 //
-// CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..11
+// CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..14
 // CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK11-NEXT:  entry:
 // CHECK11-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -7642,7 +7618,7 @@ int main (int argc, char **argv) {
 // CHECK11:       omp.inner.for.body:
 // CHECK11-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !37
 // CHECK11-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !37
-// CHECK11-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !37
+// CHECK11-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !37
 // CHECK11-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK11:       omp.inner.for.inc:
 // CHECK11-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37
@@ -7664,7 +7640,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    ret void
 //
 //
-// CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..12
+// CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..15
 // CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK11-NEXT:  entry:
 // CHECK11-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -7757,11 +7733,11 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
 // CHECK11-NEXT:    store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK11-NEXT:    [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK11-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i32)* @.omp_outlined..15 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i32 [[TMP2]])
+// CHECK11-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i32)* @.omp_outlined..18 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i32 [[TMP2]])
 // CHECK11-NEXT:    ret void
 //
 //
-// CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..15
+// CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..18
 // CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK11-NEXT:  entry:
 // CHECK11-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -7813,7 +7789,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !43
 // CHECK11-NEXT:    store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !43
 // CHECK11-NEXT:    [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !43
-// CHECK11-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]), !llvm.access.group !43
+// CHECK11-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]), !llvm.access.group !43
 // CHECK11-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK11:       omp.inner.for.inc:
 // CHECK11-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43
@@ -7856,7 +7832,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    ret void
 //
 //
-// CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..16
+// CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..19
 // CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK11-NEXT:  entry:
 // CHECK11-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -7940,7 +7916,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK11-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK11-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK11-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK11-NEXT:  entry:
 // CHECK11-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK11-NEXT:    ret void
@@ -7997,14 +7973,14 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4
 // CHECK12-NEXT:    [[TMP4:%.*]] = mul nuw i32 [[TMP0]], 4
 // CHECK12-NEXT:    [[TMP5:%.*]] = sext i32 [[TMP4]] to i64
-// CHECK12-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP3]], i32* [[TMP7]], align 4
-// CHECK12-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP3]], i32* [[TMP9]], align 4
-// CHECK12-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK12-NEXT:    store i64 4, i64* [[TMP10]], align 4
+// CHECK12-NEXT:    [[TMP6:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK12-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP6]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes to i8*), i32 24, i1 false)
+// CHECK12-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP3]], i32* [[TMP8]], align 4
+// CHECK12-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP3]], i32* [[TMP10]], align 4
 // CHECK12-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
 // CHECK12-NEXT:    store i8* null, i8** [[TMP11]], align 4
 // CHECK12-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -8013,175 +7989,167 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK12-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i32*
 // CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP15]], align 4
-// CHECK12-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK12-NEXT:    store i64 4, i64* [[TMP16]], align 4
-// CHECK12-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK12-NEXT:    store i8* null, i8** [[TMP17]], align 4
-// CHECK12-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK12-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
-// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP19]], align 4
-// CHECK12-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK12-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32**
-// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP21]], align 4
-// CHECK12-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK12-NEXT:    store i64 [[TMP5]], i64* [[TMP22]], align 4
-// CHECK12-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK12-NEXT:    store i8* null, i8** [[TMP23]], align 4
-// CHECK12-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP27:%.*]] = load i32, i32* [[N]], align 4
-// CHECK12-NEXT:    store i32 [[TMP27]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK12-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK12-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP28]], 0
+// CHECK12-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK12-NEXT:    store i8* null, i8** [[TMP16]], align 4
+// CHECK12-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK12-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32**
+// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP18]], align 4
+// CHECK12-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK12-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32**
+// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP20]], align 4
+// CHECK12-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
+// CHECK12-NEXT:    store i64 [[TMP5]], i64* [[TMP21]], align 4
+// CHECK12-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK12-NEXT:    store i8* null, i8** [[TMP22]], align 4
+// CHECK12-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP26:%.*]] = load i32, i32* [[N]], align 4
+// CHECK12-NEXT:    store i32 [[TMP26]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK12-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK12-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP27]], 0
 // CHECK12-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK12-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK12-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK12-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK12-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP29]], 1
-// CHECK12-NEXT:    [[TMP30:%.*]] = zext i32 [[ADD]] to i64
-// CHECK12-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP30]])
-// CHECK12-NEXT:    [[TMP31:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l100.region_id, i32 3, i8** [[TMP24]], i8** [[TMP25]], i64* [[TMP26]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK12-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
-// CHECK12-NEXT:    br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK12-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK12-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP28]], 1
+// CHECK12-NEXT:    [[TMP29:%.*]] = zext i32 [[ADD]] to i64
+// CHECK12-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP29]])
+// CHECK12-NEXT:    [[TMP30:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l100.region_id, i32 3, i8** [[TMP23]], i8** [[TMP24]], i64* [[TMP25]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK12-NEXT:    [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
+// CHECK12-NEXT:    br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK12:       omp_offload.failed:
 // CHECK12-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l100(i32 [[TMP3]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK12-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK12:       omp_offload.cont:
-// CHECK12-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N]], align 4
-// CHECK12-NEXT:    store i32 [[TMP33]], i32* [[N_CASTED3]], align 4
-// CHECK12-NEXT:    [[TMP34:%.*]] = load i32, i32* [[N_CASTED3]], align 4
-// CHECK12-NEXT:    [[TMP35:%.*]] = mul nuw i32 [[TMP0]], 4
-// CHECK12-NEXT:    [[TMP36:%.*]] = sext i32 [[TMP35]] to i64
+// CHECK12-NEXT:    [[TMP32:%.*]] = load i32, i32* [[N]], align 4
+// CHECK12-NEXT:    store i32 [[TMP32]], i32* [[N_CASTED3]], align 4
+// CHECK12-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N_CASTED3]], align 4
+// CHECK12-NEXT:    [[TMP34:%.*]] = mul nuw i32 [[TMP0]], 4
+// CHECK12-NEXT:    [[TMP35:%.*]] = sext i32 [[TMP34]] to i64
+// CHECK12-NEXT:    [[TMP36:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES7]] to i8*
+// CHECK12-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP36]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes.4 to i8*), i32 24, i1 false)
 // CHECK12-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
 // CHECK12-NEXT:    [[TMP38:%.*]] = bitcast i8** [[TMP37]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP34]], i32* [[TMP38]], align 4
+// CHECK12-NEXT:    store i32 [[TMP33]], i32* [[TMP38]], align 4
 // CHECK12-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
 // CHECK12-NEXT:    [[TMP40:%.*]] = bitcast i8** [[TMP39]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP34]], i32* [[TMP40]], align 4
-// CHECK12-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 0
-// CHECK12-NEXT:    store i64 4, i64* [[TMP41]], align 4
-// CHECK12-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 0
-// CHECK12-NEXT:    store i8* null, i8** [[TMP42]], align 4
-// CHECK12-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 1
-// CHECK12-NEXT:    [[TMP44:%.*]] = bitcast i8** [[TMP43]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP44]], align 4
-// CHECK12-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 1
-// CHECK12-NEXT:    [[TMP46:%.*]] = bitcast i8** [[TMP45]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP46]], align 4
-// CHECK12-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 1
-// CHECK12-NEXT:    store i64 4, i64* [[TMP47]], align 4
-// CHECK12-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 1
-// CHECK12-NEXT:    store i8* null, i8** [[TMP48]], align 4
-// CHECK12-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 2
+// CHECK12-NEXT:    store i32 [[TMP33]], i32* [[TMP40]], align 4
+// CHECK12-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 0
+// CHECK12-NEXT:    store i8* null, i8** [[TMP41]], align 4
+// CHECK12-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 1
+// CHECK12-NEXT:    [[TMP43:%.*]] = bitcast i8** [[TMP42]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP43]], align 4
+// CHECK12-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 1
+// CHECK12-NEXT:    [[TMP45:%.*]] = bitcast i8** [[TMP44]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP45]], align 4
+// CHECK12-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 1
+// CHECK12-NEXT:    store i8* null, i8** [[TMP46]], align 4
+// CHECK12-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 2
+// CHECK12-NEXT:    [[TMP48:%.*]] = bitcast i8** [[TMP47]] to i32**
+// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP48]], align 4
+// CHECK12-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 2
 // CHECK12-NEXT:    [[TMP50:%.*]] = bitcast i8** [[TMP49]] to i32**
 // CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP50]], align 4
-// CHECK12-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 2
-// CHECK12-NEXT:    [[TMP52:%.*]] = bitcast i8** [[TMP51]] to i32**
-// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP52]], align 4
-// CHECK12-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 2
-// CHECK12-NEXT:    store i64 [[TMP36]], i64* [[TMP53]], align 4
-// CHECK12-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 2
-// CHECK12-NEXT:    store i8* null, i8** [[TMP54]], align 4
-// CHECK12-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP58:%.*]] = load i32, i32* [[N]], align 4
-// CHECK12-NEXT:    store i32 [[TMP58]], i32* [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK12-NEXT:    [[TMP59:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK12-NEXT:    [[SUB11:%.*]] = sub nsw i32 [[TMP59]], 0
+// CHECK12-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 2
+// CHECK12-NEXT:    store i64 [[TMP35]], i64* [[TMP51]], align 4
+// CHECK12-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 2
+// CHECK12-NEXT:    store i8* null, i8** [[TMP52]], align 4
+// CHECK12-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP56:%.*]] = load i32, i32* [[N]], align 4
+// CHECK12-NEXT:    store i32 [[TMP56]], i32* [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK12-NEXT:    [[TMP57:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK12-NEXT:    [[SUB11:%.*]] = sub nsw i32 [[TMP57]], 0
 // CHECK12-NEXT:    [[DIV12:%.*]] = sdiv i32 [[SUB11]], 1
 // CHECK12-NEXT:    [[SUB13:%.*]] = sub nsw i32 [[DIV12]], 1
 // CHECK12-NEXT:    store i32 [[SUB13]], i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK12-NEXT:    [[TMP60:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK12-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP60]], 1
-// CHECK12-NEXT:    [[TMP61:%.*]] = zext i32 [[ADD14]] to i64
-// CHECK12-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP61]])
-// CHECK12-NEXT:    [[TMP62:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l104.region_id, i32 3, i8** [[TMP55]], i8** [[TMP56]], i64* [[TMP57]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK12-NEXT:    [[TMP63:%.*]] = icmp ne i32 [[TMP62]], 0
-// CHECK12-NEXT:    br i1 [[TMP63]], label [[OMP_OFFLOAD_FAILED15:%.*]], label [[OMP_OFFLOAD_CONT16:%.*]]
+// CHECK12-NEXT:    [[TMP58:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
+// CHECK12-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP58]], 1
+// CHECK12-NEXT:    [[TMP59:%.*]] = zext i32 [[ADD14]] to i64
+// CHECK12-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP59]])
+// CHECK12-NEXT:    [[TMP60:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l104.region_id, i32 3, i8** [[TMP53]], i8** [[TMP54]], i64* [[TMP55]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.5, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK12-NEXT:    [[TMP61:%.*]] = icmp ne i32 [[TMP60]], 0
+// CHECK12-NEXT:    br i1 [[TMP61]], label [[OMP_OFFLOAD_FAILED15:%.*]], label [[OMP_OFFLOAD_CONT16:%.*]]
 // CHECK12:       omp_offload.failed15:
-// CHECK12-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l104(i32 [[TMP34]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK12-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l104(i32 [[TMP33]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK12-NEXT:    br label [[OMP_OFFLOAD_CONT16]]
 // CHECK12:       omp_offload.cont16:
-// CHECK12-NEXT:    [[TMP64:%.*]] = load i32, i32* [[M]], align 4
-// CHECK12-NEXT:    store i32 [[TMP64]], i32* [[DOTCAPTURE_EXPR_17]], align 4
-// CHECK12-NEXT:    [[TMP65:%.*]] = load i32, i32* [[N]], align 4
-// CHECK12-NEXT:    store i32 [[TMP65]], i32* [[N_CASTED18]], align 4
-// CHECK12-NEXT:    [[TMP66:%.*]] = load i32, i32* [[N_CASTED18]], align 4
-// CHECK12-NEXT:    [[TMP67:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_17]], align 4
-// CHECK12-NEXT:    store i32 [[TMP67]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK12-NEXT:    [[TMP68:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK12-NEXT:    [[TMP69:%.*]] = mul nuw i32 [[TMP0]], 4
-// CHECK12-NEXT:    [[TMP70:%.*]] = sext i32 [[TMP69]] to i64
-// CHECK12-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP72:%.*]] = bitcast i8** [[TMP71]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP66]], i32* [[TMP72]], align 4
-// CHECK12-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP74:%.*]] = bitcast i8** [[TMP73]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP66]], i32* [[TMP74]], align 4
-// CHECK12-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 0
-// CHECK12-NEXT:    store i64 4, i64* [[TMP75]], align 4
-// CHECK12-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 0
-// CHECK12-NEXT:    store i8* null, i8** [[TMP76]], align 4
-// CHECK12-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 1
+// CHECK12-NEXT:    [[TMP62:%.*]] = load i32, i32* [[M]], align 4
+// CHECK12-NEXT:    store i32 [[TMP62]], i32* [[DOTCAPTURE_EXPR_17]], align 4
+// CHECK12-NEXT:    [[TMP63:%.*]] = load i32, i32* [[N]], align 4
+// CHECK12-NEXT:    store i32 [[TMP63]], i32* [[N_CASTED18]], align 4
+// CHECK12-NEXT:    [[TMP64:%.*]] = load i32, i32* [[N_CASTED18]], align 4
+// CHECK12-NEXT:    [[TMP65:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_17]], align 4
+// CHECK12-NEXT:    store i32 [[TMP65]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
+// CHECK12-NEXT:    [[TMP66:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
+// CHECK12-NEXT:    [[TMP67:%.*]] = mul nuw i32 [[TMP0]], 4
+// CHECK12-NEXT:    [[TMP68:%.*]] = sext i32 [[TMP67]] to i64
+// CHECK12-NEXT:    [[TMP69:%.*]] = bitcast [4 x i64]* [[DOTOFFLOAD_SIZES22]] to i8*
+// CHECK12-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP69]], i8* align 4 bitcast ([4 x i64]* @.offload_sizes.8 to i8*), i32 32, i1 false)
+// CHECK12-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP71:%.*]] = bitcast i8** [[TMP70]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP64]], i32* [[TMP71]], align 4
+// CHECK12-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP73:%.*]] = bitcast i8** [[TMP72]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP64]], i32* [[TMP73]], align 4
+// CHECK12-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 0
+// CHECK12-NEXT:    store i8* null, i8** [[TMP74]], align 4
+// CHECK12-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 1
+// CHECK12-NEXT:    [[TMP76:%.*]] = bitcast i8** [[TMP75]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP76]], align 4
+// CHECK12-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 1
 // CHECK12-NEXT:    [[TMP78:%.*]] = bitcast i8** [[TMP77]] to i32*
 // CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP78]], align 4
-// CHECK12-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 1
-// CHECK12-NEXT:    [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP80]], align 4
-// CHECK12-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 1
-// CHECK12-NEXT:    store i64 4, i64* [[TMP81]], align 4
-// CHECK12-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 1
-// CHECK12-NEXT:    store i8* null, i8** [[TMP82]], align 4
-// CHECK12-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 2
-// CHECK12-NEXT:    [[TMP84:%.*]] = bitcast i8** [[TMP83]] to i32**
-// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP84]], align 4
-// CHECK12-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 2
-// CHECK12-NEXT:    [[TMP86:%.*]] = bitcast i8** [[TMP85]] to i32**
-// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP86]], align 4
-// CHECK12-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 2
-// CHECK12-NEXT:    store i64 [[TMP70]], i64* [[TMP87]], align 4
-// CHECK12-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 2
-// CHECK12-NEXT:    store i8* null, i8** [[TMP88]], align 4
-// CHECK12-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 3
-// CHECK12-NEXT:    [[TMP90:%.*]] = bitcast i8** [[TMP89]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP68]], i32* [[TMP90]], align 4
-// CHECK12-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 3
-// CHECK12-NEXT:    [[TMP92:%.*]] = bitcast i8** [[TMP91]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP68]], i32* [[TMP92]], align 4
-// CHECK12-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 3
-// CHECK12-NEXT:    store i64 4, i64* [[TMP93]], align 4
-// CHECK12-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 3
-// CHECK12-NEXT:    store i8* null, i8** [[TMP94]], align 4
-// CHECK12-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP98:%.*]] = load i32, i32* [[N]], align 4
-// CHECK12-NEXT:    store i32 [[TMP98]], i32* [[DOTCAPTURE_EXPR_24]], align 4
-// CHECK12-NEXT:    [[TMP99:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_24]], align 4
-// CHECK12-NEXT:    [[SUB26:%.*]] = sub nsw i32 [[TMP99]], 0
+// CHECK12-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 1
+// CHECK12-NEXT:    store i8* null, i8** [[TMP79]], align 4
+// CHECK12-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 2
+// CHECK12-NEXT:    [[TMP81:%.*]] = bitcast i8** [[TMP80]] to i32**
+// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP81]], align 4
+// CHECK12-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 2
+// CHECK12-NEXT:    [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i32**
+// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP83]], align 4
+// CHECK12-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 2
+// CHECK12-NEXT:    store i64 [[TMP68]], i64* [[TMP84]], align 4
+// CHECK12-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 2
+// CHECK12-NEXT:    store i8* null, i8** [[TMP85]], align 4
+// CHECK12-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 3
+// CHECK12-NEXT:    [[TMP87:%.*]] = bitcast i8** [[TMP86]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP66]], i32* [[TMP87]], align 4
+// CHECK12-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 3
+// CHECK12-NEXT:    [[TMP89:%.*]] = bitcast i8** [[TMP88]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP66]], i32* [[TMP89]], align 4
+// CHECK12-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 3
+// CHECK12-NEXT:    store i8* null, i8** [[TMP90]], align 4
+// CHECK12-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP94:%.*]] = load i32, i32* [[N]], align 4
+// CHECK12-NEXT:    store i32 [[TMP94]], i32* [[DOTCAPTURE_EXPR_24]], align 4
+// CHECK12-NEXT:    [[TMP95:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_24]], align 4
+// CHECK12-NEXT:    [[SUB26:%.*]] = sub nsw i32 [[TMP95]], 0
 // CHECK12-NEXT:    [[DIV27:%.*]] = sdiv i32 [[SUB26]], 1
 // CHECK12-NEXT:    [[SUB28:%.*]] = sub nsw i32 [[DIV27]], 1
 // CHECK12-NEXT:    store i32 [[SUB28]], i32* [[DOTCAPTURE_EXPR_25]], align 4
-// CHECK12-NEXT:    [[TMP100:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_25]], align 4
-// CHECK12-NEXT:    [[ADD29:%.*]] = add nsw i32 [[TMP100]], 1
-// CHECK12-NEXT:    [[TMP101:%.*]] = zext i32 [[ADD29]] to i64
-// CHECK12-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP101]])
-// CHECK12-NEXT:    [[TMP102:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l108.region_id, i32 4, i8** [[TMP95]], i8** [[TMP96]], i64* [[TMP97]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.7, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK12-NEXT:    [[TMP103:%.*]] = icmp ne i32 [[TMP102]], 0
-// CHECK12-NEXT:    br i1 [[TMP103]], label [[OMP_OFFLOAD_FAILED30:%.*]], label [[OMP_OFFLOAD_CONT31:%.*]]
+// CHECK12-NEXT:    [[TMP96:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_25]], align 4
+// CHECK12-NEXT:    [[ADD29:%.*]] = add nsw i32 [[TMP96]], 1
+// CHECK12-NEXT:    [[TMP97:%.*]] = zext i32 [[ADD29]] to i64
+// CHECK12-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP97]])
+// CHECK12-NEXT:    [[TMP98:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l108.region_id, i32 4, i8** [[TMP91]], i8** [[TMP92]], i64* [[TMP93]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK12-NEXT:    [[TMP99:%.*]] = icmp ne i32 [[TMP98]], 0
+// CHECK12-NEXT:    br i1 [[TMP99]], label [[OMP_OFFLOAD_FAILED30:%.*]], label [[OMP_OFFLOAD_CONT31:%.*]]
 // CHECK12:       omp_offload.failed30:
-// CHECK12-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l108(i32 [[TMP66]], i32 [[TMP0]], i32* [[VLA]], i32 [[TMP68]]) #[[ATTR3]]
+// CHECK12-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l108(i32 [[TMP64]], i32 [[TMP0]], i32* [[VLA]], i32 [[TMP66]]) #[[ATTR3]]
 // CHECK12-NEXT:    br label [[OMP_OFFLOAD_CONT31]]
 // CHECK12:       omp_offload.cont31:
-// CHECK12-NEXT:    [[TMP104:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK12-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10EEiT_(i32 noundef [[TMP104]])
+// CHECK12-NEXT:    [[TMP100:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK12-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10EEiT_(i32 noundef [[TMP100]])
 // CHECK12-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK12-NEXT:    [[TMP105:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK12-NEXT:    call void @llvm.stackrestore(i8* [[TMP105]])
-// CHECK12-NEXT:    [[TMP106:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK12-NEXT:    ret i32 [[TMP106]]
+// CHECK12-NEXT:    [[TMP101:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK12-NEXT:    call void @llvm.stackrestore(i8* [[TMP101]])
+// CHECK12-NEXT:    [[TMP102:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK12-NEXT:    ret i32 [[TMP102]]
 //
 //
 // CHECK12-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l100
@@ -8677,11 +8645,11 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
 // CHECK12-NEXT:    store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK12-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK12-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32 [[TMP3]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP5]])
+// CHECK12-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32 [[TMP3]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP5]])
 // CHECK12-NEXT:    ret void
 //
 //
-// CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..5
+// CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..6
 // CHECK12-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK12-NEXT:  entry:
 // CHECK12-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -8762,7 +8730,7 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !25
 // CHECK12-NEXT:    store i32 [[TMP20]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !25
 // CHECK12-NEXT:    [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !25
-// CHECK12-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*, i32)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32 [[TMP19]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP21]]), !llvm.access.group !25
+// CHECK12-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*, i32)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32 [[TMP19]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP21]]), !llvm.access.group !25
 // CHECK12-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK12:       omp.inner.for.inc:
 // CHECK12-NEXT:    [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25
@@ -8816,7 +8784,7 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    ret void
 //
 //
-// CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..6
+// CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..7
 // CHECK12-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK12-NEXT:  entry:
 // CHECK12-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -8932,7 +8900,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK12-LABEL: define {{[^@]+}}@_Z5tmainIiLi10EEiT_
-// CHECK12-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK12-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK12-NEXT:  entry:
 // CHECK12-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK12-NEXT:    [[A:%.*]] = alloca [10 x i32], align 4
@@ -8964,7 +8932,7 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK12-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK12-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK12-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l81.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.10, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK12-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l81.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.12, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK12-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK12-NEXT:    br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK12:       omp_offload.failed:
@@ -8982,7 +8950,7 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK12-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
 // CHECK12-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK12-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l85.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.13, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.14, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK12-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l85.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.16, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.17, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK12-NEXT:    [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
 // CHECK12-NEXT:    br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]]
 // CHECK12:       omp_offload.failed5:
@@ -9013,7 +8981,7 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0
 // CHECK12-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS8]], i32 0, i32 0
 // CHECK12-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK12-NEXT:    [[TMP33:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l89.region_id, i32 2, i8** [[TMP31]], i8** [[TMP32]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.17, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.18, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK12-NEXT:    [[TMP33:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l89.region_id, i32 2, i8** [[TMP31]], i8** [[TMP32]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.20, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.21, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK12-NEXT:    [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0
 // CHECK12-NEXT:    br i1 [[TMP34]], label [[OMP_OFFLOAD_FAILED11:%.*]], label [[OMP_OFFLOAD_CONT12:%.*]]
 // CHECK12:       omp_offload.failed11:
@@ -9029,11 +8997,11 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4
 // CHECK12-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4
 // CHECK12-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4
-// CHECK12-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK12-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK12-NEXT:    ret void
 //
 //
-// CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..8
+// CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..10
 // CHECK12-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK12-NEXT:  entry:
 // CHECK12-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -9079,7 +9047,7 @@ int main (int argc, char **argv) {
 // CHECK12:       omp.inner.for.body:
 // CHECK12-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !31
 // CHECK12-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31
-// CHECK12-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !31
+// CHECK12-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !31
 // CHECK12-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK12:       omp.inner.for.inc:
 // CHECK12-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31
@@ -9101,7 +9069,7 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    ret void
 //
 //
-// CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..9
+// CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..11
 // CHECK12-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK12-NEXT:  entry:
 // CHECK12-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -9188,11 +9156,11 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4
 // CHECK12-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4
 // CHECK12-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4
-// CHECK12-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK12-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK12-NEXT:    ret void
 //
 //
-// CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..11
+// CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..14
 // CHECK12-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK12-NEXT:  entry:
 // CHECK12-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -9238,7 +9206,7 @@ int main (int argc, char **argv) {
 // CHECK12:       omp.inner.for.body:
 // CHECK12-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !37
 // CHECK12-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !37
-// CHECK12-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !37
+// CHECK12-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !37
 // CHECK12-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK12:       omp.inner.for.inc:
 // CHECK12-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37
@@ -9260,7 +9228,7 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    ret void
 //
 //
-// CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..12
+// CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..15
 // CHECK12-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK12-NEXT:  entry:
 // CHECK12-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -9353,11 +9321,11 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
 // CHECK12-NEXT:    store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK12-NEXT:    [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK12-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i32)* @.omp_outlined..15 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i32 [[TMP2]])
+// CHECK12-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i32)* @.omp_outlined..18 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i32 [[TMP2]])
 // CHECK12-NEXT:    ret void
 //
 //
-// CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..15
+// CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..18
 // CHECK12-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK12-NEXT:  entry:
 // CHECK12-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -9409,7 +9377,7 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !43
 // CHECK12-NEXT:    store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !43
 // CHECK12-NEXT:    [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !43
-// CHECK12-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]), !llvm.access.group !43
+// CHECK12-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]), !llvm.access.group !43
 // CHECK12-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK12:       omp.inner.for.inc:
 // CHECK12-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43
@@ -9452,7 +9420,7 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    ret void
 //
 //
-// CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..16
+// CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..19
 // CHECK12-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK12-NEXT:  entry:
 // CHECK12-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -9536,7 +9504,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK12-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK12-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK12-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK12-NEXT:  entry:
 // CHECK12-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK12-NEXT:    ret void

diff  --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_schedule_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_schedule_codegen.cpp
index 28ec03ad07697..0d3c5f631fe54 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_schedule_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_schedule_codegen.cpp
@@ -8798,14 +8798,14 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    store i32 [[TMP3]], i32* [[CONV]], align 4
 // CHECK13-NEXT:    [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8
 // CHECK13-NEXT:    [[TMP5:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK13-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK13-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i64*
-// CHECK13-NEXT:    store i64 [[TMP4]], i64* [[TMP7]], align 8
-// CHECK13-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK13-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i64*
-// CHECK13-NEXT:    store i64 [[TMP4]], i64* [[TMP9]], align 8
-// CHECK13-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK13-NEXT:    store i64 4, i64* [[TMP10]], align 8
+// CHECK13-NEXT:    [[TMP6:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK13-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP6]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes to i8*), i64 24, i1 false)
+// CHECK13-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK13-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i64*
+// CHECK13-NEXT:    store i64 [[TMP4]], i64* [[TMP8]], align 8
+// CHECK13-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK13-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i64*
+// CHECK13-NEXT:    store i64 [[TMP4]], i64* [[TMP10]], align 8
 // CHECK13-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
 // CHECK13-NEXT:    store i8* null, i8** [[TMP11]], align 8
 // CHECK13-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -8814,304 +8814,290 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK13-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i64*
 // CHECK13-NEXT:    store i64 [[TMP1]], i64* [[TMP15]], align 8
-// CHECK13-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK13-NEXT:    store i64 8, i64* [[TMP16]], align 8
-// CHECK13-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK13-NEXT:    store i8* null, i8** [[TMP17]], align 8
-// CHECK13-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK13-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
-// CHECK13-NEXT:    store i32* [[VLA]], i32** [[TMP19]], align 8
-// CHECK13-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK13-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32**
-// CHECK13-NEXT:    store i32* [[VLA]], i32** [[TMP21]], align 8
-// CHECK13-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK13-NEXT:    store i64 [[TMP5]], i64* [[TMP22]], align 8
-// CHECK13-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK13-NEXT:    store i8* null, i8** [[TMP23]], align 8
-// CHECK13-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK13-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK13-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK13-NEXT:    [[TMP27:%.*]] = load i32, i32* [[N]], align 4
-// CHECK13-NEXT:    store i32 [[TMP27]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK13-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK13-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP28]], 0
+// CHECK13-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK13-NEXT:    store i8* null, i8** [[TMP16]], align 8
+// CHECK13-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK13-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32**
+// CHECK13-NEXT:    store i32* [[VLA]], i32** [[TMP18]], align 8
+// CHECK13-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK13-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32**
+// CHECK13-NEXT:    store i32* [[VLA]], i32** [[TMP20]], align 8
+// CHECK13-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
+// CHECK13-NEXT:    store i64 [[TMP5]], i64* [[TMP21]], align 8
+// CHECK13-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK13-NEXT:    store i8* null, i8** [[TMP22]], align 8
+// CHECK13-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK13-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK13-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK13-NEXT:    [[TMP26:%.*]] = load i32, i32* [[N]], align 4
+// CHECK13-NEXT:    store i32 [[TMP26]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK13-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK13-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP27]], 0
 // CHECK13-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK13-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK13-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK13-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK13-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP29]], 1
-// CHECK13-NEXT:    [[TMP30:%.*]] = zext i32 [[ADD]] to i64
-// CHECK13-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP30]])
-// CHECK13-NEXT:    [[TMP31:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l139.region_id, i32 3, i8** [[TMP24]], i8** [[TMP25]], i64* [[TMP26]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK13-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
-// CHECK13-NEXT:    br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK13-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK13-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP28]], 1
+// CHECK13-NEXT:    [[TMP29:%.*]] = zext i32 [[ADD]] to i64
+// CHECK13-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP29]])
+// CHECK13-NEXT:    [[TMP30:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l139.region_id, i32 3, i8** [[TMP23]], i8** [[TMP24]], i64* [[TMP25]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK13-NEXT:    [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
+// CHECK13-NEXT:    br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK13:       omp_offload.failed:
 // CHECK13-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l139(i64 [[TMP4]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK13-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK13:       omp_offload.cont:
-// CHECK13-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N]], align 4
+// CHECK13-NEXT:    [[TMP32:%.*]] = load i32, i32* [[N]], align 4
 // CHECK13-NEXT:    [[CONV4:%.*]] = bitcast i64* [[N_CASTED3]] to i32*
-// CHECK13-NEXT:    store i32 [[TMP33]], i32* [[CONV4]], align 4
-// CHECK13-NEXT:    [[TMP34:%.*]] = load i64, i64* [[N_CASTED3]], align 8
-// CHECK13-NEXT:    [[TMP35:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK13-NEXT:    store i32 [[TMP32]], i32* [[CONV4]], align 4
+// CHECK13-NEXT:    [[TMP33:%.*]] = load i64, i64* [[N_CASTED3]], align 8
+// CHECK13-NEXT:    [[TMP34:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK13-NEXT:    [[TMP35:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES8]] to i8*
+// CHECK13-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP35]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes.4 to i8*), i64 24, i1 false)
 // CHECK13-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
 // CHECK13-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i64*
-// CHECK13-NEXT:    store i64 [[TMP34]], i64* [[TMP37]], align 8
+// CHECK13-NEXT:    store i64 [[TMP33]], i64* [[TMP37]], align 8
 // CHECK13-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
 // CHECK13-NEXT:    [[TMP39:%.*]] = bitcast i8** [[TMP38]] to i64*
-// CHECK13-NEXT:    store i64 [[TMP34]], i64* [[TMP39]], align 8
-// CHECK13-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 0
-// CHECK13-NEXT:    store i64 4, i64* [[TMP40]], align 8
-// CHECK13-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 0
-// CHECK13-NEXT:    store i8* null, i8** [[TMP41]], align 8
-// CHECK13-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 1
-// CHECK13-NEXT:    [[TMP43:%.*]] = bitcast i8** [[TMP42]] to i64*
-// CHECK13-NEXT:    store i64 [[TMP1]], i64* [[TMP43]], align 8
-// CHECK13-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 1
-// CHECK13-NEXT:    [[TMP45:%.*]] = bitcast i8** [[TMP44]] to i64*
-// CHECK13-NEXT:    store i64 [[TMP1]], i64* [[TMP45]], align 8
-// CHECK13-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 1
-// CHECK13-NEXT:    store i64 8, i64* [[TMP46]], align 8
-// CHECK13-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 1
-// CHECK13-NEXT:    store i8* null, i8** [[TMP47]], align 8
-// CHECK13-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 2
+// CHECK13-NEXT:    store i64 [[TMP33]], i64* [[TMP39]], align 8
+// CHECK13-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 0
+// CHECK13-NEXT:    store i8* null, i8** [[TMP40]], align 8
+// CHECK13-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 1
+// CHECK13-NEXT:    [[TMP42:%.*]] = bitcast i8** [[TMP41]] to i64*
+// CHECK13-NEXT:    store i64 [[TMP1]], i64* [[TMP42]], align 8
+// CHECK13-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 1
+// CHECK13-NEXT:    [[TMP44:%.*]] = bitcast i8** [[TMP43]] to i64*
+// CHECK13-NEXT:    store i64 [[TMP1]], i64* [[TMP44]], align 8
+// CHECK13-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 1
+// CHECK13-NEXT:    store i8* null, i8** [[TMP45]], align 8
+// CHECK13-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 2
+// CHECK13-NEXT:    [[TMP47:%.*]] = bitcast i8** [[TMP46]] to i32**
+// CHECK13-NEXT:    store i32* [[VLA]], i32** [[TMP47]], align 8
+// CHECK13-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 2
 // CHECK13-NEXT:    [[TMP49:%.*]] = bitcast i8** [[TMP48]] to i32**
 // CHECK13-NEXT:    store i32* [[VLA]], i32** [[TMP49]], align 8
-// CHECK13-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 2
-// CHECK13-NEXT:    [[TMP51:%.*]] = bitcast i8** [[TMP50]] to i32**
-// CHECK13-NEXT:    store i32* [[VLA]], i32** [[TMP51]], align 8
-// CHECK13-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 2
-// CHECK13-NEXT:    store i64 [[TMP35]], i64* [[TMP52]], align 8
-// CHECK13-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 2
-// CHECK13-NEXT:    store i8* null, i8** [[TMP53]], align 8
-// CHECK13-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
-// CHECK13-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
-// CHECK13-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 0
-// CHECK13-NEXT:    [[TMP57:%.*]] = load i32, i32* [[N]], align 4
-// CHECK13-NEXT:    store i32 [[TMP57]], i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK13-NEXT:    [[TMP58:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK13-NEXT:    [[SUB12:%.*]] = sub nsw i32 [[TMP58]], 0
+// CHECK13-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 2
+// CHECK13-NEXT:    store i64 [[TMP34]], i64* [[TMP50]], align 8
+// CHECK13-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 2
+// CHECK13-NEXT:    store i8* null, i8** [[TMP51]], align 8
+// CHECK13-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
+// CHECK13-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
+// CHECK13-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 0
+// CHECK13-NEXT:    [[TMP55:%.*]] = load i32, i32* [[N]], align 4
+// CHECK13-NEXT:    store i32 [[TMP55]], i32* [[DOTCAPTURE_EXPR_10]], align 4
+// CHECK13-NEXT:    [[TMP56:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
+// CHECK13-NEXT:    [[SUB12:%.*]] = sub nsw i32 [[TMP56]], 0
 // CHECK13-NEXT:    [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1
 // CHECK13-NEXT:    [[SUB14:%.*]] = sub nsw i32 [[DIV13]], 1
 // CHECK13-NEXT:    store i32 [[SUB14]], i32* [[DOTCAPTURE_EXPR_11]], align 4
-// CHECK13-NEXT:    [[TMP59:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_11]], align 4
-// CHECK13-NEXT:    [[ADD15:%.*]] = add nsw i32 [[TMP59]], 1
-// CHECK13-NEXT:    [[TMP60:%.*]] = zext i32 [[ADD15]] to i64
-// CHECK13-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP60]])
-// CHECK13-NEXT:    [[TMP61:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l143.region_id, i32 3, i8** [[TMP54]], i8** [[TMP55]], i64* [[TMP56]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK13-NEXT:    [[TMP62:%.*]] = icmp ne i32 [[TMP61]], 0
-// CHECK13-NEXT:    br i1 [[TMP62]], label [[OMP_OFFLOAD_FAILED16:%.*]], label [[OMP_OFFLOAD_CONT17:%.*]]
+// CHECK13-NEXT:    [[TMP57:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_11]], align 4
+// CHECK13-NEXT:    [[ADD15:%.*]] = add nsw i32 [[TMP57]], 1
+// CHECK13-NEXT:    [[TMP58:%.*]] = zext i32 [[ADD15]] to i64
+// CHECK13-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP58]])
+// CHECK13-NEXT:    [[TMP59:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l143.region_id, i32 3, i8** [[TMP52]], i8** [[TMP53]], i64* [[TMP54]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.5, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK13-NEXT:    [[TMP60:%.*]] = icmp ne i32 [[TMP59]], 0
+// CHECK13-NEXT:    br i1 [[TMP60]], label [[OMP_OFFLOAD_FAILED16:%.*]], label [[OMP_OFFLOAD_CONT17:%.*]]
 // CHECK13:       omp_offload.failed16:
-// CHECK13-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l143(i64 [[TMP34]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK13-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l143(i64 [[TMP33]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK13-NEXT:    br label [[OMP_OFFLOAD_CONT17]]
 // CHECK13:       omp_offload.cont17:
-// CHECK13-NEXT:    [[TMP63:%.*]] = load i32, i32* [[M]], align 4
-// CHECK13-NEXT:    store i32 [[TMP63]], i32* [[DOTCAPTURE_EXPR_18]], align 4
-// CHECK13-NEXT:    [[TMP64:%.*]] = load i32, i32* [[N]], align 4
+// CHECK13-NEXT:    [[TMP61:%.*]] = load i32, i32* [[M]], align 4
+// CHECK13-NEXT:    store i32 [[TMP61]], i32* [[DOTCAPTURE_EXPR_18]], align 4
+// CHECK13-NEXT:    [[TMP62:%.*]] = load i32, i32* [[N]], align 4
 // CHECK13-NEXT:    [[CONV20:%.*]] = bitcast i64* [[N_CASTED19]] to i32*
-// CHECK13-NEXT:    store i32 [[TMP64]], i32* [[CONV20]], align 4
-// CHECK13-NEXT:    [[TMP65:%.*]] = load i64, i64* [[N_CASTED19]], align 8
-// CHECK13-NEXT:    [[TMP66:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4
+// CHECK13-NEXT:    store i32 [[TMP62]], i32* [[CONV20]], align 4
+// CHECK13-NEXT:    [[TMP63:%.*]] = load i64, i64* [[N_CASTED19]], align 8
+// CHECK13-NEXT:    [[TMP64:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4
 // CHECK13-NEXT:    [[CONV21:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
-// CHECK13-NEXT:    store i32 [[TMP66]], i32* [[CONV21]], align 4
-// CHECK13-NEXT:    [[TMP67:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK13-NEXT:    [[TMP68:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK13-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 0
-// CHECK13-NEXT:    [[TMP70:%.*]] = bitcast i8** [[TMP69]] to i64*
-// CHECK13-NEXT:    store i64 [[TMP65]], i64* [[TMP70]], align 8
-// CHECK13-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 0
-// CHECK13-NEXT:    [[TMP72:%.*]] = bitcast i8** [[TMP71]] to i64*
-// CHECK13-NEXT:    store i64 [[TMP65]], i64* [[TMP72]], align 8
-// CHECK13-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 0
-// CHECK13-NEXT:    store i64 4, i64* [[TMP73]], align 8
-// CHECK13-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 0
-// CHECK13-NEXT:    store i8* null, i8** [[TMP74]], align 8
-// CHECK13-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 1
+// CHECK13-NEXT:    store i32 [[TMP64]], i32* [[CONV21]], align 4
+// CHECK13-NEXT:    [[TMP65:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
+// CHECK13-NEXT:    [[TMP66:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK13-NEXT:    [[TMP67:%.*]] = bitcast [4 x i64]* [[DOTOFFLOAD_SIZES25]] to i8*
+// CHECK13-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP67]], i8* align 8 bitcast ([4 x i64]* @.offload_sizes.8 to i8*), i64 32, i1 false)
+// CHECK13-NEXT:    [[TMP68:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 0
+// CHECK13-NEXT:    [[TMP69:%.*]] = bitcast i8** [[TMP68]] to i64*
+// CHECK13-NEXT:    store i64 [[TMP63]], i64* [[TMP69]], align 8
+// CHECK13-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 0
+// CHECK13-NEXT:    [[TMP71:%.*]] = bitcast i8** [[TMP70]] to i64*
+// CHECK13-NEXT:    store i64 [[TMP63]], i64* [[TMP71]], align 8
+// CHECK13-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 0
+// CHECK13-NEXT:    store i8* null, i8** [[TMP72]], align 8
+// CHECK13-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 1
+// CHECK13-NEXT:    [[TMP74:%.*]] = bitcast i8** [[TMP73]] to i64*
+// CHECK13-NEXT:    store i64 [[TMP1]], i64* [[TMP74]], align 8
+// CHECK13-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 1
 // CHECK13-NEXT:    [[TMP76:%.*]] = bitcast i8** [[TMP75]] to i64*
 // CHECK13-NEXT:    store i64 [[TMP1]], i64* [[TMP76]], align 8
-// CHECK13-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 1
-// CHECK13-NEXT:    [[TMP78:%.*]] = bitcast i8** [[TMP77]] to i64*
-// CHECK13-NEXT:    store i64 [[TMP1]], i64* [[TMP78]], align 8
-// CHECK13-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 1
-// CHECK13-NEXT:    store i64 8, i64* [[TMP79]], align 8
-// CHECK13-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 1
-// CHECK13-NEXT:    store i8* null, i8** [[TMP80]], align 8
-// CHECK13-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 2
-// CHECK13-NEXT:    [[TMP82:%.*]] = bitcast i8** [[TMP81]] to i32**
-// CHECK13-NEXT:    store i32* [[VLA]], i32** [[TMP82]], align 8
-// CHECK13-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 2
-// CHECK13-NEXT:    [[TMP84:%.*]] = bitcast i8** [[TMP83]] to i32**
-// CHECK13-NEXT:    store i32* [[VLA]], i32** [[TMP84]], align 8
-// CHECK13-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 2
-// CHECK13-NEXT:    store i64 [[TMP68]], i64* [[TMP85]], align 8
-// CHECK13-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 2
-// CHECK13-NEXT:    store i8* null, i8** [[TMP86]], align 8
-// CHECK13-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 3
-// CHECK13-NEXT:    [[TMP88:%.*]] = bitcast i8** [[TMP87]] to i64*
-// CHECK13-NEXT:    store i64 [[TMP67]], i64* [[TMP88]], align 8
-// CHECK13-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 3
-// CHECK13-NEXT:    [[TMP90:%.*]] = bitcast i8** [[TMP89]] to i64*
-// CHECK13-NEXT:    store i64 [[TMP67]], i64* [[TMP90]], align 8
-// CHECK13-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 3
-// CHECK13-NEXT:    store i64 4, i64* [[TMP91]], align 8
-// CHECK13-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 3
-// CHECK13-NEXT:    store i8* null, i8** [[TMP92]], align 8
-// CHECK13-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 0
-// CHECK13-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 0
-// CHECK13-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 0
-// CHECK13-NEXT:    [[TMP96:%.*]] = load i32, i32* [[N]], align 4
-// CHECK13-NEXT:    store i32 [[TMP96]], i32* [[DOTCAPTURE_EXPR_27]], align 4
-// CHECK13-NEXT:    [[TMP97:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_27]], align 4
-// CHECK13-NEXT:    [[SUB29:%.*]] = sub nsw i32 [[TMP97]], 0
+// CHECK13-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 1
+// CHECK13-NEXT:    store i8* null, i8** [[TMP77]], align 8
+// CHECK13-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 2
+// CHECK13-NEXT:    [[TMP79:%.*]] = bitcast i8** [[TMP78]] to i32**
+// CHECK13-NEXT:    store i32* [[VLA]], i32** [[TMP79]], align 8
+// CHECK13-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 2
+// CHECK13-NEXT:    [[TMP81:%.*]] = bitcast i8** [[TMP80]] to i32**
+// CHECK13-NEXT:    store i32* [[VLA]], i32** [[TMP81]], align 8
+// CHECK13-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 2
+// CHECK13-NEXT:    store i64 [[TMP66]], i64* [[TMP82]], align 8
+// CHECK13-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 2
+// CHECK13-NEXT:    store i8* null, i8** [[TMP83]], align 8
+// CHECK13-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 3
+// CHECK13-NEXT:    [[TMP85:%.*]] = bitcast i8** [[TMP84]] to i64*
+// CHECK13-NEXT:    store i64 [[TMP65]], i64* [[TMP85]], align 8
+// CHECK13-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 3
+// CHECK13-NEXT:    [[TMP87:%.*]] = bitcast i8** [[TMP86]] to i64*
+// CHECK13-NEXT:    store i64 [[TMP65]], i64* [[TMP87]], align 8
+// CHECK13-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 3
+// CHECK13-NEXT:    store i8* null, i8** [[TMP88]], align 8
+// CHECK13-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 0
+// CHECK13-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 0
+// CHECK13-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 0
+// CHECK13-NEXT:    [[TMP92:%.*]] = load i32, i32* [[N]], align 4
+// CHECK13-NEXT:    store i32 [[TMP92]], i32* [[DOTCAPTURE_EXPR_27]], align 4
+// CHECK13-NEXT:    [[TMP93:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_27]], align 4
+// CHECK13-NEXT:    [[SUB29:%.*]] = sub nsw i32 [[TMP93]], 0
 // CHECK13-NEXT:    [[DIV30:%.*]] = sdiv i32 [[SUB29]], 1
 // CHECK13-NEXT:    [[SUB31:%.*]] = sub nsw i32 [[DIV30]], 1
 // CHECK13-NEXT:    store i32 [[SUB31]], i32* [[DOTCAPTURE_EXPR_28]], align 4
-// CHECK13-NEXT:    [[TMP98:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_28]], align 4
-// CHECK13-NEXT:    [[ADD32:%.*]] = add nsw i32 [[TMP98]], 1
-// CHECK13-NEXT:    [[TMP99:%.*]] = zext i32 [[ADD32]] to i64
-// CHECK13-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP99]])
-// CHECK13-NEXT:    [[TMP100:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l147.region_id, i32 4, i8** [[TMP93]], i8** [[TMP94]], i64* [[TMP95]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.7, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK13-NEXT:    [[TMP101:%.*]] = icmp ne i32 [[TMP100]], 0
-// CHECK13-NEXT:    br i1 [[TMP101]], label [[OMP_OFFLOAD_FAILED33:%.*]], label [[OMP_OFFLOAD_CONT34:%.*]]
+// CHECK13-NEXT:    [[TMP94:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_28]], align 4
+// CHECK13-NEXT:    [[ADD32:%.*]] = add nsw i32 [[TMP94]], 1
+// CHECK13-NEXT:    [[TMP95:%.*]] = zext i32 [[ADD32]] to i64
+// CHECK13-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP95]])
+// CHECK13-NEXT:    [[TMP96:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l147.region_id, i32 4, i8** [[TMP89]], i8** [[TMP90]], i64* [[TMP91]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK13-NEXT:    [[TMP97:%.*]] = icmp ne i32 [[TMP96]], 0
+// CHECK13-NEXT:    br i1 [[TMP97]], label [[OMP_OFFLOAD_FAILED33:%.*]], label [[OMP_OFFLOAD_CONT34:%.*]]
 // CHECK13:       omp_offload.failed33:
-// CHECK13-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l147(i64 [[TMP65]], i64 [[TMP1]], i32* [[VLA]], i64 [[TMP67]]) #[[ATTR3]]
+// CHECK13-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l147(i64 [[TMP63]], i64 [[TMP1]], i32* [[VLA]], i64 [[TMP65]]) #[[ATTR3]]
 // CHECK13-NEXT:    br label [[OMP_OFFLOAD_CONT34]]
 // CHECK13:       omp_offload.cont34:
-// CHECK13-NEXT:    [[TMP102:%.*]] = load i32, i32* [[N]], align 4
+// CHECK13-NEXT:    [[TMP98:%.*]] = load i32, i32* [[N]], align 4
 // CHECK13-NEXT:    [[CONV36:%.*]] = bitcast i64* [[N_CASTED35]] to i32*
-// CHECK13-NEXT:    store i32 [[TMP102]], i32* [[CONV36]], align 4
-// CHECK13-NEXT:    [[TMP103:%.*]] = load i64, i64* [[N_CASTED35]], align 8
-// CHECK13-NEXT:    [[TMP104:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK13-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS37]], i32 0, i32 0
-// CHECK13-NEXT:    [[TMP106:%.*]] = bitcast i8** [[TMP105]] to i64*
-// CHECK13-NEXT:    store i64 [[TMP103]], i64* [[TMP106]], align 8
-// CHECK13-NEXT:    [[TMP107:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS38]], i32 0, i32 0
+// CHECK13-NEXT:    store i32 [[TMP98]], i32* [[CONV36]], align 4
+// CHECK13-NEXT:    [[TMP99:%.*]] = load i64, i64* [[N_CASTED35]], align 8
+// CHECK13-NEXT:    [[TMP100:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK13-NEXT:    [[TMP101:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES40]] to i8*
+// CHECK13-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP101]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes.12 to i8*), i64 24, i1 false)
+// CHECK13-NEXT:    [[TMP102:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS37]], i32 0, i32 0
+// CHECK13-NEXT:    [[TMP103:%.*]] = bitcast i8** [[TMP102]] to i64*
+// CHECK13-NEXT:    store i64 [[TMP99]], i64* [[TMP103]], align 8
+// CHECK13-NEXT:    [[TMP104:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS38]], i32 0, i32 0
+// CHECK13-NEXT:    [[TMP105:%.*]] = bitcast i8** [[TMP104]] to i64*
+// CHECK13-NEXT:    store i64 [[TMP99]], i64* [[TMP105]], align 8
+// CHECK13-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS39]], i64 0, i64 0
+// CHECK13-NEXT:    store i8* null, i8** [[TMP106]], align 8
+// CHECK13-NEXT:    [[TMP107:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS37]], i32 0, i32 1
 // CHECK13-NEXT:    [[TMP108:%.*]] = bitcast i8** [[TMP107]] to i64*
-// CHECK13-NEXT:    store i64 [[TMP103]], i64* [[TMP108]], align 8
-// CHECK13-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES40]], i32 0, i32 0
-// CHECK13-NEXT:    store i64 4, i64* [[TMP109]], align 8
-// CHECK13-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS39]], i64 0, i64 0
-// CHECK13-NEXT:    store i8* null, i8** [[TMP110]], align 8
-// CHECK13-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS37]], i32 0, i32 1
-// CHECK13-NEXT:    [[TMP112:%.*]] = bitcast i8** [[TMP111]] to i64*
-// CHECK13-NEXT:    store i64 [[TMP1]], i64* [[TMP112]], align 8
-// CHECK13-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS38]], i32 0, i32 1
-// CHECK13-NEXT:    [[TMP114:%.*]] = bitcast i8** [[TMP113]] to i64*
-// CHECK13-NEXT:    store i64 [[TMP1]], i64* [[TMP114]], align 8
-// CHECK13-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES40]], i32 0, i32 1
-// CHECK13-NEXT:    store i64 8, i64* [[TMP115]], align 8
-// CHECK13-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS39]], i64 0, i64 1
-// CHECK13-NEXT:    store i8* null, i8** [[TMP116]], align 8
-// CHECK13-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS37]], i32 0, i32 2
-// CHECK13-NEXT:    [[TMP118:%.*]] = bitcast i8** [[TMP117]] to i32**
-// CHECK13-NEXT:    store i32* [[VLA]], i32** [[TMP118]], align 8
-// CHECK13-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS38]], i32 0, i32 2
-// CHECK13-NEXT:    [[TMP120:%.*]] = bitcast i8** [[TMP119]] to i32**
-// CHECK13-NEXT:    store i32* [[VLA]], i32** [[TMP120]], align 8
-// CHECK13-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES40]], i32 0, i32 2
-// CHECK13-NEXT:    store i64 [[TMP104]], i64* [[TMP121]], align 8
-// CHECK13-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS39]], i64 0, i64 2
-// CHECK13-NEXT:    store i8* null, i8** [[TMP122]], align 8
-// CHECK13-NEXT:    [[TMP123:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS37]], i32 0, i32 0
-// CHECK13-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS38]], i32 0, i32 0
-// CHECK13-NEXT:    [[TMP125:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES40]], i32 0, i32 0
-// CHECK13-NEXT:    [[TMP126:%.*]] = load i32, i32* [[N]], align 4
-// CHECK13-NEXT:    store i32 [[TMP126]], i32* [[DOTCAPTURE_EXPR_42]], align 4
-// CHECK13-NEXT:    [[TMP127:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_42]], align 4
-// CHECK13-NEXT:    [[SUB44:%.*]] = sub nsw i32 [[TMP127]], 0
+// CHECK13-NEXT:    store i64 [[TMP1]], i64* [[TMP108]], align 8
+// CHECK13-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS38]], i32 0, i32 1
+// CHECK13-NEXT:    [[TMP110:%.*]] = bitcast i8** [[TMP109]] to i64*
+// CHECK13-NEXT:    store i64 [[TMP1]], i64* [[TMP110]], align 8
+// CHECK13-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS39]], i64 0, i64 1
+// CHECK13-NEXT:    store i8* null, i8** [[TMP111]], align 8
+// CHECK13-NEXT:    [[TMP112:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS37]], i32 0, i32 2
+// CHECK13-NEXT:    [[TMP113:%.*]] = bitcast i8** [[TMP112]] to i32**
+// CHECK13-NEXT:    store i32* [[VLA]], i32** [[TMP113]], align 8
+// CHECK13-NEXT:    [[TMP114:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS38]], i32 0, i32 2
+// CHECK13-NEXT:    [[TMP115:%.*]] = bitcast i8** [[TMP114]] to i32**
+// CHECK13-NEXT:    store i32* [[VLA]], i32** [[TMP115]], align 8
+// CHECK13-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES40]], i32 0, i32 2
+// CHECK13-NEXT:    store i64 [[TMP100]], i64* [[TMP116]], align 8
+// CHECK13-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS39]], i64 0, i64 2
+// CHECK13-NEXT:    store i8* null, i8** [[TMP117]], align 8
+// CHECK13-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS37]], i32 0, i32 0
+// CHECK13-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS38]], i32 0, i32 0
+// CHECK13-NEXT:    [[TMP120:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES40]], i32 0, i32 0
+// CHECK13-NEXT:    [[TMP121:%.*]] = load i32, i32* [[N]], align 4
+// CHECK13-NEXT:    store i32 [[TMP121]], i32* [[DOTCAPTURE_EXPR_42]], align 4
+// CHECK13-NEXT:    [[TMP122:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_42]], align 4
+// CHECK13-NEXT:    [[SUB44:%.*]] = sub nsw i32 [[TMP122]], 0
 // CHECK13-NEXT:    [[DIV45:%.*]] = sdiv i32 [[SUB44]], 1
 // CHECK13-NEXT:    [[SUB46:%.*]] = sub nsw i32 [[DIV45]], 1
 // CHECK13-NEXT:    store i32 [[SUB46]], i32* [[DOTCAPTURE_EXPR_43]], align 4
-// CHECK13-NEXT:    [[TMP128:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_43]], align 4
-// CHECK13-NEXT:    [[ADD47:%.*]] = add nsw i32 [[TMP128]], 1
-// CHECK13-NEXT:    [[TMP129:%.*]] = zext i32 [[ADD47]] to i64
-// CHECK13-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP129]])
-// CHECK13-NEXT:    [[TMP130:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l151.region_id, i32 3, i8** [[TMP123]], i8** [[TMP124]], i64* [[TMP125]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.10, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK13-NEXT:    [[TMP131:%.*]] = icmp ne i32 [[TMP130]], 0
-// CHECK13-NEXT:    br i1 [[TMP131]], label [[OMP_OFFLOAD_FAILED48:%.*]], label [[OMP_OFFLOAD_CONT49:%.*]]
+// CHECK13-NEXT:    [[TMP123:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_43]], align 4
+// CHECK13-NEXT:    [[ADD47:%.*]] = add nsw i32 [[TMP123]], 1
+// CHECK13-NEXT:    [[TMP124:%.*]] = zext i32 [[ADD47]] to i64
+// CHECK13-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP124]])
+// CHECK13-NEXT:    [[TMP125:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l151.region_id, i32 3, i8** [[TMP118]], i8** [[TMP119]], i64* [[TMP120]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK13-NEXT:    [[TMP126:%.*]] = icmp ne i32 [[TMP125]], 0
+// CHECK13-NEXT:    br i1 [[TMP126]], label [[OMP_OFFLOAD_FAILED48:%.*]], label [[OMP_OFFLOAD_CONT49:%.*]]
 // CHECK13:       omp_offload.failed48:
-// CHECK13-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l151(i64 [[TMP103]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK13-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l151(i64 [[TMP99]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK13-NEXT:    br label [[OMP_OFFLOAD_CONT49]]
 // CHECK13:       omp_offload.cont49:
-// CHECK13-NEXT:    [[TMP132:%.*]] = load i32, i32* [[M]], align 4
-// CHECK13-NEXT:    store i32 [[TMP132]], i32* [[DOTCAPTURE_EXPR_50]], align 4
-// CHECK13-NEXT:    [[TMP133:%.*]] = load i32, i32* [[N]], align 4
+// CHECK13-NEXT:    [[TMP127:%.*]] = load i32, i32* [[M]], align 4
+// CHECK13-NEXT:    store i32 [[TMP127]], i32* [[DOTCAPTURE_EXPR_50]], align 4
+// CHECK13-NEXT:    [[TMP128:%.*]] = load i32, i32* [[N]], align 4
 // CHECK13-NEXT:    [[CONV52:%.*]] = bitcast i64* [[N_CASTED51]] to i32*
-// CHECK13-NEXT:    store i32 [[TMP133]], i32* [[CONV52]], align 4
-// CHECK13-NEXT:    [[TMP134:%.*]] = load i64, i64* [[N_CASTED51]], align 8
-// CHECK13-NEXT:    [[TMP135:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_50]], align 4
+// CHECK13-NEXT:    store i32 [[TMP128]], i32* [[CONV52]], align 4
+// CHECK13-NEXT:    [[TMP129:%.*]] = load i64, i64* [[N_CASTED51]], align 8
+// CHECK13-NEXT:    [[TMP130:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_50]], align 4
 // CHECK13-NEXT:    [[CONV54:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED53]] to i32*
-// CHECK13-NEXT:    store i32 [[TMP135]], i32* [[CONV54]], align 4
-// CHECK13-NEXT:    [[TMP136:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED53]], align 8
-// CHECK13-NEXT:    [[TMP137:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK13-NEXT:    [[TMP138:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS55]], i32 0, i32 0
-// CHECK13-NEXT:    [[TMP139:%.*]] = bitcast i8** [[TMP138]] to i64*
-// CHECK13-NEXT:    store i64 [[TMP134]], i64* [[TMP139]], align 8
-// CHECK13-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS56]], i32 0, i32 0
-// CHECK13-NEXT:    [[TMP141:%.*]] = bitcast i8** [[TMP140]] to i64*
-// CHECK13-NEXT:    store i64 [[TMP134]], i64* [[TMP141]], align 8
-// CHECK13-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES58]], i32 0, i32 0
-// CHECK13-NEXT:    store i64 4, i64* [[TMP142]], align 8
-// CHECK13-NEXT:    [[TMP143:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS57]], i64 0, i64 0
+// CHECK13-NEXT:    store i32 [[TMP130]], i32* [[CONV54]], align 4
+// CHECK13-NEXT:    [[TMP131:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED53]], align 8
+// CHECK13-NEXT:    [[TMP132:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK13-NEXT:    [[TMP133:%.*]] = bitcast [4 x i64]* [[DOTOFFLOAD_SIZES58]] to i8*
+// CHECK13-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP133]], i8* align 8 bitcast ([4 x i64]* @.offload_sizes.16 to i8*), i64 32, i1 false)
+// CHECK13-NEXT:    [[TMP134:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS55]], i32 0, i32 0
+// CHECK13-NEXT:    [[TMP135:%.*]] = bitcast i8** [[TMP134]] to i64*
+// CHECK13-NEXT:    store i64 [[TMP129]], i64* [[TMP135]], align 8
+// CHECK13-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS56]], i32 0, i32 0
+// CHECK13-NEXT:    [[TMP137:%.*]] = bitcast i8** [[TMP136]] to i64*
+// CHECK13-NEXT:    store i64 [[TMP129]], i64* [[TMP137]], align 8
+// CHECK13-NEXT:    [[TMP138:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS57]], i64 0, i64 0
+// CHECK13-NEXT:    store i8* null, i8** [[TMP138]], align 8
+// CHECK13-NEXT:    [[TMP139:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS55]], i32 0, i32 1
+// CHECK13-NEXT:    [[TMP140:%.*]] = bitcast i8** [[TMP139]] to i64*
+// CHECK13-NEXT:    store i64 [[TMP1]], i64* [[TMP140]], align 8
+// CHECK13-NEXT:    [[TMP141:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS56]], i32 0, i32 1
+// CHECK13-NEXT:    [[TMP142:%.*]] = bitcast i8** [[TMP141]] to i64*
+// CHECK13-NEXT:    store i64 [[TMP1]], i64* [[TMP142]], align 8
+// CHECK13-NEXT:    [[TMP143:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS57]], i64 0, i64 1
 // CHECK13-NEXT:    store i8* null, i8** [[TMP143]], align 8
-// CHECK13-NEXT:    [[TMP144:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS55]], i32 0, i32 1
-// CHECK13-NEXT:    [[TMP145:%.*]] = bitcast i8** [[TMP144]] to i64*
-// CHECK13-NEXT:    store i64 [[TMP1]], i64* [[TMP145]], align 8
-// CHECK13-NEXT:    [[TMP146:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS56]], i32 0, i32 1
-// CHECK13-NEXT:    [[TMP147:%.*]] = bitcast i8** [[TMP146]] to i64*
-// CHECK13-NEXT:    store i64 [[TMP1]], i64* [[TMP147]], align 8
-// CHECK13-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES58]], i32 0, i32 1
-// CHECK13-NEXT:    store i64 8, i64* [[TMP148]], align 8
-// CHECK13-NEXT:    [[TMP149:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS57]], i64 0, i64 1
+// CHECK13-NEXT:    [[TMP144:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS55]], i32 0, i32 2
+// CHECK13-NEXT:    [[TMP145:%.*]] = bitcast i8** [[TMP144]] to i32**
+// CHECK13-NEXT:    store i32* [[VLA]], i32** [[TMP145]], align 8
+// CHECK13-NEXT:    [[TMP146:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS56]], i32 0, i32 2
+// CHECK13-NEXT:    [[TMP147:%.*]] = bitcast i8** [[TMP146]] to i32**
+// CHECK13-NEXT:    store i32* [[VLA]], i32** [[TMP147]], align 8
+// CHECK13-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES58]], i32 0, i32 2
+// CHECK13-NEXT:    store i64 [[TMP132]], i64* [[TMP148]], align 8
+// CHECK13-NEXT:    [[TMP149:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS57]], i64 0, i64 2
 // CHECK13-NEXT:    store i8* null, i8** [[TMP149]], align 8
-// CHECK13-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS55]], i32 0, i32 2
-// CHECK13-NEXT:    [[TMP151:%.*]] = bitcast i8** [[TMP150]] to i32**
-// CHECK13-NEXT:    store i32* [[VLA]], i32** [[TMP151]], align 8
-// CHECK13-NEXT:    [[TMP152:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS56]], i32 0, i32 2
-// CHECK13-NEXT:    [[TMP153:%.*]] = bitcast i8** [[TMP152]] to i32**
-// CHECK13-NEXT:    store i32* [[VLA]], i32** [[TMP153]], align 8
-// CHECK13-NEXT:    [[TMP154:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES58]], i32 0, i32 2
-// CHECK13-NEXT:    store i64 [[TMP137]], i64* [[TMP154]], align 8
-// CHECK13-NEXT:    [[TMP155:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS57]], i64 0, i64 2
-// CHECK13-NEXT:    store i8* null, i8** [[TMP155]], align 8
-// CHECK13-NEXT:    [[TMP156:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS55]], i32 0, i32 3
-// CHECK13-NEXT:    [[TMP157:%.*]] = bitcast i8** [[TMP156]] to i64*
-// CHECK13-NEXT:    store i64 [[TMP136]], i64* [[TMP157]], align 8
-// CHECK13-NEXT:    [[TMP158:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS56]], i32 0, i32 3
-// CHECK13-NEXT:    [[TMP159:%.*]] = bitcast i8** [[TMP158]] to i64*
-// CHECK13-NEXT:    store i64 [[TMP136]], i64* [[TMP159]], align 8
-// CHECK13-NEXT:    [[TMP160:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES58]], i32 0, i32 3
-// CHECK13-NEXT:    store i64 4, i64* [[TMP160]], align 8
-// CHECK13-NEXT:    [[TMP161:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS57]], i64 0, i64 3
-// CHECK13-NEXT:    store i8* null, i8** [[TMP161]], align 8
-// CHECK13-NEXT:    [[TMP162:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS55]], i32 0, i32 0
-// CHECK13-NEXT:    [[TMP163:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS56]], i32 0, i32 0
-// CHECK13-NEXT:    [[TMP164:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES58]], i32 0, i32 0
-// CHECK13-NEXT:    [[TMP165:%.*]] = load i32, i32* [[N]], align 4
-// CHECK13-NEXT:    store i32 [[TMP165]], i32* [[DOTCAPTURE_EXPR_60]], align 4
-// CHECK13-NEXT:    [[TMP166:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_60]], align 4
-// CHECK13-NEXT:    [[SUB62:%.*]] = sub nsw i32 [[TMP166]], 0
+// CHECK13-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS55]], i32 0, i32 3
+// CHECK13-NEXT:    [[TMP151:%.*]] = bitcast i8** [[TMP150]] to i64*
+// CHECK13-NEXT:    store i64 [[TMP131]], i64* [[TMP151]], align 8
+// CHECK13-NEXT:    [[TMP152:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS56]], i32 0, i32 3
+// CHECK13-NEXT:    [[TMP153:%.*]] = bitcast i8** [[TMP152]] to i64*
+// CHECK13-NEXT:    store i64 [[TMP131]], i64* [[TMP153]], align 8
+// CHECK13-NEXT:    [[TMP154:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS57]], i64 0, i64 3
+// CHECK13-NEXT:    store i8* null, i8** [[TMP154]], align 8
+// CHECK13-NEXT:    [[TMP155:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS55]], i32 0, i32 0
+// CHECK13-NEXT:    [[TMP156:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS56]], i32 0, i32 0
+// CHECK13-NEXT:    [[TMP157:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES58]], i32 0, i32 0
+// CHECK13-NEXT:    [[TMP158:%.*]] = load i32, i32* [[N]], align 4
+// CHECK13-NEXT:    store i32 [[TMP158]], i32* [[DOTCAPTURE_EXPR_60]], align 4
+// CHECK13-NEXT:    [[TMP159:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_60]], align 4
+// CHECK13-NEXT:    [[SUB62:%.*]] = sub nsw i32 [[TMP159]], 0
 // CHECK13-NEXT:    [[DIV63:%.*]] = sdiv i32 [[SUB62]], 1
 // CHECK13-NEXT:    [[SUB64:%.*]] = sub nsw i32 [[DIV63]], 1
 // CHECK13-NEXT:    store i32 [[SUB64]], i32* [[DOTCAPTURE_EXPR_61]], align 4
-// CHECK13-NEXT:    [[TMP167:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_61]], align 4
-// CHECK13-NEXT:    [[ADD65:%.*]] = add nsw i32 [[TMP167]], 1
-// CHECK13-NEXT:    [[TMP168:%.*]] = zext i32 [[ADD65]] to i64
-// CHECK13-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP168]])
-// CHECK13-NEXT:    [[TMP169:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l155.region_id, i32 4, i8** [[TMP162]], i8** [[TMP163]], i64* [[TMP164]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK13-NEXT:    [[TMP170:%.*]] = icmp ne i32 [[TMP169]], 0
-// CHECK13-NEXT:    br i1 [[TMP170]], label [[OMP_OFFLOAD_FAILED66:%.*]], label [[OMP_OFFLOAD_CONT67:%.*]]
+// CHECK13-NEXT:    [[TMP160:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_61]], align 4
+// CHECK13-NEXT:    [[ADD65:%.*]] = add nsw i32 [[TMP160]], 1
+// CHECK13-NEXT:    [[TMP161:%.*]] = zext i32 [[ADD65]] to i64
+// CHECK13-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP161]])
+// CHECK13-NEXT:    [[TMP162:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l155.region_id, i32 4, i8** [[TMP155]], i8** [[TMP156]], i64* [[TMP157]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.17, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK13-NEXT:    [[TMP163:%.*]] = icmp ne i32 [[TMP162]], 0
+// CHECK13-NEXT:    br i1 [[TMP163]], label [[OMP_OFFLOAD_FAILED66:%.*]], label [[OMP_OFFLOAD_CONT67:%.*]]
 // CHECK13:       omp_offload.failed66:
-// CHECK13-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l155(i64 [[TMP134]], i64 [[TMP1]], i32* [[VLA]], i64 [[TMP136]]) #[[ATTR3]]
+// CHECK13-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l155(i64 [[TMP129]], i64 [[TMP1]], i32* [[VLA]], i64 [[TMP131]]) #[[ATTR3]]
 // CHECK13-NEXT:    br label [[OMP_OFFLOAD_CONT67]]
 // CHECK13:       omp_offload.cont67:
-// CHECK13-NEXT:    [[TMP171:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK13-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10EEiT_(i32 noundef signext [[TMP171]])
+// CHECK13-NEXT:    [[TMP164:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK13-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10EEiT_(i32 noundef signext [[TMP164]])
 // CHECK13-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK13-NEXT:    [[TMP172:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK13-NEXT:    call void @llvm.stackrestore(i8* [[TMP172]])
-// CHECK13-NEXT:    [[TMP173:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK13-NEXT:    ret i32 [[TMP173]]
+// CHECK13-NEXT:    [[TMP165:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK13-NEXT:    call void @llvm.stackrestore(i8* [[TMP165]])
+// CHECK13-NEXT:    [[TMP166:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK13-NEXT:    ret i32 [[TMP166]]
 //
 //
 // CHECK13-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l139
@@ -9631,11 +9617,11 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK13-NEXT:    store i32 [[TMP4]], i32* [[CONV3]], align 4
 // CHECK13-NEXT:    [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP3]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP5]])
+// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i64 [[TMP3]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP5]])
 // CHECK13-NEXT:    ret void
 //
 //
-// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..5
+// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..6
 // CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK13-NEXT:  entry:
 // CHECK13-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -9722,7 +9708,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[CONV9:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK13-NEXT:    store i32 [[TMP22]], i32* [[CONV9]], align 4, !llvm.access.group !28
 // CHECK13-NEXT:    [[TMP23:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !28
-// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i64 [[TMP21]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP23]]), !llvm.access.group !28
+// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*, i64)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i64 [[TMP21]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP23]]), !llvm.access.group !28
 // CHECK13-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK13:       omp.inner.for.inc:
 // CHECK13-NEXT:    [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28
@@ -9776,7 +9762,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    ret void
 //
 //
-// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..6
+// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..7
 // CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK13-NEXT:  entry:
 // CHECK13-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -9913,11 +9899,11 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32*
 // CHECK13-NEXT:    store i32 [[TMP2]], i32* [[CONV1]], align 4
 // CHECK13-NEXT:    [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8
-// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), i64 [[TMP3]], i64 [[TMP0]], i32* [[TMP1]])
+// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), i64 [[TMP3]], i64 [[TMP0]], i32* [[TMP1]])
 // CHECK13-NEXT:    ret void
 //
 //
-// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..8
+// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..10
 // CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] {
 // CHECK13-NEXT:  entry:
 // CHECK13-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -9994,7 +9980,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32*
 // CHECK13-NEXT:    store i32 [[TMP19]], i32* [[CONV6]], align 4, !llvm.access.group !34
 // CHECK13-NEXT:    [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !34
-// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], i32* [[TMP1]]), !llvm.access.group !34
+// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], i32* [[TMP1]]), !llvm.access.group !34
 // CHECK13-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK13:       omp.inner.for.inc:
 // CHECK13-NEXT:    [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34
@@ -10025,7 +10011,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    ret void
 //
 //
-// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..9
+// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..11
 // CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] {
 // CHECK13-NEXT:  entry:
 // CHECK13-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -10163,11 +10149,11 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK13-NEXT:    store i32 [[TMP4]], i32* [[CONV3]], align 4
 // CHECK13-NEXT:    [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP3]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP5]])
+// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i64 [[TMP3]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP5]])
 // CHECK13-NEXT:    ret void
 //
 //
-// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..11
+// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..14
 // CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK13-NEXT:  entry:
 // CHECK13-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -10252,7 +10238,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[CONV9:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK13-NEXT:    store i32 [[TMP21]], i32* [[CONV9]], align 4, !llvm.access.group !40
 // CHECK13-NEXT:    [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !40
-// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*, i64)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP22]]), !llvm.access.group !40
+// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP22]]), !llvm.access.group !40
 // CHECK13-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK13:       omp.inner.for.inc:
 // CHECK13-NEXT:    [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40
@@ -10283,7 +10269,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    ret void
 //
 //
-// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..12
+// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..15
 // CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK13-NEXT:  entry:
 // CHECK13-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -10401,7 +10387,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK13-LABEL: define {{[^@]+}}@_Z5tmainIiLi10EEiT_
-// CHECK13-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK13-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK13-NEXT:  entry:
 // CHECK13-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK13-NEXT:    [[A:%.*]] = alloca [10 x i32], align 4
@@ -10443,7 +10429,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK13-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK13-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK13-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l112.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.16, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK13-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l112.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.20, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.21, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK13-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK13-NEXT:    br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK13:       omp_offload.failed:
@@ -10461,7 +10447,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK13-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
 // CHECK13-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK13-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l116.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.19, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.20, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK13-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l116.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.24, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.25, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK13-NEXT:    [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
 // CHECK13-NEXT:    br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]]
 // CHECK13:       omp_offload.failed5:
@@ -10493,7 +10479,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0
 // CHECK13-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS8]], i32 0, i32 0
 // CHECK13-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK13-NEXT:    [[TMP33:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l120.region_id, i32 2, i8** [[TMP31]], i8** [[TMP32]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.23, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.24, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK13-NEXT:    [[TMP33:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l120.region_id, i32 2, i8** [[TMP31]], i8** [[TMP32]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.28, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.29, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK13-NEXT:    [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0
 // CHECK13-NEXT:    br i1 [[TMP34]], label [[OMP_OFFLOAD_FAILED11:%.*]], label [[OMP_OFFLOAD_CONT12:%.*]]
 // CHECK13:       omp_offload.failed11:
@@ -10511,7 +10497,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0
 // CHECK13-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 0
 // CHECK13-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK13-NEXT:    [[TMP42:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l124.region_id, i32 1, i8** [[TMP40]], i8** [[TMP41]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.27, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.28, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK13-NEXT:    [[TMP42:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l124.region_id, i32 1, i8** [[TMP40]], i8** [[TMP41]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.32, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.33, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK13-NEXT:    [[TMP43:%.*]] = icmp ne i32 [[TMP42]], 0
 // CHECK13-NEXT:    br i1 [[TMP43]], label [[OMP_OFFLOAD_FAILED17:%.*]], label [[OMP_OFFLOAD_CONT18:%.*]]
 // CHECK13:       omp_offload.failed17:
@@ -10543,7 +10529,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 0
 // CHECK13-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 0
 // CHECK13-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK13-NEXT:    [[TMP59:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l128.region_id, i32 2, i8** [[TMP57]], i8** [[TMP58]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.31, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.32, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK13-NEXT:    [[TMP59:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l128.region_id, i32 2, i8** [[TMP57]], i8** [[TMP58]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.36, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.37, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK13-NEXT:    [[TMP60:%.*]] = icmp ne i32 [[TMP59]], 0
 // CHECK13-NEXT:    br i1 [[TMP60]], label [[OMP_OFFLOAD_FAILED26:%.*]], label [[OMP_OFFLOAD_CONT27:%.*]]
 // CHECK13:       omp_offload.failed26:
@@ -10559,11 +10545,11 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8
 // CHECK13-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8
 // CHECK13-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8
-// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK13-NEXT:    ret void
 //
 //
-// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..14
+// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..18
 // CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK13-NEXT:  entry:
 // CHECK13-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -10611,7 +10597,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
 // CHECK13-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !46
 // CHECK13-NEXT:    [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
-// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !46
+// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !46
 // CHECK13-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK13:       omp.inner.for.inc:
 // CHECK13-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46
@@ -10633,7 +10619,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    ret void
 //
 //
-// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..15
+// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..19
 // CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK13-NEXT:  entry:
 // CHECK13-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -10723,11 +10709,11 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8
 // CHECK13-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8
 // CHECK13-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8
-// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..17 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..22 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK13-NEXT:    ret void
 //
 //
-// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..17
+// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..22
 // CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK13-NEXT:  entry:
 // CHECK13-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -10775,7 +10761,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
 // CHECK13-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !52
 // CHECK13-NEXT:    [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
-// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !52
+// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..23 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !52
 // CHECK13-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK13:       omp.inner.for.inc:
 // CHECK13-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52
@@ -10797,7 +10783,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    ret void
 //
 //
-// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..18
+// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..23
 // CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK13-NEXT:  entry:
 // CHECK13-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -10895,11 +10881,11 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK13-NEXT:    store i32 [[TMP1]], i32* [[CONV1]], align 4
 // CHECK13-NEXT:    [[TMP2:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i64)* @.omp_outlined..21 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i64 [[TMP2]])
+// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i64)* @.omp_outlined..26 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i64 [[TMP2]])
 // CHECK13-NEXT:    ret void
 //
 //
-// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..21
+// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..26
 // CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK13-NEXT:  entry:
 // CHECK13-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -10955,7 +10941,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK13-NEXT:    store i32 [[TMP12]], i32* [[CONV2]], align 4, !llvm.access.group !58
 // CHECK13-NEXT:    [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !58
-// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..22 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]), !llvm.access.group !58
+// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..27 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]), !llvm.access.group !58
 // CHECK13-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK13:       omp.inner.for.inc:
 // CHECK13-NEXT:    [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58
@@ -10977,7 +10963,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    ret void
 //
 //
-// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..22
+// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..27
 // CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK13-NEXT:  entry:
 // CHECK13-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -11092,11 +11078,11 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8
 // CHECK13-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8
 // CHECK13-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8
-// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..25 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK13-NEXT:    ret void
 //
 //
-// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..25
+// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..30
 // CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK13-NEXT:  entry:
 // CHECK13-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -11144,7 +11130,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
 // CHECK13-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !64
 // CHECK13-NEXT:    [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
-// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !64
+// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !64
 // CHECK13-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK13:       omp.inner.for.inc:
 // CHECK13-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !64
@@ -11166,7 +11152,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    ret void
 //
 //
-// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..26
+// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..31
 // CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK13-NEXT:  entry:
 // CHECK13-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -11262,11 +11248,11 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK13-NEXT:    store i32 [[TMP1]], i32* [[CONV1]], align 4
 // CHECK13-NEXT:    [[TMP2:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i64)* @.omp_outlined..29 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i64 [[TMP2]])
+// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i64)* @.omp_outlined..34 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i64 [[TMP2]])
 // CHECK13-NEXT:    ret void
 //
 //
-// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..29
+// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..34
 // CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK13-NEXT:  entry:
 // CHECK13-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -11322,7 +11308,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK13-NEXT:    store i32 [[TMP12]], i32* [[CONV2]], align 4, !llvm.access.group !70
 // CHECK13-NEXT:    [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !70
-// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]), !llvm.access.group !70
+// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]), !llvm.access.group !70
 // CHECK13-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK13:       omp.inner.for.inc:
 // CHECK13-NEXT:    [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !70
@@ -11344,7 +11330,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    ret void
 //
 //
-// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..30
+// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..35
 // CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK13-NEXT:  entry:
 // CHECK13-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -11431,7 +11417,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK13-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK13-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK13-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK13-NEXT:  entry:
 // CHECK13-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK13-NEXT:    ret void
@@ -11507,14 +11493,14 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    store i32 [[TMP3]], i32* [[CONV]], align 4
 // CHECK14-NEXT:    [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8
 // CHECK14-NEXT:    [[TMP5:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK14-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK14-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i64*
-// CHECK14-NEXT:    store i64 [[TMP4]], i64* [[TMP7]], align 8
-// CHECK14-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK14-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i64*
-// CHECK14-NEXT:    store i64 [[TMP4]], i64* [[TMP9]], align 8
-// CHECK14-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK14-NEXT:    store i64 4, i64* [[TMP10]], align 8
+// CHECK14-NEXT:    [[TMP6:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK14-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP6]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes to i8*), i64 24, i1 false)
+// CHECK14-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK14-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i64*
+// CHECK14-NEXT:    store i64 [[TMP4]], i64* [[TMP8]], align 8
+// CHECK14-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK14-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i64*
+// CHECK14-NEXT:    store i64 [[TMP4]], i64* [[TMP10]], align 8
 // CHECK14-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
 // CHECK14-NEXT:    store i8* null, i8** [[TMP11]], align 8
 // CHECK14-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -11523,304 +11509,290 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK14-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i64*
 // CHECK14-NEXT:    store i64 [[TMP1]], i64* [[TMP15]], align 8
-// CHECK14-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK14-NEXT:    store i64 8, i64* [[TMP16]], align 8
-// CHECK14-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK14-NEXT:    store i8* null, i8** [[TMP17]], align 8
-// CHECK14-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK14-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
-// CHECK14-NEXT:    store i32* [[VLA]], i32** [[TMP19]], align 8
-// CHECK14-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK14-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32**
-// CHECK14-NEXT:    store i32* [[VLA]], i32** [[TMP21]], align 8
-// CHECK14-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK14-NEXT:    store i64 [[TMP5]], i64* [[TMP22]], align 8
-// CHECK14-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK14-NEXT:    store i8* null, i8** [[TMP23]], align 8
-// CHECK14-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK14-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK14-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK14-NEXT:    [[TMP27:%.*]] = load i32, i32* [[N]], align 4
-// CHECK14-NEXT:    store i32 [[TMP27]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK14-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK14-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP28]], 0
+// CHECK14-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK14-NEXT:    store i8* null, i8** [[TMP16]], align 8
+// CHECK14-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK14-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32**
+// CHECK14-NEXT:    store i32* [[VLA]], i32** [[TMP18]], align 8
+// CHECK14-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK14-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32**
+// CHECK14-NEXT:    store i32* [[VLA]], i32** [[TMP20]], align 8
+// CHECK14-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
+// CHECK14-NEXT:    store i64 [[TMP5]], i64* [[TMP21]], align 8
+// CHECK14-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK14-NEXT:    store i8* null, i8** [[TMP22]], align 8
+// CHECK14-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK14-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK14-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK14-NEXT:    [[TMP26:%.*]] = load i32, i32* [[N]], align 4
+// CHECK14-NEXT:    store i32 [[TMP26]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK14-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK14-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP27]], 0
 // CHECK14-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK14-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK14-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK14-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK14-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP29]], 1
-// CHECK14-NEXT:    [[TMP30:%.*]] = zext i32 [[ADD]] to i64
-// CHECK14-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP30]])
-// CHECK14-NEXT:    [[TMP31:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l139.region_id, i32 3, i8** [[TMP24]], i8** [[TMP25]], i64* [[TMP26]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK14-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
-// CHECK14-NEXT:    br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK14-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK14-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP28]], 1
+// CHECK14-NEXT:    [[TMP29:%.*]] = zext i32 [[ADD]] to i64
+// CHECK14-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP29]])
+// CHECK14-NEXT:    [[TMP30:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l139.region_id, i32 3, i8** [[TMP23]], i8** [[TMP24]], i64* [[TMP25]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK14-NEXT:    [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
+// CHECK14-NEXT:    br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK14:       omp_offload.failed:
 // CHECK14-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l139(i64 [[TMP4]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK14-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK14:       omp_offload.cont:
-// CHECK14-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N]], align 4
+// CHECK14-NEXT:    [[TMP32:%.*]] = load i32, i32* [[N]], align 4
 // CHECK14-NEXT:    [[CONV4:%.*]] = bitcast i64* [[N_CASTED3]] to i32*
-// CHECK14-NEXT:    store i32 [[TMP33]], i32* [[CONV4]], align 4
-// CHECK14-NEXT:    [[TMP34:%.*]] = load i64, i64* [[N_CASTED3]], align 8
-// CHECK14-NEXT:    [[TMP35:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK14-NEXT:    store i32 [[TMP32]], i32* [[CONV4]], align 4
+// CHECK14-NEXT:    [[TMP33:%.*]] = load i64, i64* [[N_CASTED3]], align 8
+// CHECK14-NEXT:    [[TMP34:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK14-NEXT:    [[TMP35:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES8]] to i8*
+// CHECK14-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP35]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes.4 to i8*), i64 24, i1 false)
 // CHECK14-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
 // CHECK14-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i64*
-// CHECK14-NEXT:    store i64 [[TMP34]], i64* [[TMP37]], align 8
+// CHECK14-NEXT:    store i64 [[TMP33]], i64* [[TMP37]], align 8
 // CHECK14-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
 // CHECK14-NEXT:    [[TMP39:%.*]] = bitcast i8** [[TMP38]] to i64*
-// CHECK14-NEXT:    store i64 [[TMP34]], i64* [[TMP39]], align 8
-// CHECK14-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 0
-// CHECK14-NEXT:    store i64 4, i64* [[TMP40]], align 8
-// CHECK14-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 0
-// CHECK14-NEXT:    store i8* null, i8** [[TMP41]], align 8
-// CHECK14-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 1
-// CHECK14-NEXT:    [[TMP43:%.*]] = bitcast i8** [[TMP42]] to i64*
-// CHECK14-NEXT:    store i64 [[TMP1]], i64* [[TMP43]], align 8
-// CHECK14-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 1
-// CHECK14-NEXT:    [[TMP45:%.*]] = bitcast i8** [[TMP44]] to i64*
-// CHECK14-NEXT:    store i64 [[TMP1]], i64* [[TMP45]], align 8
-// CHECK14-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 1
-// CHECK14-NEXT:    store i64 8, i64* [[TMP46]], align 8
-// CHECK14-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 1
-// CHECK14-NEXT:    store i8* null, i8** [[TMP47]], align 8
-// CHECK14-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 2
+// CHECK14-NEXT:    store i64 [[TMP33]], i64* [[TMP39]], align 8
+// CHECK14-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 0
+// CHECK14-NEXT:    store i8* null, i8** [[TMP40]], align 8
+// CHECK14-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 1
+// CHECK14-NEXT:    [[TMP42:%.*]] = bitcast i8** [[TMP41]] to i64*
+// CHECK14-NEXT:    store i64 [[TMP1]], i64* [[TMP42]], align 8
+// CHECK14-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 1
+// CHECK14-NEXT:    [[TMP44:%.*]] = bitcast i8** [[TMP43]] to i64*
+// CHECK14-NEXT:    store i64 [[TMP1]], i64* [[TMP44]], align 8
+// CHECK14-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 1
+// CHECK14-NEXT:    store i8* null, i8** [[TMP45]], align 8
+// CHECK14-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 2
+// CHECK14-NEXT:    [[TMP47:%.*]] = bitcast i8** [[TMP46]] to i32**
+// CHECK14-NEXT:    store i32* [[VLA]], i32** [[TMP47]], align 8
+// CHECK14-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 2
 // CHECK14-NEXT:    [[TMP49:%.*]] = bitcast i8** [[TMP48]] to i32**
 // CHECK14-NEXT:    store i32* [[VLA]], i32** [[TMP49]], align 8
-// CHECK14-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 2
-// CHECK14-NEXT:    [[TMP51:%.*]] = bitcast i8** [[TMP50]] to i32**
-// CHECK14-NEXT:    store i32* [[VLA]], i32** [[TMP51]], align 8
-// CHECK14-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 2
-// CHECK14-NEXT:    store i64 [[TMP35]], i64* [[TMP52]], align 8
-// CHECK14-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 2
-// CHECK14-NEXT:    store i8* null, i8** [[TMP53]], align 8
-// CHECK14-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
-// CHECK14-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
-// CHECK14-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 0
-// CHECK14-NEXT:    [[TMP57:%.*]] = load i32, i32* [[N]], align 4
-// CHECK14-NEXT:    store i32 [[TMP57]], i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK14-NEXT:    [[TMP58:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK14-NEXT:    [[SUB12:%.*]] = sub nsw i32 [[TMP58]], 0
+// CHECK14-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 2
+// CHECK14-NEXT:    store i64 [[TMP34]], i64* [[TMP50]], align 8
+// CHECK14-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 2
+// CHECK14-NEXT:    store i8* null, i8** [[TMP51]], align 8
+// CHECK14-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
+// CHECK14-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
+// CHECK14-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 0
+// CHECK14-NEXT:    [[TMP55:%.*]] = load i32, i32* [[N]], align 4
+// CHECK14-NEXT:    store i32 [[TMP55]], i32* [[DOTCAPTURE_EXPR_10]], align 4
+// CHECK14-NEXT:    [[TMP56:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
+// CHECK14-NEXT:    [[SUB12:%.*]] = sub nsw i32 [[TMP56]], 0
 // CHECK14-NEXT:    [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1
 // CHECK14-NEXT:    [[SUB14:%.*]] = sub nsw i32 [[DIV13]], 1
 // CHECK14-NEXT:    store i32 [[SUB14]], i32* [[DOTCAPTURE_EXPR_11]], align 4
-// CHECK14-NEXT:    [[TMP59:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_11]], align 4
-// CHECK14-NEXT:    [[ADD15:%.*]] = add nsw i32 [[TMP59]], 1
-// CHECK14-NEXT:    [[TMP60:%.*]] = zext i32 [[ADD15]] to i64
-// CHECK14-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP60]])
-// CHECK14-NEXT:    [[TMP61:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l143.region_id, i32 3, i8** [[TMP54]], i8** [[TMP55]], i64* [[TMP56]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK14-NEXT:    [[TMP62:%.*]] = icmp ne i32 [[TMP61]], 0
-// CHECK14-NEXT:    br i1 [[TMP62]], label [[OMP_OFFLOAD_FAILED16:%.*]], label [[OMP_OFFLOAD_CONT17:%.*]]
+// CHECK14-NEXT:    [[TMP57:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_11]], align 4
+// CHECK14-NEXT:    [[ADD15:%.*]] = add nsw i32 [[TMP57]], 1
+// CHECK14-NEXT:    [[TMP58:%.*]] = zext i32 [[ADD15]] to i64
+// CHECK14-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP58]])
+// CHECK14-NEXT:    [[TMP59:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l143.region_id, i32 3, i8** [[TMP52]], i8** [[TMP53]], i64* [[TMP54]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.5, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK14-NEXT:    [[TMP60:%.*]] = icmp ne i32 [[TMP59]], 0
+// CHECK14-NEXT:    br i1 [[TMP60]], label [[OMP_OFFLOAD_FAILED16:%.*]], label [[OMP_OFFLOAD_CONT17:%.*]]
 // CHECK14:       omp_offload.failed16:
-// CHECK14-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l143(i64 [[TMP34]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK14-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l143(i64 [[TMP33]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK14-NEXT:    br label [[OMP_OFFLOAD_CONT17]]
 // CHECK14:       omp_offload.cont17:
-// CHECK14-NEXT:    [[TMP63:%.*]] = load i32, i32* [[M]], align 4
-// CHECK14-NEXT:    store i32 [[TMP63]], i32* [[DOTCAPTURE_EXPR_18]], align 4
-// CHECK14-NEXT:    [[TMP64:%.*]] = load i32, i32* [[N]], align 4
+// CHECK14-NEXT:    [[TMP61:%.*]] = load i32, i32* [[M]], align 4
+// CHECK14-NEXT:    store i32 [[TMP61]], i32* [[DOTCAPTURE_EXPR_18]], align 4
+// CHECK14-NEXT:    [[TMP62:%.*]] = load i32, i32* [[N]], align 4
 // CHECK14-NEXT:    [[CONV20:%.*]] = bitcast i64* [[N_CASTED19]] to i32*
-// CHECK14-NEXT:    store i32 [[TMP64]], i32* [[CONV20]], align 4
-// CHECK14-NEXT:    [[TMP65:%.*]] = load i64, i64* [[N_CASTED19]], align 8
-// CHECK14-NEXT:    [[TMP66:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4
+// CHECK14-NEXT:    store i32 [[TMP62]], i32* [[CONV20]], align 4
+// CHECK14-NEXT:    [[TMP63:%.*]] = load i64, i64* [[N_CASTED19]], align 8
+// CHECK14-NEXT:    [[TMP64:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4
 // CHECK14-NEXT:    [[CONV21:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
-// CHECK14-NEXT:    store i32 [[TMP66]], i32* [[CONV21]], align 4
-// CHECK14-NEXT:    [[TMP67:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK14-NEXT:    [[TMP68:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK14-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 0
-// CHECK14-NEXT:    [[TMP70:%.*]] = bitcast i8** [[TMP69]] to i64*
-// CHECK14-NEXT:    store i64 [[TMP65]], i64* [[TMP70]], align 8
-// CHECK14-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 0
-// CHECK14-NEXT:    [[TMP72:%.*]] = bitcast i8** [[TMP71]] to i64*
-// CHECK14-NEXT:    store i64 [[TMP65]], i64* [[TMP72]], align 8
-// CHECK14-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 0
-// CHECK14-NEXT:    store i64 4, i64* [[TMP73]], align 8
-// CHECK14-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 0
-// CHECK14-NEXT:    store i8* null, i8** [[TMP74]], align 8
-// CHECK14-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 1
+// CHECK14-NEXT:    store i32 [[TMP64]], i32* [[CONV21]], align 4
+// CHECK14-NEXT:    [[TMP65:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
+// CHECK14-NEXT:    [[TMP66:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK14-NEXT:    [[TMP67:%.*]] = bitcast [4 x i64]* [[DOTOFFLOAD_SIZES25]] to i8*
+// CHECK14-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP67]], i8* align 8 bitcast ([4 x i64]* @.offload_sizes.8 to i8*), i64 32, i1 false)
+// CHECK14-NEXT:    [[TMP68:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 0
+// CHECK14-NEXT:    [[TMP69:%.*]] = bitcast i8** [[TMP68]] to i64*
+// CHECK14-NEXT:    store i64 [[TMP63]], i64* [[TMP69]], align 8
+// CHECK14-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 0
+// CHECK14-NEXT:    [[TMP71:%.*]] = bitcast i8** [[TMP70]] to i64*
+// CHECK14-NEXT:    store i64 [[TMP63]], i64* [[TMP71]], align 8
+// CHECK14-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 0
+// CHECK14-NEXT:    store i8* null, i8** [[TMP72]], align 8
+// CHECK14-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 1
+// CHECK14-NEXT:    [[TMP74:%.*]] = bitcast i8** [[TMP73]] to i64*
+// CHECK14-NEXT:    store i64 [[TMP1]], i64* [[TMP74]], align 8
+// CHECK14-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 1
 // CHECK14-NEXT:    [[TMP76:%.*]] = bitcast i8** [[TMP75]] to i64*
 // CHECK14-NEXT:    store i64 [[TMP1]], i64* [[TMP76]], align 8
-// CHECK14-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 1
-// CHECK14-NEXT:    [[TMP78:%.*]] = bitcast i8** [[TMP77]] to i64*
-// CHECK14-NEXT:    store i64 [[TMP1]], i64* [[TMP78]], align 8
-// CHECK14-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 1
-// CHECK14-NEXT:    store i64 8, i64* [[TMP79]], align 8
-// CHECK14-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 1
-// CHECK14-NEXT:    store i8* null, i8** [[TMP80]], align 8
-// CHECK14-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 2
-// CHECK14-NEXT:    [[TMP82:%.*]] = bitcast i8** [[TMP81]] to i32**
-// CHECK14-NEXT:    store i32* [[VLA]], i32** [[TMP82]], align 8
-// CHECK14-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 2
-// CHECK14-NEXT:    [[TMP84:%.*]] = bitcast i8** [[TMP83]] to i32**
-// CHECK14-NEXT:    store i32* [[VLA]], i32** [[TMP84]], align 8
-// CHECK14-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 2
-// CHECK14-NEXT:    store i64 [[TMP68]], i64* [[TMP85]], align 8
-// CHECK14-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 2
-// CHECK14-NEXT:    store i8* null, i8** [[TMP86]], align 8
-// CHECK14-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 3
-// CHECK14-NEXT:    [[TMP88:%.*]] = bitcast i8** [[TMP87]] to i64*
-// CHECK14-NEXT:    store i64 [[TMP67]], i64* [[TMP88]], align 8
-// CHECK14-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 3
-// CHECK14-NEXT:    [[TMP90:%.*]] = bitcast i8** [[TMP89]] to i64*
-// CHECK14-NEXT:    store i64 [[TMP67]], i64* [[TMP90]], align 8
-// CHECK14-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 3
-// CHECK14-NEXT:    store i64 4, i64* [[TMP91]], align 8
-// CHECK14-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 3
-// CHECK14-NEXT:    store i8* null, i8** [[TMP92]], align 8
-// CHECK14-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 0
-// CHECK14-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 0
-// CHECK14-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 0
-// CHECK14-NEXT:    [[TMP96:%.*]] = load i32, i32* [[N]], align 4
-// CHECK14-NEXT:    store i32 [[TMP96]], i32* [[DOTCAPTURE_EXPR_27]], align 4
-// CHECK14-NEXT:    [[TMP97:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_27]], align 4
-// CHECK14-NEXT:    [[SUB29:%.*]] = sub nsw i32 [[TMP97]], 0
+// CHECK14-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 1
+// CHECK14-NEXT:    store i8* null, i8** [[TMP77]], align 8
+// CHECK14-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 2
+// CHECK14-NEXT:    [[TMP79:%.*]] = bitcast i8** [[TMP78]] to i32**
+// CHECK14-NEXT:    store i32* [[VLA]], i32** [[TMP79]], align 8
+// CHECK14-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 2
+// CHECK14-NEXT:    [[TMP81:%.*]] = bitcast i8** [[TMP80]] to i32**
+// CHECK14-NEXT:    store i32* [[VLA]], i32** [[TMP81]], align 8
+// CHECK14-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 2
+// CHECK14-NEXT:    store i64 [[TMP66]], i64* [[TMP82]], align 8
+// CHECK14-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 2
+// CHECK14-NEXT:    store i8* null, i8** [[TMP83]], align 8
+// CHECK14-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 3
+// CHECK14-NEXT:    [[TMP85:%.*]] = bitcast i8** [[TMP84]] to i64*
+// CHECK14-NEXT:    store i64 [[TMP65]], i64* [[TMP85]], align 8
+// CHECK14-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 3
+// CHECK14-NEXT:    [[TMP87:%.*]] = bitcast i8** [[TMP86]] to i64*
+// CHECK14-NEXT:    store i64 [[TMP65]], i64* [[TMP87]], align 8
+// CHECK14-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 3
+// CHECK14-NEXT:    store i8* null, i8** [[TMP88]], align 8
+// CHECK14-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 0
+// CHECK14-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 0
+// CHECK14-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 0
+// CHECK14-NEXT:    [[TMP92:%.*]] = load i32, i32* [[N]], align 4
+// CHECK14-NEXT:    store i32 [[TMP92]], i32* [[DOTCAPTURE_EXPR_27]], align 4
+// CHECK14-NEXT:    [[TMP93:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_27]], align 4
+// CHECK14-NEXT:    [[SUB29:%.*]] = sub nsw i32 [[TMP93]], 0
 // CHECK14-NEXT:    [[DIV30:%.*]] = sdiv i32 [[SUB29]], 1
 // CHECK14-NEXT:    [[SUB31:%.*]] = sub nsw i32 [[DIV30]], 1
 // CHECK14-NEXT:    store i32 [[SUB31]], i32* [[DOTCAPTURE_EXPR_28]], align 4
-// CHECK14-NEXT:    [[TMP98:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_28]], align 4
-// CHECK14-NEXT:    [[ADD32:%.*]] = add nsw i32 [[TMP98]], 1
-// CHECK14-NEXT:    [[TMP99:%.*]] = zext i32 [[ADD32]] to i64
-// CHECK14-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP99]])
-// CHECK14-NEXT:    [[TMP100:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l147.region_id, i32 4, i8** [[TMP93]], i8** [[TMP94]], i64* [[TMP95]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.7, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK14-NEXT:    [[TMP101:%.*]] = icmp ne i32 [[TMP100]], 0
-// CHECK14-NEXT:    br i1 [[TMP101]], label [[OMP_OFFLOAD_FAILED33:%.*]], label [[OMP_OFFLOAD_CONT34:%.*]]
+// CHECK14-NEXT:    [[TMP94:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_28]], align 4
+// CHECK14-NEXT:    [[ADD32:%.*]] = add nsw i32 [[TMP94]], 1
+// CHECK14-NEXT:    [[TMP95:%.*]] = zext i32 [[ADD32]] to i64
+// CHECK14-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP95]])
+// CHECK14-NEXT:    [[TMP96:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l147.region_id, i32 4, i8** [[TMP89]], i8** [[TMP90]], i64* [[TMP91]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK14-NEXT:    [[TMP97:%.*]] = icmp ne i32 [[TMP96]], 0
+// CHECK14-NEXT:    br i1 [[TMP97]], label [[OMP_OFFLOAD_FAILED33:%.*]], label [[OMP_OFFLOAD_CONT34:%.*]]
 // CHECK14:       omp_offload.failed33:
-// CHECK14-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l147(i64 [[TMP65]], i64 [[TMP1]], i32* [[VLA]], i64 [[TMP67]]) #[[ATTR3]]
+// CHECK14-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l147(i64 [[TMP63]], i64 [[TMP1]], i32* [[VLA]], i64 [[TMP65]]) #[[ATTR3]]
 // CHECK14-NEXT:    br label [[OMP_OFFLOAD_CONT34]]
 // CHECK14:       omp_offload.cont34:
-// CHECK14-NEXT:    [[TMP102:%.*]] = load i32, i32* [[N]], align 4
+// CHECK14-NEXT:    [[TMP98:%.*]] = load i32, i32* [[N]], align 4
 // CHECK14-NEXT:    [[CONV36:%.*]] = bitcast i64* [[N_CASTED35]] to i32*
-// CHECK14-NEXT:    store i32 [[TMP102]], i32* [[CONV36]], align 4
-// CHECK14-NEXT:    [[TMP103:%.*]] = load i64, i64* [[N_CASTED35]], align 8
-// CHECK14-NEXT:    [[TMP104:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK14-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS37]], i32 0, i32 0
-// CHECK14-NEXT:    [[TMP106:%.*]] = bitcast i8** [[TMP105]] to i64*
-// CHECK14-NEXT:    store i64 [[TMP103]], i64* [[TMP106]], align 8
-// CHECK14-NEXT:    [[TMP107:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS38]], i32 0, i32 0
+// CHECK14-NEXT:    store i32 [[TMP98]], i32* [[CONV36]], align 4
+// CHECK14-NEXT:    [[TMP99:%.*]] = load i64, i64* [[N_CASTED35]], align 8
+// CHECK14-NEXT:    [[TMP100:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK14-NEXT:    [[TMP101:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES40]] to i8*
+// CHECK14-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP101]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes.12 to i8*), i64 24, i1 false)
+// CHECK14-NEXT:    [[TMP102:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS37]], i32 0, i32 0
+// CHECK14-NEXT:    [[TMP103:%.*]] = bitcast i8** [[TMP102]] to i64*
+// CHECK14-NEXT:    store i64 [[TMP99]], i64* [[TMP103]], align 8
+// CHECK14-NEXT:    [[TMP104:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS38]], i32 0, i32 0
+// CHECK14-NEXT:    [[TMP105:%.*]] = bitcast i8** [[TMP104]] to i64*
+// CHECK14-NEXT:    store i64 [[TMP99]], i64* [[TMP105]], align 8
+// CHECK14-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS39]], i64 0, i64 0
+// CHECK14-NEXT:    store i8* null, i8** [[TMP106]], align 8
+// CHECK14-NEXT:    [[TMP107:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS37]], i32 0, i32 1
 // CHECK14-NEXT:    [[TMP108:%.*]] = bitcast i8** [[TMP107]] to i64*
-// CHECK14-NEXT:    store i64 [[TMP103]], i64* [[TMP108]], align 8
-// CHECK14-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES40]], i32 0, i32 0
-// CHECK14-NEXT:    store i64 4, i64* [[TMP109]], align 8
-// CHECK14-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS39]], i64 0, i64 0
-// CHECK14-NEXT:    store i8* null, i8** [[TMP110]], align 8
-// CHECK14-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS37]], i32 0, i32 1
-// CHECK14-NEXT:    [[TMP112:%.*]] = bitcast i8** [[TMP111]] to i64*
-// CHECK14-NEXT:    store i64 [[TMP1]], i64* [[TMP112]], align 8
-// CHECK14-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS38]], i32 0, i32 1
-// CHECK14-NEXT:    [[TMP114:%.*]] = bitcast i8** [[TMP113]] to i64*
-// CHECK14-NEXT:    store i64 [[TMP1]], i64* [[TMP114]], align 8
-// CHECK14-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES40]], i32 0, i32 1
-// CHECK14-NEXT:    store i64 8, i64* [[TMP115]], align 8
-// CHECK14-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS39]], i64 0, i64 1
-// CHECK14-NEXT:    store i8* null, i8** [[TMP116]], align 8
-// CHECK14-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS37]], i32 0, i32 2
-// CHECK14-NEXT:    [[TMP118:%.*]] = bitcast i8** [[TMP117]] to i32**
-// CHECK14-NEXT:    store i32* [[VLA]], i32** [[TMP118]], align 8
-// CHECK14-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS38]], i32 0, i32 2
-// CHECK14-NEXT:    [[TMP120:%.*]] = bitcast i8** [[TMP119]] to i32**
-// CHECK14-NEXT:    store i32* [[VLA]], i32** [[TMP120]], align 8
-// CHECK14-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES40]], i32 0, i32 2
-// CHECK14-NEXT:    store i64 [[TMP104]], i64* [[TMP121]], align 8
-// CHECK14-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS39]], i64 0, i64 2
-// CHECK14-NEXT:    store i8* null, i8** [[TMP122]], align 8
-// CHECK14-NEXT:    [[TMP123:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS37]], i32 0, i32 0
-// CHECK14-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS38]], i32 0, i32 0
-// CHECK14-NEXT:    [[TMP125:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES40]], i32 0, i32 0
-// CHECK14-NEXT:    [[TMP126:%.*]] = load i32, i32* [[N]], align 4
-// CHECK14-NEXT:    store i32 [[TMP126]], i32* [[DOTCAPTURE_EXPR_42]], align 4
-// CHECK14-NEXT:    [[TMP127:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_42]], align 4
-// CHECK14-NEXT:    [[SUB44:%.*]] = sub nsw i32 [[TMP127]], 0
+// CHECK14-NEXT:    store i64 [[TMP1]], i64* [[TMP108]], align 8
+// CHECK14-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS38]], i32 0, i32 1
+// CHECK14-NEXT:    [[TMP110:%.*]] = bitcast i8** [[TMP109]] to i64*
+// CHECK14-NEXT:    store i64 [[TMP1]], i64* [[TMP110]], align 8
+// CHECK14-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS39]], i64 0, i64 1
+// CHECK14-NEXT:    store i8* null, i8** [[TMP111]], align 8
+// CHECK14-NEXT:    [[TMP112:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS37]], i32 0, i32 2
+// CHECK14-NEXT:    [[TMP113:%.*]] = bitcast i8** [[TMP112]] to i32**
+// CHECK14-NEXT:    store i32* [[VLA]], i32** [[TMP113]], align 8
+// CHECK14-NEXT:    [[TMP114:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS38]], i32 0, i32 2
+// CHECK14-NEXT:    [[TMP115:%.*]] = bitcast i8** [[TMP114]] to i32**
+// CHECK14-NEXT:    store i32* [[VLA]], i32** [[TMP115]], align 8
+// CHECK14-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES40]], i32 0, i32 2
+// CHECK14-NEXT:    store i64 [[TMP100]], i64* [[TMP116]], align 8
+// CHECK14-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS39]], i64 0, i64 2
+// CHECK14-NEXT:    store i8* null, i8** [[TMP117]], align 8
+// CHECK14-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS37]], i32 0, i32 0
+// CHECK14-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS38]], i32 0, i32 0
+// CHECK14-NEXT:    [[TMP120:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES40]], i32 0, i32 0
+// CHECK14-NEXT:    [[TMP121:%.*]] = load i32, i32* [[N]], align 4
+// CHECK14-NEXT:    store i32 [[TMP121]], i32* [[DOTCAPTURE_EXPR_42]], align 4
+// CHECK14-NEXT:    [[TMP122:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_42]], align 4
+// CHECK14-NEXT:    [[SUB44:%.*]] = sub nsw i32 [[TMP122]], 0
 // CHECK14-NEXT:    [[DIV45:%.*]] = sdiv i32 [[SUB44]], 1
 // CHECK14-NEXT:    [[SUB46:%.*]] = sub nsw i32 [[DIV45]], 1
 // CHECK14-NEXT:    store i32 [[SUB46]], i32* [[DOTCAPTURE_EXPR_43]], align 4
-// CHECK14-NEXT:    [[TMP128:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_43]], align 4
-// CHECK14-NEXT:    [[ADD47:%.*]] = add nsw i32 [[TMP128]], 1
-// CHECK14-NEXT:    [[TMP129:%.*]] = zext i32 [[ADD47]] to i64
-// CHECK14-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP129]])
-// CHECK14-NEXT:    [[TMP130:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l151.region_id, i32 3, i8** [[TMP123]], i8** [[TMP124]], i64* [[TMP125]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.10, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK14-NEXT:    [[TMP131:%.*]] = icmp ne i32 [[TMP130]], 0
-// CHECK14-NEXT:    br i1 [[TMP131]], label [[OMP_OFFLOAD_FAILED48:%.*]], label [[OMP_OFFLOAD_CONT49:%.*]]
+// CHECK14-NEXT:    [[TMP123:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_43]], align 4
+// CHECK14-NEXT:    [[ADD47:%.*]] = add nsw i32 [[TMP123]], 1
+// CHECK14-NEXT:    [[TMP124:%.*]] = zext i32 [[ADD47]] to i64
+// CHECK14-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP124]])
+// CHECK14-NEXT:    [[TMP125:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l151.region_id, i32 3, i8** [[TMP118]], i8** [[TMP119]], i64* [[TMP120]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK14-NEXT:    [[TMP126:%.*]] = icmp ne i32 [[TMP125]], 0
+// CHECK14-NEXT:    br i1 [[TMP126]], label [[OMP_OFFLOAD_FAILED48:%.*]], label [[OMP_OFFLOAD_CONT49:%.*]]
 // CHECK14:       omp_offload.failed48:
-// CHECK14-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l151(i64 [[TMP103]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK14-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l151(i64 [[TMP99]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK14-NEXT:    br label [[OMP_OFFLOAD_CONT49]]
 // CHECK14:       omp_offload.cont49:
-// CHECK14-NEXT:    [[TMP132:%.*]] = load i32, i32* [[M]], align 4
-// CHECK14-NEXT:    store i32 [[TMP132]], i32* [[DOTCAPTURE_EXPR_50]], align 4
-// CHECK14-NEXT:    [[TMP133:%.*]] = load i32, i32* [[N]], align 4
+// CHECK14-NEXT:    [[TMP127:%.*]] = load i32, i32* [[M]], align 4
+// CHECK14-NEXT:    store i32 [[TMP127]], i32* [[DOTCAPTURE_EXPR_50]], align 4
+// CHECK14-NEXT:    [[TMP128:%.*]] = load i32, i32* [[N]], align 4
 // CHECK14-NEXT:    [[CONV52:%.*]] = bitcast i64* [[N_CASTED51]] to i32*
-// CHECK14-NEXT:    store i32 [[TMP133]], i32* [[CONV52]], align 4
-// CHECK14-NEXT:    [[TMP134:%.*]] = load i64, i64* [[N_CASTED51]], align 8
-// CHECK14-NEXT:    [[TMP135:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_50]], align 4
+// CHECK14-NEXT:    store i32 [[TMP128]], i32* [[CONV52]], align 4
+// CHECK14-NEXT:    [[TMP129:%.*]] = load i64, i64* [[N_CASTED51]], align 8
+// CHECK14-NEXT:    [[TMP130:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_50]], align 4
 // CHECK14-NEXT:    [[CONV54:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED53]] to i32*
-// CHECK14-NEXT:    store i32 [[TMP135]], i32* [[CONV54]], align 4
-// CHECK14-NEXT:    [[TMP136:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED53]], align 8
-// CHECK14-NEXT:    [[TMP137:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK14-NEXT:    [[TMP138:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS55]], i32 0, i32 0
-// CHECK14-NEXT:    [[TMP139:%.*]] = bitcast i8** [[TMP138]] to i64*
-// CHECK14-NEXT:    store i64 [[TMP134]], i64* [[TMP139]], align 8
-// CHECK14-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS56]], i32 0, i32 0
-// CHECK14-NEXT:    [[TMP141:%.*]] = bitcast i8** [[TMP140]] to i64*
-// CHECK14-NEXT:    store i64 [[TMP134]], i64* [[TMP141]], align 8
-// CHECK14-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES58]], i32 0, i32 0
-// CHECK14-NEXT:    store i64 4, i64* [[TMP142]], align 8
-// CHECK14-NEXT:    [[TMP143:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS57]], i64 0, i64 0
+// CHECK14-NEXT:    store i32 [[TMP130]], i32* [[CONV54]], align 4
+// CHECK14-NEXT:    [[TMP131:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED53]], align 8
+// CHECK14-NEXT:    [[TMP132:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK14-NEXT:    [[TMP133:%.*]] = bitcast [4 x i64]* [[DOTOFFLOAD_SIZES58]] to i8*
+// CHECK14-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP133]], i8* align 8 bitcast ([4 x i64]* @.offload_sizes.16 to i8*), i64 32, i1 false)
+// CHECK14-NEXT:    [[TMP134:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS55]], i32 0, i32 0
+// CHECK14-NEXT:    [[TMP135:%.*]] = bitcast i8** [[TMP134]] to i64*
+// CHECK14-NEXT:    store i64 [[TMP129]], i64* [[TMP135]], align 8
+// CHECK14-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS56]], i32 0, i32 0
+// CHECK14-NEXT:    [[TMP137:%.*]] = bitcast i8** [[TMP136]] to i64*
+// CHECK14-NEXT:    store i64 [[TMP129]], i64* [[TMP137]], align 8
+// CHECK14-NEXT:    [[TMP138:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS57]], i64 0, i64 0
+// CHECK14-NEXT:    store i8* null, i8** [[TMP138]], align 8
+// CHECK14-NEXT:    [[TMP139:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS55]], i32 0, i32 1
+// CHECK14-NEXT:    [[TMP140:%.*]] = bitcast i8** [[TMP139]] to i64*
+// CHECK14-NEXT:    store i64 [[TMP1]], i64* [[TMP140]], align 8
+// CHECK14-NEXT:    [[TMP141:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS56]], i32 0, i32 1
+// CHECK14-NEXT:    [[TMP142:%.*]] = bitcast i8** [[TMP141]] to i64*
+// CHECK14-NEXT:    store i64 [[TMP1]], i64* [[TMP142]], align 8
+// CHECK14-NEXT:    [[TMP143:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS57]], i64 0, i64 1
 // CHECK14-NEXT:    store i8* null, i8** [[TMP143]], align 8
-// CHECK14-NEXT:    [[TMP144:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS55]], i32 0, i32 1
-// CHECK14-NEXT:    [[TMP145:%.*]] = bitcast i8** [[TMP144]] to i64*
-// CHECK14-NEXT:    store i64 [[TMP1]], i64* [[TMP145]], align 8
-// CHECK14-NEXT:    [[TMP146:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS56]], i32 0, i32 1
-// CHECK14-NEXT:    [[TMP147:%.*]] = bitcast i8** [[TMP146]] to i64*
-// CHECK14-NEXT:    store i64 [[TMP1]], i64* [[TMP147]], align 8
-// CHECK14-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES58]], i32 0, i32 1
-// CHECK14-NEXT:    store i64 8, i64* [[TMP148]], align 8
-// CHECK14-NEXT:    [[TMP149:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS57]], i64 0, i64 1
+// CHECK14-NEXT:    [[TMP144:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS55]], i32 0, i32 2
+// CHECK14-NEXT:    [[TMP145:%.*]] = bitcast i8** [[TMP144]] to i32**
+// CHECK14-NEXT:    store i32* [[VLA]], i32** [[TMP145]], align 8
+// CHECK14-NEXT:    [[TMP146:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS56]], i32 0, i32 2
+// CHECK14-NEXT:    [[TMP147:%.*]] = bitcast i8** [[TMP146]] to i32**
+// CHECK14-NEXT:    store i32* [[VLA]], i32** [[TMP147]], align 8
+// CHECK14-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES58]], i32 0, i32 2
+// CHECK14-NEXT:    store i64 [[TMP132]], i64* [[TMP148]], align 8
+// CHECK14-NEXT:    [[TMP149:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS57]], i64 0, i64 2
 // CHECK14-NEXT:    store i8* null, i8** [[TMP149]], align 8
-// CHECK14-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS55]], i32 0, i32 2
-// CHECK14-NEXT:    [[TMP151:%.*]] = bitcast i8** [[TMP150]] to i32**
-// CHECK14-NEXT:    store i32* [[VLA]], i32** [[TMP151]], align 8
-// CHECK14-NEXT:    [[TMP152:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS56]], i32 0, i32 2
-// CHECK14-NEXT:    [[TMP153:%.*]] = bitcast i8** [[TMP152]] to i32**
-// CHECK14-NEXT:    store i32* [[VLA]], i32** [[TMP153]], align 8
-// CHECK14-NEXT:    [[TMP154:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES58]], i32 0, i32 2
-// CHECK14-NEXT:    store i64 [[TMP137]], i64* [[TMP154]], align 8
-// CHECK14-NEXT:    [[TMP155:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS57]], i64 0, i64 2
-// CHECK14-NEXT:    store i8* null, i8** [[TMP155]], align 8
-// CHECK14-NEXT:    [[TMP156:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS55]], i32 0, i32 3
-// CHECK14-NEXT:    [[TMP157:%.*]] = bitcast i8** [[TMP156]] to i64*
-// CHECK14-NEXT:    store i64 [[TMP136]], i64* [[TMP157]], align 8
-// CHECK14-NEXT:    [[TMP158:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS56]], i32 0, i32 3
-// CHECK14-NEXT:    [[TMP159:%.*]] = bitcast i8** [[TMP158]] to i64*
-// CHECK14-NEXT:    store i64 [[TMP136]], i64* [[TMP159]], align 8
-// CHECK14-NEXT:    [[TMP160:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES58]], i32 0, i32 3
-// CHECK14-NEXT:    store i64 4, i64* [[TMP160]], align 8
-// CHECK14-NEXT:    [[TMP161:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS57]], i64 0, i64 3
-// CHECK14-NEXT:    store i8* null, i8** [[TMP161]], align 8
-// CHECK14-NEXT:    [[TMP162:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS55]], i32 0, i32 0
-// CHECK14-NEXT:    [[TMP163:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS56]], i32 0, i32 0
-// CHECK14-NEXT:    [[TMP164:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES58]], i32 0, i32 0
-// CHECK14-NEXT:    [[TMP165:%.*]] = load i32, i32* [[N]], align 4
-// CHECK14-NEXT:    store i32 [[TMP165]], i32* [[DOTCAPTURE_EXPR_60]], align 4
-// CHECK14-NEXT:    [[TMP166:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_60]], align 4
-// CHECK14-NEXT:    [[SUB62:%.*]] = sub nsw i32 [[TMP166]], 0
+// CHECK14-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS55]], i32 0, i32 3
+// CHECK14-NEXT:    [[TMP151:%.*]] = bitcast i8** [[TMP150]] to i64*
+// CHECK14-NEXT:    store i64 [[TMP131]], i64* [[TMP151]], align 8
+// CHECK14-NEXT:    [[TMP152:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS56]], i32 0, i32 3
+// CHECK14-NEXT:    [[TMP153:%.*]] = bitcast i8** [[TMP152]] to i64*
+// CHECK14-NEXT:    store i64 [[TMP131]], i64* [[TMP153]], align 8
+// CHECK14-NEXT:    [[TMP154:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS57]], i64 0, i64 3
+// CHECK14-NEXT:    store i8* null, i8** [[TMP154]], align 8
+// CHECK14-NEXT:    [[TMP155:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS55]], i32 0, i32 0
+// CHECK14-NEXT:    [[TMP156:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS56]], i32 0, i32 0
+// CHECK14-NEXT:    [[TMP157:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES58]], i32 0, i32 0
+// CHECK14-NEXT:    [[TMP158:%.*]] = load i32, i32* [[N]], align 4
+// CHECK14-NEXT:    store i32 [[TMP158]], i32* [[DOTCAPTURE_EXPR_60]], align 4
+// CHECK14-NEXT:    [[TMP159:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_60]], align 4
+// CHECK14-NEXT:    [[SUB62:%.*]] = sub nsw i32 [[TMP159]], 0
 // CHECK14-NEXT:    [[DIV63:%.*]] = sdiv i32 [[SUB62]], 1
 // CHECK14-NEXT:    [[SUB64:%.*]] = sub nsw i32 [[DIV63]], 1
 // CHECK14-NEXT:    store i32 [[SUB64]], i32* [[DOTCAPTURE_EXPR_61]], align 4
-// CHECK14-NEXT:    [[TMP167:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_61]], align 4
-// CHECK14-NEXT:    [[ADD65:%.*]] = add nsw i32 [[TMP167]], 1
-// CHECK14-NEXT:    [[TMP168:%.*]] = zext i32 [[ADD65]] to i64
-// CHECK14-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP168]])
-// CHECK14-NEXT:    [[TMP169:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l155.region_id, i32 4, i8** [[TMP162]], i8** [[TMP163]], i64* [[TMP164]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK14-NEXT:    [[TMP170:%.*]] = icmp ne i32 [[TMP169]], 0
-// CHECK14-NEXT:    br i1 [[TMP170]], label [[OMP_OFFLOAD_FAILED66:%.*]], label [[OMP_OFFLOAD_CONT67:%.*]]
+// CHECK14-NEXT:    [[TMP160:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_61]], align 4
+// CHECK14-NEXT:    [[ADD65:%.*]] = add nsw i32 [[TMP160]], 1
+// CHECK14-NEXT:    [[TMP161:%.*]] = zext i32 [[ADD65]] to i64
+// CHECK14-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP161]])
+// CHECK14-NEXT:    [[TMP162:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l155.region_id, i32 4, i8** [[TMP155]], i8** [[TMP156]], i64* [[TMP157]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.17, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK14-NEXT:    [[TMP163:%.*]] = icmp ne i32 [[TMP162]], 0
+// CHECK14-NEXT:    br i1 [[TMP163]], label [[OMP_OFFLOAD_FAILED66:%.*]], label [[OMP_OFFLOAD_CONT67:%.*]]
 // CHECK14:       omp_offload.failed66:
-// CHECK14-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l155(i64 [[TMP134]], i64 [[TMP1]], i32* [[VLA]], i64 [[TMP136]]) #[[ATTR3]]
+// CHECK14-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l155(i64 [[TMP129]], i64 [[TMP1]], i32* [[VLA]], i64 [[TMP131]]) #[[ATTR3]]
 // CHECK14-NEXT:    br label [[OMP_OFFLOAD_CONT67]]
 // CHECK14:       omp_offload.cont67:
-// CHECK14-NEXT:    [[TMP171:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK14-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10EEiT_(i32 noundef signext [[TMP171]])
+// CHECK14-NEXT:    [[TMP164:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK14-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10EEiT_(i32 noundef signext [[TMP164]])
 // CHECK14-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK14-NEXT:    [[TMP172:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK14-NEXT:    call void @llvm.stackrestore(i8* [[TMP172]])
-// CHECK14-NEXT:    [[TMP173:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK14-NEXT:    ret i32 [[TMP173]]
+// CHECK14-NEXT:    [[TMP165:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK14-NEXT:    call void @llvm.stackrestore(i8* [[TMP165]])
+// CHECK14-NEXT:    [[TMP166:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK14-NEXT:    ret i32 [[TMP166]]
 //
 //
 // CHECK14-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l139
@@ -12340,11 +12312,11 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK14-NEXT:    store i32 [[TMP4]], i32* [[CONV3]], align 4
 // CHECK14-NEXT:    [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP3]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP5]])
+// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i64 [[TMP3]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP5]])
 // CHECK14-NEXT:    ret void
 //
 //
-// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..5
+// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..6
 // CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK14-NEXT:  entry:
 // CHECK14-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -12431,7 +12403,7 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    [[CONV9:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK14-NEXT:    store i32 [[TMP22]], i32* [[CONV9]], align 4, !llvm.access.group !28
 // CHECK14-NEXT:    [[TMP23:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !28
-// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i64 [[TMP21]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP23]]), !llvm.access.group !28
+// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*, i64)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i64 [[TMP21]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP23]]), !llvm.access.group !28
 // CHECK14-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK14:       omp.inner.for.inc:
 // CHECK14-NEXT:    [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28
@@ -12485,7 +12457,7 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    ret void
 //
 //
-// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..6
+// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..7
 // CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK14-NEXT:  entry:
 // CHECK14-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -12622,11 +12594,11 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32*
 // CHECK14-NEXT:    store i32 [[TMP2]], i32* [[CONV1]], align 4
 // CHECK14-NEXT:    [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8
-// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), i64 [[TMP3]], i64 [[TMP0]], i32* [[TMP1]])
+// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), i64 [[TMP3]], i64 [[TMP0]], i32* [[TMP1]])
 // CHECK14-NEXT:    ret void
 //
 //
-// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..8
+// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..10
 // CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] {
 // CHECK14-NEXT:  entry:
 // CHECK14-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -12703,7 +12675,7 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32*
 // CHECK14-NEXT:    store i32 [[TMP19]], i32* [[CONV6]], align 4, !llvm.access.group !34
 // CHECK14-NEXT:    [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !34
-// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], i32* [[TMP1]]), !llvm.access.group !34
+// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], i32* [[TMP1]]), !llvm.access.group !34
 // CHECK14-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK14:       omp.inner.for.inc:
 // CHECK14-NEXT:    [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34
@@ -12734,7 +12706,7 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    ret void
 //
 //
-// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..9
+// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..11
 // CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] {
 // CHECK14-NEXT:  entry:
 // CHECK14-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -12872,11 +12844,11 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK14-NEXT:    store i32 [[TMP4]], i32* [[CONV3]], align 4
 // CHECK14-NEXT:    [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP3]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP5]])
+// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i64 [[TMP3]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP5]])
 // CHECK14-NEXT:    ret void
 //
 //
-// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..11
+// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..14
 // CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK14-NEXT:  entry:
 // CHECK14-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -12961,7 +12933,7 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    [[CONV9:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK14-NEXT:    store i32 [[TMP21]], i32* [[CONV9]], align 4, !llvm.access.group !40
 // CHECK14-NEXT:    [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !40
-// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*, i64)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP22]]), !llvm.access.group !40
+// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP22]]), !llvm.access.group !40
 // CHECK14-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK14:       omp.inner.for.inc:
 // CHECK14-NEXT:    [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40
@@ -12992,7 +12964,7 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    ret void
 //
 //
-// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..12
+// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..15
 // CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK14-NEXT:  entry:
 // CHECK14-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -13110,7 +13082,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK14-LABEL: define {{[^@]+}}@_Z5tmainIiLi10EEiT_
-// CHECK14-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK14-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK14-NEXT:  entry:
 // CHECK14-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK14-NEXT:    [[A:%.*]] = alloca [10 x i32], align 4
@@ -13152,7 +13124,7 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK14-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK14-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK14-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l112.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.16, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK14-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l112.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.20, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.21, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK14-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK14-NEXT:    br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK14:       omp_offload.failed:
@@ -13170,7 +13142,7 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK14-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
 // CHECK14-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK14-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l116.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.19, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.20, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK14-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l116.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.24, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.25, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK14-NEXT:    [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
 // CHECK14-NEXT:    br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]]
 // CHECK14:       omp_offload.failed5:
@@ -13202,7 +13174,7 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0
 // CHECK14-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS8]], i32 0, i32 0
 // CHECK14-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK14-NEXT:    [[TMP33:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l120.region_id, i32 2, i8** [[TMP31]], i8** [[TMP32]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.23, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.24, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK14-NEXT:    [[TMP33:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l120.region_id, i32 2, i8** [[TMP31]], i8** [[TMP32]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.28, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.29, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK14-NEXT:    [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0
 // CHECK14-NEXT:    br i1 [[TMP34]], label [[OMP_OFFLOAD_FAILED11:%.*]], label [[OMP_OFFLOAD_CONT12:%.*]]
 // CHECK14:       omp_offload.failed11:
@@ -13220,7 +13192,7 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0
 // CHECK14-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 0
 // CHECK14-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK14-NEXT:    [[TMP42:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l124.region_id, i32 1, i8** [[TMP40]], i8** [[TMP41]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.27, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.28, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK14-NEXT:    [[TMP42:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l124.region_id, i32 1, i8** [[TMP40]], i8** [[TMP41]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.32, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.33, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK14-NEXT:    [[TMP43:%.*]] = icmp ne i32 [[TMP42]], 0
 // CHECK14-NEXT:    br i1 [[TMP43]], label [[OMP_OFFLOAD_FAILED17:%.*]], label [[OMP_OFFLOAD_CONT18:%.*]]
 // CHECK14:       omp_offload.failed17:
@@ -13252,7 +13224,7 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 0
 // CHECK14-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 0
 // CHECK14-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK14-NEXT:    [[TMP59:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l128.region_id, i32 2, i8** [[TMP57]], i8** [[TMP58]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.31, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.32, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK14-NEXT:    [[TMP59:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l128.region_id, i32 2, i8** [[TMP57]], i8** [[TMP58]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.36, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.37, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK14-NEXT:    [[TMP60:%.*]] = icmp ne i32 [[TMP59]], 0
 // CHECK14-NEXT:    br i1 [[TMP60]], label [[OMP_OFFLOAD_FAILED26:%.*]], label [[OMP_OFFLOAD_CONT27:%.*]]
 // CHECK14:       omp_offload.failed26:
@@ -13268,11 +13240,11 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8
 // CHECK14-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8
 // CHECK14-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8
-// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK14-NEXT:    ret void
 //
 //
-// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..14
+// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..18
 // CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK14-NEXT:  entry:
 // CHECK14-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -13320,7 +13292,7 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
 // CHECK14-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !46
 // CHECK14-NEXT:    [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
-// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !46
+// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !46
 // CHECK14-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK14:       omp.inner.for.inc:
 // CHECK14-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46
@@ -13342,7 +13314,7 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    ret void
 //
 //
-// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..15
+// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..19
 // CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK14-NEXT:  entry:
 // CHECK14-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -13432,11 +13404,11 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8
 // CHECK14-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8
 // CHECK14-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8
-// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..17 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..22 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK14-NEXT:    ret void
 //
 //
-// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..17
+// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..22
 // CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK14-NEXT:  entry:
 // CHECK14-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -13484,7 +13456,7 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
 // CHECK14-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !52
 // CHECK14-NEXT:    [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
-// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !52
+// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..23 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !52
 // CHECK14-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK14:       omp.inner.for.inc:
 // CHECK14-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52
@@ -13506,7 +13478,7 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    ret void
 //
 //
-// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..18
+// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..23
 // CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK14-NEXT:  entry:
 // CHECK14-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -13604,11 +13576,11 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK14-NEXT:    store i32 [[TMP1]], i32* [[CONV1]], align 4
 // CHECK14-NEXT:    [[TMP2:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i64)* @.omp_outlined..21 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i64 [[TMP2]])
+// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i64)* @.omp_outlined..26 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i64 [[TMP2]])
 // CHECK14-NEXT:    ret void
 //
 //
-// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..21
+// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..26
 // CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK14-NEXT:  entry:
 // CHECK14-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -13664,7 +13636,7 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK14-NEXT:    store i32 [[TMP12]], i32* [[CONV2]], align 4, !llvm.access.group !58
 // CHECK14-NEXT:    [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !58
-// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..22 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]), !llvm.access.group !58
+// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..27 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]), !llvm.access.group !58
 // CHECK14-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK14:       omp.inner.for.inc:
 // CHECK14-NEXT:    [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58
@@ -13686,7 +13658,7 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    ret void
 //
 //
-// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..22
+// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..27
 // CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK14-NEXT:  entry:
 // CHECK14-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -13801,11 +13773,11 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8
 // CHECK14-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8
 // CHECK14-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8
-// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..25 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK14-NEXT:    ret void
 //
 //
-// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..25
+// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..30
 // CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK14-NEXT:  entry:
 // CHECK14-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -13853,7 +13825,7 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
 // CHECK14-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !64
 // CHECK14-NEXT:    [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
-// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !64
+// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !64
 // CHECK14-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK14:       omp.inner.for.inc:
 // CHECK14-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !64
@@ -13875,7 +13847,7 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    ret void
 //
 //
-// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..26
+// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..31
 // CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK14-NEXT:  entry:
 // CHECK14-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -13971,11 +13943,11 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK14-NEXT:    store i32 [[TMP1]], i32* [[CONV1]], align 4
 // CHECK14-NEXT:    [[TMP2:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i64)* @.omp_outlined..29 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i64 [[TMP2]])
+// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i64)* @.omp_outlined..34 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i64 [[TMP2]])
 // CHECK14-NEXT:    ret void
 //
 //
-// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..29
+// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..34
 // CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK14-NEXT:  entry:
 // CHECK14-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -14031,7 +14003,7 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK14-NEXT:    store i32 [[TMP12]], i32* [[CONV2]], align 4, !llvm.access.group !70
 // CHECK14-NEXT:    [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !70
-// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]), !llvm.access.group !70
+// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]), !llvm.access.group !70
 // CHECK14-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK14:       omp.inner.for.inc:
 // CHECK14-NEXT:    [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !70
@@ -14053,7 +14025,7 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    ret void
 //
 //
-// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..30
+// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..35
 // CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK14-NEXT:  entry:
 // CHECK14-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -14140,7 +14112,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK14-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK14-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK14-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK14-NEXT:  entry:
 // CHECK14-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK14-NEXT:    ret void
@@ -14215,14 +14187,14 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4
 // CHECK15-NEXT:    [[TMP4:%.*]] = mul nuw i32 [[TMP0]], 4
 // CHECK15-NEXT:    [[TMP5:%.*]] = sext i32 [[TMP4]] to i64
-// CHECK15-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK15-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32*
-// CHECK15-NEXT:    store i32 [[TMP3]], i32* [[TMP7]], align 4
-// CHECK15-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK15-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i32*
-// CHECK15-NEXT:    store i32 [[TMP3]], i32* [[TMP9]], align 4
-// CHECK15-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK15-NEXT:    store i64 4, i64* [[TMP10]], align 4
+// CHECK15-NEXT:    [[TMP6:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK15-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP6]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes to i8*), i32 24, i1 false)
+// CHECK15-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK15-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i32*
+// CHECK15-NEXT:    store i32 [[TMP3]], i32* [[TMP8]], align 4
+// CHECK15-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK15-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i32*
+// CHECK15-NEXT:    store i32 [[TMP3]], i32* [[TMP10]], align 4
 // CHECK15-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
 // CHECK15-NEXT:    store i8* null, i8** [[TMP11]], align 4
 // CHECK15-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -14231,302 +14203,288 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK15-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i32*
 // CHECK15-NEXT:    store i32 [[TMP0]], i32* [[TMP15]], align 4
-// CHECK15-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK15-NEXT:    store i64 4, i64* [[TMP16]], align 4
-// CHECK15-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK15-NEXT:    store i8* null, i8** [[TMP17]], align 4
-// CHECK15-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK15-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
-// CHECK15-NEXT:    store i32* [[VLA]], i32** [[TMP19]], align 4
-// CHECK15-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK15-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32**
-// CHECK15-NEXT:    store i32* [[VLA]], i32** [[TMP21]], align 4
-// CHECK15-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK15-NEXT:    store i64 [[TMP5]], i64* [[TMP22]], align 4
-// CHECK15-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK15-NEXT:    store i8* null, i8** [[TMP23]], align 4
-// CHECK15-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK15-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK15-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK15-NEXT:    [[TMP27:%.*]] = load i32, i32* [[N]], align 4
-// CHECK15-NEXT:    store i32 [[TMP27]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK15-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK15-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP28]], 0
+// CHECK15-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK15-NEXT:    store i8* null, i8** [[TMP16]], align 4
+// CHECK15-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK15-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32**
+// CHECK15-NEXT:    store i32* [[VLA]], i32** [[TMP18]], align 4
+// CHECK15-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK15-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32**
+// CHECK15-NEXT:    store i32* [[VLA]], i32** [[TMP20]], align 4
+// CHECK15-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
+// CHECK15-NEXT:    store i64 [[TMP5]], i64* [[TMP21]], align 4
+// CHECK15-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK15-NEXT:    store i8* null, i8** [[TMP22]], align 4
+// CHECK15-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK15-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK15-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK15-NEXT:    [[TMP26:%.*]] = load i32, i32* [[N]], align 4
+// CHECK15-NEXT:    store i32 [[TMP26]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK15-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK15-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP27]], 0
 // CHECK15-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK15-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK15-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK15-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK15-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP29]], 1
-// CHECK15-NEXT:    [[TMP30:%.*]] = zext i32 [[ADD]] to i64
-// CHECK15-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP30]])
-// CHECK15-NEXT:    [[TMP31:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l139.region_id, i32 3, i8** [[TMP24]], i8** [[TMP25]], i64* [[TMP26]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK15-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
-// CHECK15-NEXT:    br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK15-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK15-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP28]], 1
+// CHECK15-NEXT:    [[TMP29:%.*]] = zext i32 [[ADD]] to i64
+// CHECK15-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP29]])
+// CHECK15-NEXT:    [[TMP30:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l139.region_id, i32 3, i8** [[TMP23]], i8** [[TMP24]], i64* [[TMP25]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK15-NEXT:    [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
+// CHECK15-NEXT:    br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK15:       omp_offload.failed:
 // CHECK15-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l139(i32 [[TMP3]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK15-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK15:       omp_offload.cont:
-// CHECK15-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N]], align 4
-// CHECK15-NEXT:    store i32 [[TMP33]], i32* [[N_CASTED3]], align 4
-// CHECK15-NEXT:    [[TMP34:%.*]] = load i32, i32* [[N_CASTED3]], align 4
-// CHECK15-NEXT:    [[TMP35:%.*]] = mul nuw i32 [[TMP0]], 4
-// CHECK15-NEXT:    [[TMP36:%.*]] = sext i32 [[TMP35]] to i64
+// CHECK15-NEXT:    [[TMP32:%.*]] = load i32, i32* [[N]], align 4
+// CHECK15-NEXT:    store i32 [[TMP32]], i32* [[N_CASTED3]], align 4
+// CHECK15-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N_CASTED3]], align 4
+// CHECK15-NEXT:    [[TMP34:%.*]] = mul nuw i32 [[TMP0]], 4
+// CHECK15-NEXT:    [[TMP35:%.*]] = sext i32 [[TMP34]] to i64
+// CHECK15-NEXT:    [[TMP36:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES7]] to i8*
+// CHECK15-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP36]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes.4 to i8*), i32 24, i1 false)
 // CHECK15-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
 // CHECK15-NEXT:    [[TMP38:%.*]] = bitcast i8** [[TMP37]] to i32*
-// CHECK15-NEXT:    store i32 [[TMP34]], i32* [[TMP38]], align 4
+// CHECK15-NEXT:    store i32 [[TMP33]], i32* [[TMP38]], align 4
 // CHECK15-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
 // CHECK15-NEXT:    [[TMP40:%.*]] = bitcast i8** [[TMP39]] to i32*
-// CHECK15-NEXT:    store i32 [[TMP34]], i32* [[TMP40]], align 4
-// CHECK15-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 0
-// CHECK15-NEXT:    store i64 4, i64* [[TMP41]], align 4
-// CHECK15-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 0
-// CHECK15-NEXT:    store i8* null, i8** [[TMP42]], align 4
-// CHECK15-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 1
-// CHECK15-NEXT:    [[TMP44:%.*]] = bitcast i8** [[TMP43]] to i32*
-// CHECK15-NEXT:    store i32 [[TMP0]], i32* [[TMP44]], align 4
-// CHECK15-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 1
-// CHECK15-NEXT:    [[TMP46:%.*]] = bitcast i8** [[TMP45]] to i32*
-// CHECK15-NEXT:    store i32 [[TMP0]], i32* [[TMP46]], align 4
-// CHECK15-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 1
-// CHECK15-NEXT:    store i64 4, i64* [[TMP47]], align 4
-// CHECK15-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 1
-// CHECK15-NEXT:    store i8* null, i8** [[TMP48]], align 4
-// CHECK15-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 2
+// CHECK15-NEXT:    store i32 [[TMP33]], i32* [[TMP40]], align 4
+// CHECK15-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 0
+// CHECK15-NEXT:    store i8* null, i8** [[TMP41]], align 4
+// CHECK15-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 1
+// CHECK15-NEXT:    [[TMP43:%.*]] = bitcast i8** [[TMP42]] to i32*
+// CHECK15-NEXT:    store i32 [[TMP0]], i32* [[TMP43]], align 4
+// CHECK15-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 1
+// CHECK15-NEXT:    [[TMP45:%.*]] = bitcast i8** [[TMP44]] to i32*
+// CHECK15-NEXT:    store i32 [[TMP0]], i32* [[TMP45]], align 4
+// CHECK15-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 1
+// CHECK15-NEXT:    store i8* null, i8** [[TMP46]], align 4
+// CHECK15-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 2
+// CHECK15-NEXT:    [[TMP48:%.*]] = bitcast i8** [[TMP47]] to i32**
+// CHECK15-NEXT:    store i32* [[VLA]], i32** [[TMP48]], align 4
+// CHECK15-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 2
 // CHECK15-NEXT:    [[TMP50:%.*]] = bitcast i8** [[TMP49]] to i32**
 // CHECK15-NEXT:    store i32* [[VLA]], i32** [[TMP50]], align 4
-// CHECK15-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 2
-// CHECK15-NEXT:    [[TMP52:%.*]] = bitcast i8** [[TMP51]] to i32**
-// CHECK15-NEXT:    store i32* [[VLA]], i32** [[TMP52]], align 4
-// CHECK15-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 2
-// CHECK15-NEXT:    store i64 [[TMP36]], i64* [[TMP53]], align 4
-// CHECK15-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 2
-// CHECK15-NEXT:    store i8* null, i8** [[TMP54]], align 4
-// CHECK15-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
-// CHECK15-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
-// CHECK15-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 0
-// CHECK15-NEXT:    [[TMP58:%.*]] = load i32, i32* [[N]], align 4
-// CHECK15-NEXT:    store i32 [[TMP58]], i32* [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK15-NEXT:    [[TMP59:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK15-NEXT:    [[SUB11:%.*]] = sub nsw i32 [[TMP59]], 0
+// CHECK15-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 2
+// CHECK15-NEXT:    store i64 [[TMP35]], i64* [[TMP51]], align 4
+// CHECK15-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 2
+// CHECK15-NEXT:    store i8* null, i8** [[TMP52]], align 4
+// CHECK15-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
+// CHECK15-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
+// CHECK15-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 0
+// CHECK15-NEXT:    [[TMP56:%.*]] = load i32, i32* [[N]], align 4
+// CHECK15-NEXT:    store i32 [[TMP56]], i32* [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK15-NEXT:    [[TMP57:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK15-NEXT:    [[SUB11:%.*]] = sub nsw i32 [[TMP57]], 0
 // CHECK15-NEXT:    [[DIV12:%.*]] = sdiv i32 [[SUB11]], 1
 // CHECK15-NEXT:    [[SUB13:%.*]] = sub nsw i32 [[DIV12]], 1
 // CHECK15-NEXT:    store i32 [[SUB13]], i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK15-NEXT:    [[TMP60:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK15-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP60]], 1
-// CHECK15-NEXT:    [[TMP61:%.*]] = zext i32 [[ADD14]] to i64
-// CHECK15-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP61]])
-// CHECK15-NEXT:    [[TMP62:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l143.region_id, i32 3, i8** [[TMP55]], i8** [[TMP56]], i64* [[TMP57]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK15-NEXT:    [[TMP63:%.*]] = icmp ne i32 [[TMP62]], 0
-// CHECK15-NEXT:    br i1 [[TMP63]], label [[OMP_OFFLOAD_FAILED15:%.*]], label [[OMP_OFFLOAD_CONT16:%.*]]
+// CHECK15-NEXT:    [[TMP58:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
+// CHECK15-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP58]], 1
+// CHECK15-NEXT:    [[TMP59:%.*]] = zext i32 [[ADD14]] to i64
+// CHECK15-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP59]])
+// CHECK15-NEXT:    [[TMP60:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l143.region_id, i32 3, i8** [[TMP53]], i8** [[TMP54]], i64* [[TMP55]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.5, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK15-NEXT:    [[TMP61:%.*]] = icmp ne i32 [[TMP60]], 0
+// CHECK15-NEXT:    br i1 [[TMP61]], label [[OMP_OFFLOAD_FAILED15:%.*]], label [[OMP_OFFLOAD_CONT16:%.*]]
 // CHECK15:       omp_offload.failed15:
-// CHECK15-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l143(i32 [[TMP34]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK15-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l143(i32 [[TMP33]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK15-NEXT:    br label [[OMP_OFFLOAD_CONT16]]
 // CHECK15:       omp_offload.cont16:
-// CHECK15-NEXT:    [[TMP64:%.*]] = load i32, i32* [[M]], align 4
-// CHECK15-NEXT:    store i32 [[TMP64]], i32* [[DOTCAPTURE_EXPR_17]], align 4
-// CHECK15-NEXT:    [[TMP65:%.*]] = load i32, i32* [[N]], align 4
-// CHECK15-NEXT:    store i32 [[TMP65]], i32* [[N_CASTED18]], align 4
-// CHECK15-NEXT:    [[TMP66:%.*]] = load i32, i32* [[N_CASTED18]], align 4
-// CHECK15-NEXT:    [[TMP67:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_17]], align 4
-// CHECK15-NEXT:    store i32 [[TMP67]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK15-NEXT:    [[TMP68:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK15-NEXT:    [[TMP69:%.*]] = mul nuw i32 [[TMP0]], 4
-// CHECK15-NEXT:    [[TMP70:%.*]] = sext i32 [[TMP69]] to i64
-// CHECK15-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 0
-// CHECK15-NEXT:    [[TMP72:%.*]] = bitcast i8** [[TMP71]] to i32*
-// CHECK15-NEXT:    store i32 [[TMP66]], i32* [[TMP72]], align 4
-// CHECK15-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 0
-// CHECK15-NEXT:    [[TMP74:%.*]] = bitcast i8** [[TMP73]] to i32*
-// CHECK15-NEXT:    store i32 [[TMP66]], i32* [[TMP74]], align 4
-// CHECK15-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 0
-// CHECK15-NEXT:    store i64 4, i64* [[TMP75]], align 4
-// CHECK15-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 0
-// CHECK15-NEXT:    store i8* null, i8** [[TMP76]], align 4
-// CHECK15-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 1
+// CHECK15-NEXT:    [[TMP62:%.*]] = load i32, i32* [[M]], align 4
+// CHECK15-NEXT:    store i32 [[TMP62]], i32* [[DOTCAPTURE_EXPR_17]], align 4
+// CHECK15-NEXT:    [[TMP63:%.*]] = load i32, i32* [[N]], align 4
+// CHECK15-NEXT:    store i32 [[TMP63]], i32* [[N_CASTED18]], align 4
+// CHECK15-NEXT:    [[TMP64:%.*]] = load i32, i32* [[N_CASTED18]], align 4
+// CHECK15-NEXT:    [[TMP65:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_17]], align 4
+// CHECK15-NEXT:    store i32 [[TMP65]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
+// CHECK15-NEXT:    [[TMP66:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
+// CHECK15-NEXT:    [[TMP67:%.*]] = mul nuw i32 [[TMP0]], 4
+// CHECK15-NEXT:    [[TMP68:%.*]] = sext i32 [[TMP67]] to i64
+// CHECK15-NEXT:    [[TMP69:%.*]] = bitcast [4 x i64]* [[DOTOFFLOAD_SIZES22]] to i8*
+// CHECK15-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP69]], i8* align 4 bitcast ([4 x i64]* @.offload_sizes.8 to i8*), i32 32, i1 false)
+// CHECK15-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 0
+// CHECK15-NEXT:    [[TMP71:%.*]] = bitcast i8** [[TMP70]] to i32*
+// CHECK15-NEXT:    store i32 [[TMP64]], i32* [[TMP71]], align 4
+// CHECK15-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 0
+// CHECK15-NEXT:    [[TMP73:%.*]] = bitcast i8** [[TMP72]] to i32*
+// CHECK15-NEXT:    store i32 [[TMP64]], i32* [[TMP73]], align 4
+// CHECK15-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 0
+// CHECK15-NEXT:    store i8* null, i8** [[TMP74]], align 4
+// CHECK15-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 1
+// CHECK15-NEXT:    [[TMP76:%.*]] = bitcast i8** [[TMP75]] to i32*
+// CHECK15-NEXT:    store i32 [[TMP0]], i32* [[TMP76]], align 4
+// CHECK15-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 1
 // CHECK15-NEXT:    [[TMP78:%.*]] = bitcast i8** [[TMP77]] to i32*
 // CHECK15-NEXT:    store i32 [[TMP0]], i32* [[TMP78]], align 4
-// CHECK15-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 1
-// CHECK15-NEXT:    [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i32*
-// CHECK15-NEXT:    store i32 [[TMP0]], i32* [[TMP80]], align 4
-// CHECK15-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 1
-// CHECK15-NEXT:    store i64 4, i64* [[TMP81]], align 4
-// CHECK15-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 1
-// CHECK15-NEXT:    store i8* null, i8** [[TMP82]], align 4
-// CHECK15-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 2
-// CHECK15-NEXT:    [[TMP84:%.*]] = bitcast i8** [[TMP83]] to i32**
-// CHECK15-NEXT:    store i32* [[VLA]], i32** [[TMP84]], align 4
-// CHECK15-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 2
-// CHECK15-NEXT:    [[TMP86:%.*]] = bitcast i8** [[TMP85]] to i32**
-// CHECK15-NEXT:    store i32* [[VLA]], i32** [[TMP86]], align 4
-// CHECK15-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 2
-// CHECK15-NEXT:    store i64 [[TMP70]], i64* [[TMP87]], align 4
-// CHECK15-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 2
-// CHECK15-NEXT:    store i8* null, i8** [[TMP88]], align 4
-// CHECK15-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 3
-// CHECK15-NEXT:    [[TMP90:%.*]] = bitcast i8** [[TMP89]] to i32*
-// CHECK15-NEXT:    store i32 [[TMP68]], i32* [[TMP90]], align 4
-// CHECK15-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 3
-// CHECK15-NEXT:    [[TMP92:%.*]] = bitcast i8** [[TMP91]] to i32*
-// CHECK15-NEXT:    store i32 [[TMP68]], i32* [[TMP92]], align 4
-// CHECK15-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 3
-// CHECK15-NEXT:    store i64 4, i64* [[TMP93]], align 4
-// CHECK15-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 3
-// CHECK15-NEXT:    store i8* null, i8** [[TMP94]], align 4
-// CHECK15-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 0
-// CHECK15-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 0
-// CHECK15-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 0
-// CHECK15-NEXT:    [[TMP98:%.*]] = load i32, i32* [[N]], align 4
-// CHECK15-NEXT:    store i32 [[TMP98]], i32* [[DOTCAPTURE_EXPR_24]], align 4
-// CHECK15-NEXT:    [[TMP99:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_24]], align 4
-// CHECK15-NEXT:    [[SUB26:%.*]] = sub nsw i32 [[TMP99]], 0
+// CHECK15-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 1
+// CHECK15-NEXT:    store i8* null, i8** [[TMP79]], align 4
+// CHECK15-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 2
+// CHECK15-NEXT:    [[TMP81:%.*]] = bitcast i8** [[TMP80]] to i32**
+// CHECK15-NEXT:    store i32* [[VLA]], i32** [[TMP81]], align 4
+// CHECK15-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 2
+// CHECK15-NEXT:    [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i32**
+// CHECK15-NEXT:    store i32* [[VLA]], i32** [[TMP83]], align 4
+// CHECK15-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 2
+// CHECK15-NEXT:    store i64 [[TMP68]], i64* [[TMP84]], align 4
+// CHECK15-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 2
+// CHECK15-NEXT:    store i8* null, i8** [[TMP85]], align 4
+// CHECK15-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 3
+// CHECK15-NEXT:    [[TMP87:%.*]] = bitcast i8** [[TMP86]] to i32*
+// CHECK15-NEXT:    store i32 [[TMP66]], i32* [[TMP87]], align 4
+// CHECK15-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 3
+// CHECK15-NEXT:    [[TMP89:%.*]] = bitcast i8** [[TMP88]] to i32*
+// CHECK15-NEXT:    store i32 [[TMP66]], i32* [[TMP89]], align 4
+// CHECK15-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 3
+// CHECK15-NEXT:    store i8* null, i8** [[TMP90]], align 4
+// CHECK15-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 0
+// CHECK15-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 0
+// CHECK15-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 0
+// CHECK15-NEXT:    [[TMP94:%.*]] = load i32, i32* [[N]], align 4
+// CHECK15-NEXT:    store i32 [[TMP94]], i32* [[DOTCAPTURE_EXPR_24]], align 4
+// CHECK15-NEXT:    [[TMP95:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_24]], align 4
+// CHECK15-NEXT:    [[SUB26:%.*]] = sub nsw i32 [[TMP95]], 0
 // CHECK15-NEXT:    [[DIV27:%.*]] = sdiv i32 [[SUB26]], 1
 // CHECK15-NEXT:    [[SUB28:%.*]] = sub nsw i32 [[DIV27]], 1
 // CHECK15-NEXT:    store i32 [[SUB28]], i32* [[DOTCAPTURE_EXPR_25]], align 4
-// CHECK15-NEXT:    [[TMP100:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_25]], align 4
-// CHECK15-NEXT:    [[ADD29:%.*]] = add nsw i32 [[TMP100]], 1
-// CHECK15-NEXT:    [[TMP101:%.*]] = zext i32 [[ADD29]] to i64
-// CHECK15-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP101]])
-// CHECK15-NEXT:    [[TMP102:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l147.region_id, i32 4, i8** [[TMP95]], i8** [[TMP96]], i64* [[TMP97]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.7, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK15-NEXT:    [[TMP103:%.*]] = icmp ne i32 [[TMP102]], 0
-// CHECK15-NEXT:    br i1 [[TMP103]], label [[OMP_OFFLOAD_FAILED30:%.*]], label [[OMP_OFFLOAD_CONT31:%.*]]
+// CHECK15-NEXT:    [[TMP96:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_25]], align 4
+// CHECK15-NEXT:    [[ADD29:%.*]] = add nsw i32 [[TMP96]], 1
+// CHECK15-NEXT:    [[TMP97:%.*]] = zext i32 [[ADD29]] to i64
+// CHECK15-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP97]])
+// CHECK15-NEXT:    [[TMP98:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l147.region_id, i32 4, i8** [[TMP91]], i8** [[TMP92]], i64* [[TMP93]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK15-NEXT:    [[TMP99:%.*]] = icmp ne i32 [[TMP98]], 0
+// CHECK15-NEXT:    br i1 [[TMP99]], label [[OMP_OFFLOAD_FAILED30:%.*]], label [[OMP_OFFLOAD_CONT31:%.*]]
 // CHECK15:       omp_offload.failed30:
-// CHECK15-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l147(i32 [[TMP66]], i32 [[TMP0]], i32* [[VLA]], i32 [[TMP68]]) #[[ATTR3]]
+// CHECK15-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l147(i32 [[TMP64]], i32 [[TMP0]], i32* [[VLA]], i32 [[TMP66]]) #[[ATTR3]]
 // CHECK15-NEXT:    br label [[OMP_OFFLOAD_CONT31]]
 // CHECK15:       omp_offload.cont31:
-// CHECK15-NEXT:    [[TMP104:%.*]] = load i32, i32* [[N]], align 4
-// CHECK15-NEXT:    store i32 [[TMP104]], i32* [[N_CASTED32]], align 4
-// CHECK15-NEXT:    [[TMP105:%.*]] = load i32, i32* [[N_CASTED32]], align 4
-// CHECK15-NEXT:    [[TMP106:%.*]] = mul nuw i32 [[TMP0]], 4
-// CHECK15-NEXT:    [[TMP107:%.*]] = sext i32 [[TMP106]] to i64
-// CHECK15-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS33]], i32 0, i32 0
-// CHECK15-NEXT:    [[TMP109:%.*]] = bitcast i8** [[TMP108]] to i32*
-// CHECK15-NEXT:    store i32 [[TMP105]], i32* [[TMP109]], align 4
-// CHECK15-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS34]], i32 0, i32 0
+// CHECK15-NEXT:    [[TMP100:%.*]] = load i32, i32* [[N]], align 4
+// CHECK15-NEXT:    store i32 [[TMP100]], i32* [[N_CASTED32]], align 4
+// CHECK15-NEXT:    [[TMP101:%.*]] = load i32, i32* [[N_CASTED32]], align 4
+// CHECK15-NEXT:    [[TMP102:%.*]] = mul nuw i32 [[TMP0]], 4
+// CHECK15-NEXT:    [[TMP103:%.*]] = sext i32 [[TMP102]] to i64
+// CHECK15-NEXT:    [[TMP104:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES36]] to i8*
+// CHECK15-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP104]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes.12 to i8*), i32 24, i1 false)
+// CHECK15-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS33]], i32 0, i32 0
+// CHECK15-NEXT:    [[TMP106:%.*]] = bitcast i8** [[TMP105]] to i32*
+// CHECK15-NEXT:    store i32 [[TMP101]], i32* [[TMP106]], align 4
+// CHECK15-NEXT:    [[TMP107:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS34]], i32 0, i32 0
+// CHECK15-NEXT:    [[TMP108:%.*]] = bitcast i8** [[TMP107]] to i32*
+// CHECK15-NEXT:    store i32 [[TMP101]], i32* [[TMP108]], align 4
+// CHECK15-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS35]], i32 0, i32 0
+// CHECK15-NEXT:    store i8* null, i8** [[TMP109]], align 4
+// CHECK15-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS33]], i32 0, i32 1
 // CHECK15-NEXT:    [[TMP111:%.*]] = bitcast i8** [[TMP110]] to i32*
-// CHECK15-NEXT:    store i32 [[TMP105]], i32* [[TMP111]], align 4
-// CHECK15-NEXT:    [[TMP112:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES36]], i32 0, i32 0
-// CHECK15-NEXT:    store i64 4, i64* [[TMP112]], align 4
-// CHECK15-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS35]], i32 0, i32 0
-// CHECK15-NEXT:    store i8* null, i8** [[TMP113]], align 4
-// CHECK15-NEXT:    [[TMP114:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS33]], i32 0, i32 1
-// CHECK15-NEXT:    [[TMP115:%.*]] = bitcast i8** [[TMP114]] to i32*
-// CHECK15-NEXT:    store i32 [[TMP0]], i32* [[TMP115]], align 4
-// CHECK15-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS34]], i32 0, i32 1
-// CHECK15-NEXT:    [[TMP117:%.*]] = bitcast i8** [[TMP116]] to i32*
-// CHECK15-NEXT:    store i32 [[TMP0]], i32* [[TMP117]], align 4
-// CHECK15-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES36]], i32 0, i32 1
-// CHECK15-NEXT:    store i64 4, i64* [[TMP118]], align 4
-// CHECK15-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS35]], i32 0, i32 1
-// CHECK15-NEXT:    store i8* null, i8** [[TMP119]], align 4
-// CHECK15-NEXT:    [[TMP120:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS33]], i32 0, i32 2
-// CHECK15-NEXT:    [[TMP121:%.*]] = bitcast i8** [[TMP120]] to i32**
-// CHECK15-NEXT:    store i32* [[VLA]], i32** [[TMP121]], align 4
-// CHECK15-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS34]], i32 0, i32 2
-// CHECK15-NEXT:    [[TMP123:%.*]] = bitcast i8** [[TMP122]] to i32**
-// CHECK15-NEXT:    store i32* [[VLA]], i32** [[TMP123]], align 4
-// CHECK15-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES36]], i32 0, i32 2
-// CHECK15-NEXT:    store i64 [[TMP107]], i64* [[TMP124]], align 4
-// CHECK15-NEXT:    [[TMP125:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS35]], i32 0, i32 2
-// CHECK15-NEXT:    store i8* null, i8** [[TMP125]], align 4
-// CHECK15-NEXT:    [[TMP126:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS33]], i32 0, i32 0
-// CHECK15-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS34]], i32 0, i32 0
-// CHECK15-NEXT:    [[TMP128:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES36]], i32 0, i32 0
-// CHECK15-NEXT:    [[TMP129:%.*]] = load i32, i32* [[N]], align 4
-// CHECK15-NEXT:    store i32 [[TMP129]], i32* [[DOTCAPTURE_EXPR_38]], align 4
-// CHECK15-NEXT:    [[TMP130:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_38]], align 4
-// CHECK15-NEXT:    [[SUB40:%.*]] = sub nsw i32 [[TMP130]], 0
+// CHECK15-NEXT:    store i32 [[TMP0]], i32* [[TMP111]], align 4
+// CHECK15-NEXT:    [[TMP112:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS34]], i32 0, i32 1
+// CHECK15-NEXT:    [[TMP113:%.*]] = bitcast i8** [[TMP112]] to i32*
+// CHECK15-NEXT:    store i32 [[TMP0]], i32* [[TMP113]], align 4
+// CHECK15-NEXT:    [[TMP114:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS35]], i32 0, i32 1
+// CHECK15-NEXT:    store i8* null, i8** [[TMP114]], align 4
+// CHECK15-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS33]], i32 0, i32 2
+// CHECK15-NEXT:    [[TMP116:%.*]] = bitcast i8** [[TMP115]] to i32**
+// CHECK15-NEXT:    store i32* [[VLA]], i32** [[TMP116]], align 4
+// CHECK15-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS34]], i32 0, i32 2
+// CHECK15-NEXT:    [[TMP118:%.*]] = bitcast i8** [[TMP117]] to i32**
+// CHECK15-NEXT:    store i32* [[VLA]], i32** [[TMP118]], align 4
+// CHECK15-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES36]], i32 0, i32 2
+// CHECK15-NEXT:    store i64 [[TMP103]], i64* [[TMP119]], align 4
+// CHECK15-NEXT:    [[TMP120:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS35]], i32 0, i32 2
+// CHECK15-NEXT:    store i8* null, i8** [[TMP120]], align 4
+// CHECK15-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS33]], i32 0, i32 0
+// CHECK15-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS34]], i32 0, i32 0
+// CHECK15-NEXT:    [[TMP123:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES36]], i32 0, i32 0
+// CHECK15-NEXT:    [[TMP124:%.*]] = load i32, i32* [[N]], align 4
+// CHECK15-NEXT:    store i32 [[TMP124]], i32* [[DOTCAPTURE_EXPR_38]], align 4
+// CHECK15-NEXT:    [[TMP125:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_38]], align 4
+// CHECK15-NEXT:    [[SUB40:%.*]] = sub nsw i32 [[TMP125]], 0
 // CHECK15-NEXT:    [[DIV41:%.*]] = sdiv i32 [[SUB40]], 1
 // CHECK15-NEXT:    [[SUB42:%.*]] = sub nsw i32 [[DIV41]], 1
 // CHECK15-NEXT:    store i32 [[SUB42]], i32* [[DOTCAPTURE_EXPR_39]], align 4
-// CHECK15-NEXT:    [[TMP131:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_39]], align 4
-// CHECK15-NEXT:    [[ADD43:%.*]] = add nsw i32 [[TMP131]], 1
-// CHECK15-NEXT:    [[TMP132:%.*]] = zext i32 [[ADD43]] to i64
-// CHECK15-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP132]])
-// CHECK15-NEXT:    [[TMP133:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l151.region_id, i32 3, i8** [[TMP126]], i8** [[TMP127]], i64* [[TMP128]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.10, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK15-NEXT:    [[TMP134:%.*]] = icmp ne i32 [[TMP133]], 0
-// CHECK15-NEXT:    br i1 [[TMP134]], label [[OMP_OFFLOAD_FAILED44:%.*]], label [[OMP_OFFLOAD_CONT45:%.*]]
+// CHECK15-NEXT:    [[TMP126:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_39]], align 4
+// CHECK15-NEXT:    [[ADD43:%.*]] = add nsw i32 [[TMP126]], 1
+// CHECK15-NEXT:    [[TMP127:%.*]] = zext i32 [[ADD43]] to i64
+// CHECK15-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP127]])
+// CHECK15-NEXT:    [[TMP128:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l151.region_id, i32 3, i8** [[TMP121]], i8** [[TMP122]], i64* [[TMP123]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK15-NEXT:    [[TMP129:%.*]] = icmp ne i32 [[TMP128]], 0
+// CHECK15-NEXT:    br i1 [[TMP129]], label [[OMP_OFFLOAD_FAILED44:%.*]], label [[OMP_OFFLOAD_CONT45:%.*]]
 // CHECK15:       omp_offload.failed44:
-// CHECK15-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l151(i32 [[TMP105]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK15-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l151(i32 [[TMP101]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK15-NEXT:    br label [[OMP_OFFLOAD_CONT45]]
 // CHECK15:       omp_offload.cont45:
-// CHECK15-NEXT:    [[TMP135:%.*]] = load i32, i32* [[M]], align 4
-// CHECK15-NEXT:    store i32 [[TMP135]], i32* [[DOTCAPTURE_EXPR_46]], align 4
-// CHECK15-NEXT:    [[TMP136:%.*]] = load i32, i32* [[N]], align 4
-// CHECK15-NEXT:    store i32 [[TMP136]], i32* [[N_CASTED47]], align 4
-// CHECK15-NEXT:    [[TMP137:%.*]] = load i32, i32* [[N_CASTED47]], align 4
-// CHECK15-NEXT:    [[TMP138:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_46]], align 4
-// CHECK15-NEXT:    store i32 [[TMP138]], i32* [[DOTCAPTURE_EXPR__CASTED48]], align 4
-// CHECK15-NEXT:    [[TMP139:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED48]], align 4
-// CHECK15-NEXT:    [[TMP140:%.*]] = mul nuw i32 [[TMP0]], 4
-// CHECK15-NEXT:    [[TMP141:%.*]] = sext i32 [[TMP140]] to i64
-// CHECK15-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS49]], i32 0, i32 0
-// CHECK15-NEXT:    [[TMP143:%.*]] = bitcast i8** [[TMP142]] to i32*
-// CHECK15-NEXT:    store i32 [[TMP137]], i32* [[TMP143]], align 4
-// CHECK15-NEXT:    [[TMP144:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS50]], i32 0, i32 0
-// CHECK15-NEXT:    [[TMP145:%.*]] = bitcast i8** [[TMP144]] to i32*
-// CHECK15-NEXT:    store i32 [[TMP137]], i32* [[TMP145]], align 4
-// CHECK15-NEXT:    [[TMP146:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES52]], i32 0, i32 0
-// CHECK15-NEXT:    store i64 4, i64* [[TMP146]], align 4
-// CHECK15-NEXT:    [[TMP147:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS51]], i32 0, i32 0
+// CHECK15-NEXT:    [[TMP130:%.*]] = load i32, i32* [[M]], align 4
+// CHECK15-NEXT:    store i32 [[TMP130]], i32* [[DOTCAPTURE_EXPR_46]], align 4
+// CHECK15-NEXT:    [[TMP131:%.*]] = load i32, i32* [[N]], align 4
+// CHECK15-NEXT:    store i32 [[TMP131]], i32* [[N_CASTED47]], align 4
+// CHECK15-NEXT:    [[TMP132:%.*]] = load i32, i32* [[N_CASTED47]], align 4
+// CHECK15-NEXT:    [[TMP133:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_46]], align 4
+// CHECK15-NEXT:    store i32 [[TMP133]], i32* [[DOTCAPTURE_EXPR__CASTED48]], align 4
+// CHECK15-NEXT:    [[TMP134:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED48]], align 4
+// CHECK15-NEXT:    [[TMP135:%.*]] = mul nuw i32 [[TMP0]], 4
+// CHECK15-NEXT:    [[TMP136:%.*]] = sext i32 [[TMP135]] to i64
+// CHECK15-NEXT:    [[TMP137:%.*]] = bitcast [4 x i64]* [[DOTOFFLOAD_SIZES52]] to i8*
+// CHECK15-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP137]], i8* align 4 bitcast ([4 x i64]* @.offload_sizes.16 to i8*), i32 32, i1 false)
+// CHECK15-NEXT:    [[TMP138:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS49]], i32 0, i32 0
+// CHECK15-NEXT:    [[TMP139:%.*]] = bitcast i8** [[TMP138]] to i32*
+// CHECK15-NEXT:    store i32 [[TMP132]], i32* [[TMP139]], align 4
+// CHECK15-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS50]], i32 0, i32 0
+// CHECK15-NEXT:    [[TMP141:%.*]] = bitcast i8** [[TMP140]] to i32*
+// CHECK15-NEXT:    store i32 [[TMP132]], i32* [[TMP141]], align 4
+// CHECK15-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS51]], i32 0, i32 0
+// CHECK15-NEXT:    store i8* null, i8** [[TMP142]], align 4
+// CHECK15-NEXT:    [[TMP143:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS49]], i32 0, i32 1
+// CHECK15-NEXT:    [[TMP144:%.*]] = bitcast i8** [[TMP143]] to i32*
+// CHECK15-NEXT:    store i32 [[TMP0]], i32* [[TMP144]], align 4
+// CHECK15-NEXT:    [[TMP145:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS50]], i32 0, i32 1
+// CHECK15-NEXT:    [[TMP146:%.*]] = bitcast i8** [[TMP145]] to i32*
+// CHECK15-NEXT:    store i32 [[TMP0]], i32* [[TMP146]], align 4
+// CHECK15-NEXT:    [[TMP147:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS51]], i32 0, i32 1
 // CHECK15-NEXT:    store i8* null, i8** [[TMP147]], align 4
-// CHECK15-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS49]], i32 0, i32 1
-// CHECK15-NEXT:    [[TMP149:%.*]] = bitcast i8** [[TMP148]] to i32*
-// CHECK15-NEXT:    store i32 [[TMP0]], i32* [[TMP149]], align 4
-// CHECK15-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS50]], i32 0, i32 1
-// CHECK15-NEXT:    [[TMP151:%.*]] = bitcast i8** [[TMP150]] to i32*
-// CHECK15-NEXT:    store i32 [[TMP0]], i32* [[TMP151]], align 4
-// CHECK15-NEXT:    [[TMP152:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES52]], i32 0, i32 1
-// CHECK15-NEXT:    store i64 4, i64* [[TMP152]], align 4
-// CHECK15-NEXT:    [[TMP153:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS51]], i32 0, i32 1
+// CHECK15-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS49]], i32 0, i32 2
+// CHECK15-NEXT:    [[TMP149:%.*]] = bitcast i8** [[TMP148]] to i32**
+// CHECK15-NEXT:    store i32* [[VLA]], i32** [[TMP149]], align 4
+// CHECK15-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS50]], i32 0, i32 2
+// CHECK15-NEXT:    [[TMP151:%.*]] = bitcast i8** [[TMP150]] to i32**
+// CHECK15-NEXT:    store i32* [[VLA]], i32** [[TMP151]], align 4
+// CHECK15-NEXT:    [[TMP152:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES52]], i32 0, i32 2
+// CHECK15-NEXT:    store i64 [[TMP136]], i64* [[TMP152]], align 4
+// CHECK15-NEXT:    [[TMP153:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS51]], i32 0, i32 2
 // CHECK15-NEXT:    store i8* null, i8** [[TMP153]], align 4
-// CHECK15-NEXT:    [[TMP154:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS49]], i32 0, i32 2
-// CHECK15-NEXT:    [[TMP155:%.*]] = bitcast i8** [[TMP154]] to i32**
-// CHECK15-NEXT:    store i32* [[VLA]], i32** [[TMP155]], align 4
-// CHECK15-NEXT:    [[TMP156:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS50]], i32 0, i32 2
-// CHECK15-NEXT:    [[TMP157:%.*]] = bitcast i8** [[TMP156]] to i32**
-// CHECK15-NEXT:    store i32* [[VLA]], i32** [[TMP157]], align 4
-// CHECK15-NEXT:    [[TMP158:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES52]], i32 0, i32 2
-// CHECK15-NEXT:    store i64 [[TMP141]], i64* [[TMP158]], align 4
-// CHECK15-NEXT:    [[TMP159:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS51]], i32 0, i32 2
-// CHECK15-NEXT:    store i8* null, i8** [[TMP159]], align 4
-// CHECK15-NEXT:    [[TMP160:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS49]], i32 0, i32 3
-// CHECK15-NEXT:    [[TMP161:%.*]] = bitcast i8** [[TMP160]] to i32*
-// CHECK15-NEXT:    store i32 [[TMP139]], i32* [[TMP161]], align 4
-// CHECK15-NEXT:    [[TMP162:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS50]], i32 0, i32 3
-// CHECK15-NEXT:    [[TMP163:%.*]] = bitcast i8** [[TMP162]] to i32*
-// CHECK15-NEXT:    store i32 [[TMP139]], i32* [[TMP163]], align 4
-// CHECK15-NEXT:    [[TMP164:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES52]], i32 0, i32 3
-// CHECK15-NEXT:    store i64 4, i64* [[TMP164]], align 4
-// CHECK15-NEXT:    [[TMP165:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS51]], i32 0, i32 3
-// CHECK15-NEXT:    store i8* null, i8** [[TMP165]], align 4
-// CHECK15-NEXT:    [[TMP166:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS49]], i32 0, i32 0
-// CHECK15-NEXT:    [[TMP167:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS50]], i32 0, i32 0
-// CHECK15-NEXT:    [[TMP168:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES52]], i32 0, i32 0
-// CHECK15-NEXT:    [[TMP169:%.*]] = load i32, i32* [[N]], align 4
-// CHECK15-NEXT:    store i32 [[TMP169]], i32* [[DOTCAPTURE_EXPR_54]], align 4
-// CHECK15-NEXT:    [[TMP170:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_54]], align 4
-// CHECK15-NEXT:    [[SUB56:%.*]] = sub nsw i32 [[TMP170]], 0
+// CHECK15-NEXT:    [[TMP154:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS49]], i32 0, i32 3
+// CHECK15-NEXT:    [[TMP155:%.*]] = bitcast i8** [[TMP154]] to i32*
+// CHECK15-NEXT:    store i32 [[TMP134]], i32* [[TMP155]], align 4
+// CHECK15-NEXT:    [[TMP156:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS50]], i32 0, i32 3
+// CHECK15-NEXT:    [[TMP157:%.*]] = bitcast i8** [[TMP156]] to i32*
+// CHECK15-NEXT:    store i32 [[TMP134]], i32* [[TMP157]], align 4
+// CHECK15-NEXT:    [[TMP158:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS51]], i32 0, i32 3
+// CHECK15-NEXT:    store i8* null, i8** [[TMP158]], align 4
+// CHECK15-NEXT:    [[TMP159:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS49]], i32 0, i32 0
+// CHECK15-NEXT:    [[TMP160:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS50]], i32 0, i32 0
+// CHECK15-NEXT:    [[TMP161:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES52]], i32 0, i32 0
+// CHECK15-NEXT:    [[TMP162:%.*]] = load i32, i32* [[N]], align 4
+// CHECK15-NEXT:    store i32 [[TMP162]], i32* [[DOTCAPTURE_EXPR_54]], align 4
+// CHECK15-NEXT:    [[TMP163:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_54]], align 4
+// CHECK15-NEXT:    [[SUB56:%.*]] = sub nsw i32 [[TMP163]], 0
 // CHECK15-NEXT:    [[DIV57:%.*]] = sdiv i32 [[SUB56]], 1
 // CHECK15-NEXT:    [[SUB58:%.*]] = sub nsw i32 [[DIV57]], 1
 // CHECK15-NEXT:    store i32 [[SUB58]], i32* [[DOTCAPTURE_EXPR_55]], align 4
-// CHECK15-NEXT:    [[TMP171:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_55]], align 4
-// CHECK15-NEXT:    [[ADD59:%.*]] = add nsw i32 [[TMP171]], 1
-// CHECK15-NEXT:    [[TMP172:%.*]] = zext i32 [[ADD59]] to i64
-// CHECK15-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP172]])
-// CHECK15-NEXT:    [[TMP173:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l155.region_id, i32 4, i8** [[TMP166]], i8** [[TMP167]], i64* [[TMP168]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK15-NEXT:    [[TMP174:%.*]] = icmp ne i32 [[TMP173]], 0
-// CHECK15-NEXT:    br i1 [[TMP174]], label [[OMP_OFFLOAD_FAILED60:%.*]], label [[OMP_OFFLOAD_CONT61:%.*]]
+// CHECK15-NEXT:    [[TMP164:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_55]], align 4
+// CHECK15-NEXT:    [[ADD59:%.*]] = add nsw i32 [[TMP164]], 1
+// CHECK15-NEXT:    [[TMP165:%.*]] = zext i32 [[ADD59]] to i64
+// CHECK15-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP165]])
+// CHECK15-NEXT:    [[TMP166:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l155.region_id, i32 4, i8** [[TMP159]], i8** [[TMP160]], i64* [[TMP161]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.17, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK15-NEXT:    [[TMP167:%.*]] = icmp ne i32 [[TMP166]], 0
+// CHECK15-NEXT:    br i1 [[TMP167]], label [[OMP_OFFLOAD_FAILED60:%.*]], label [[OMP_OFFLOAD_CONT61:%.*]]
 // CHECK15:       omp_offload.failed60:
-// CHECK15-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l155(i32 [[TMP137]], i32 [[TMP0]], i32* [[VLA]], i32 [[TMP139]]) #[[ATTR3]]
+// CHECK15-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l155(i32 [[TMP132]], i32 [[TMP0]], i32* [[VLA]], i32 [[TMP134]]) #[[ATTR3]]
 // CHECK15-NEXT:    br label [[OMP_OFFLOAD_CONT61]]
 // CHECK15:       omp_offload.cont61:
-// CHECK15-NEXT:    [[TMP175:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK15-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10EEiT_(i32 noundef [[TMP175]])
+// CHECK15-NEXT:    [[TMP168:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK15-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10EEiT_(i32 noundef [[TMP168]])
 // CHECK15-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK15-NEXT:    [[TMP176:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK15-NEXT:    call void @llvm.stackrestore(i8* [[TMP176]])
-// CHECK15-NEXT:    [[TMP177:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK15-NEXT:    ret i32 [[TMP177]]
+// CHECK15-NEXT:    [[TMP169:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK15-NEXT:    call void @llvm.stackrestore(i8* [[TMP169]])
+// CHECK15-NEXT:    [[TMP170:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK15-NEXT:    ret i32 [[TMP170]]
 //
 //
 // CHECK15-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l139
@@ -15022,11 +14980,11 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
 // CHECK15-NEXT:    store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK15-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32 [[TMP3]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP5]])
+// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32 [[TMP3]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP5]])
 // CHECK15-NEXT:    ret void
 //
 //
-// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..5
+// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..6
 // CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK15-NEXT:  entry:
 // CHECK15-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -15107,7 +15065,7 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !29
 // CHECK15-NEXT:    store i32 [[TMP20]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !29
 // CHECK15-NEXT:    [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !29
-// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*, i32)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32 [[TMP19]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP21]]), !llvm.access.group !29
+// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*, i32)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32 [[TMP19]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP21]]), !llvm.access.group !29
 // CHECK15-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK15:       omp.inner.for.inc:
 // CHECK15-NEXT:    [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29
@@ -15161,7 +15119,7 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    ret void
 //
 //
-// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..6
+// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..7
 // CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK15-NEXT:  entry:
 // CHECK15-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -15291,11 +15249,11 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4
 // CHECK15-NEXT:    store i32 [[TMP2]], i32* [[N_CASTED]], align 4
 // CHECK15-NEXT:    [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4
-// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), i32 [[TMP3]], i32 [[TMP0]], i32* [[TMP1]])
+// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), i32 [[TMP3]], i32 [[TMP0]], i32* [[TMP1]])
 // CHECK15-NEXT:    ret void
 //
 //
-// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..8
+// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..10
 // CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] {
 // CHECK15-NEXT:  entry:
 // CHECK15-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -15368,7 +15326,7 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[TMP17:%.*]] = load i32, i32* [[N_ADDR]], align 4, !llvm.access.group !35
 // CHECK15-NEXT:    store i32 [[TMP17]], i32* [[N_CASTED]], align 4, !llvm.access.group !35
 // CHECK15-NEXT:    [[TMP18:%.*]] = load i32, i32* [[N_CASTED]], align 4, !llvm.access.group !35
-// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], i32* [[TMP1]]), !llvm.access.group !35
+// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], i32* [[TMP1]]), !llvm.access.group !35
 // CHECK15-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK15:       omp.inner.for.inc:
 // CHECK15-NEXT:    [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35
@@ -15399,7 +15357,7 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    ret void
 //
 //
-// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..9
+// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..11
 // CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] {
 // CHECK15-NEXT:  entry:
 // CHECK15-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -15529,11 +15487,11 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
 // CHECK15-NEXT:    store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK15-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32 [[TMP3]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP5]])
+// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i32 [[TMP3]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP5]])
 // CHECK15-NEXT:    ret void
 //
 //
-// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..11
+// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..14
 // CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK15-NEXT:  entry:
 // CHECK15-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -15612,7 +15570,7 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !41
 // CHECK15-NEXT:    store i32 [[TMP19]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !41
 // CHECK15-NEXT:    [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !41
-// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*, i32)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP20]]), !llvm.access.group !41
+// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*, i32)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP20]]), !llvm.access.group !41
 // CHECK15-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK15:       omp.inner.for.inc:
 // CHECK15-NEXT:    [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41
@@ -15643,7 +15601,7 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    ret void
 //
 //
-// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..12
+// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..15
 // CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK15-NEXT:  entry:
 // CHECK15-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -15756,7 +15714,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK15-LABEL: define {{[^@]+}}@_Z5tmainIiLi10EEiT_
-// CHECK15-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK15-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK15-NEXT:  entry:
 // CHECK15-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK15-NEXT:    [[A:%.*]] = alloca [10 x i32], align 4
@@ -15798,7 +15756,7 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK15-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK15-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK15-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l112.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.16, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK15-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l112.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.20, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.21, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK15-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK15-NEXT:    br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK15:       omp_offload.failed:
@@ -15816,7 +15774,7 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK15-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
 // CHECK15-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK15-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l116.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.19, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.20, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK15-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l116.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.24, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.25, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK15-NEXT:    [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
 // CHECK15-NEXT:    br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]]
 // CHECK15:       omp_offload.failed5:
@@ -15847,7 +15805,7 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0
 // CHECK15-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS8]], i32 0, i32 0
 // CHECK15-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK15-NEXT:    [[TMP33:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l120.region_id, i32 2, i8** [[TMP31]], i8** [[TMP32]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.23, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.24, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK15-NEXT:    [[TMP33:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l120.region_id, i32 2, i8** [[TMP31]], i8** [[TMP32]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.28, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.29, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK15-NEXT:    [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0
 // CHECK15-NEXT:    br i1 [[TMP34]], label [[OMP_OFFLOAD_FAILED11:%.*]], label [[OMP_OFFLOAD_CONT12:%.*]]
 // CHECK15:       omp_offload.failed11:
@@ -15865,7 +15823,7 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0
 // CHECK15-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 0
 // CHECK15-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK15-NEXT:    [[TMP42:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l124.region_id, i32 1, i8** [[TMP40]], i8** [[TMP41]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.27, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.28, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK15-NEXT:    [[TMP42:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l124.region_id, i32 1, i8** [[TMP40]], i8** [[TMP41]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.32, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.33, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK15-NEXT:    [[TMP43:%.*]] = icmp ne i32 [[TMP42]], 0
 // CHECK15-NEXT:    br i1 [[TMP43]], label [[OMP_OFFLOAD_FAILED17:%.*]], label [[OMP_OFFLOAD_CONT18:%.*]]
 // CHECK15:       omp_offload.failed17:
@@ -15896,7 +15854,7 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 0
 // CHECK15-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 0
 // CHECK15-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK15-NEXT:    [[TMP59:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l128.region_id, i32 2, i8** [[TMP57]], i8** [[TMP58]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.31, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.32, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK15-NEXT:    [[TMP59:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l128.region_id, i32 2, i8** [[TMP57]], i8** [[TMP58]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.36, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.37, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK15-NEXT:    [[TMP60:%.*]] = icmp ne i32 [[TMP59]], 0
 // CHECK15-NEXT:    br i1 [[TMP60]], label [[OMP_OFFLOAD_FAILED25:%.*]], label [[OMP_OFFLOAD_CONT26:%.*]]
 // CHECK15:       omp_offload.failed25:
@@ -15912,11 +15870,11 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4
 // CHECK15-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4
 // CHECK15-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4
-// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK15-NEXT:    ret void
 //
 //
-// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..14
+// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..18
 // CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK15-NEXT:  entry:
 // CHECK15-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -15962,7 +15920,7 @@ int main (int argc, char **argv) {
 // CHECK15:       omp.inner.for.body:
 // CHECK15-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !47
 // CHECK15-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !47
-// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !47
+// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !47
 // CHECK15-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK15:       omp.inner.for.inc:
 // CHECK15-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47
@@ -15984,7 +15942,7 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    ret void
 //
 //
-// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..15
+// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..19
 // CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK15-NEXT:  entry:
 // CHECK15-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -16071,11 +16029,11 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4
 // CHECK15-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4
 // CHECK15-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4
-// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..17 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..22 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK15-NEXT:    ret void
 //
 //
-// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..17
+// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..22
 // CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK15-NEXT:  entry:
 // CHECK15-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -16121,7 +16079,7 @@ int main (int argc, char **argv) {
 // CHECK15:       omp.inner.for.body:
 // CHECK15-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !53
 // CHECK15-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !53
-// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !53
+// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..23 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !53
 // CHECK15-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK15:       omp.inner.for.inc:
 // CHECK15-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53
@@ -16143,7 +16101,7 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    ret void
 //
 //
-// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..18
+// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..23
 // CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK15-NEXT:  entry:
 // CHECK15-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -16236,11 +16194,11 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
 // CHECK15-NEXT:    store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK15-NEXT:    [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i32)* @.omp_outlined..21 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i32 [[TMP2]])
+// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i32)* @.omp_outlined..26 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i32 [[TMP2]])
 // CHECK15-NEXT:    ret void
 //
 //
-// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..21
+// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..26
 // CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK15-NEXT:  entry:
 // CHECK15-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -16292,7 +16250,7 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !59
 // CHECK15-NEXT:    store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !59
 // CHECK15-NEXT:    [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !59
-// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..22 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]), !llvm.access.group !59
+// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..27 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]), !llvm.access.group !59
 // CHECK15-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK15:       omp.inner.for.inc:
 // CHECK15-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59
@@ -16314,7 +16272,7 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    ret void
 //
 //
-// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..22
+// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..27
 // CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK15-NEXT:  entry:
 // CHECK15-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -16423,11 +16381,11 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4
 // CHECK15-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4
 // CHECK15-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4
-// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..25 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK15-NEXT:    ret void
 //
 //
-// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..25
+// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..30
 // CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK15-NEXT:  entry:
 // CHECK15-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -16473,7 +16431,7 @@ int main (int argc, char **argv) {
 // CHECK15:       omp.inner.for.body:
 // CHECK15-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !65
 // CHECK15-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !65
-// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !65
+// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !65
 // CHECK15-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK15:       omp.inner.for.inc:
 // CHECK15-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !65
@@ -16495,7 +16453,7 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    ret void
 //
 //
-// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..26
+// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..31
 // CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK15-NEXT:  entry:
 // CHECK15-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -16586,11 +16544,11 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
 // CHECK15-NEXT:    store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK15-NEXT:    [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i32)* @.omp_outlined..29 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i32 [[TMP2]])
+// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i32)* @.omp_outlined..34 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i32 [[TMP2]])
 // CHECK15-NEXT:    ret void
 //
 //
-// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..29
+// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..34
 // CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK15-NEXT:  entry:
 // CHECK15-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -16642,7 +16600,7 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !71
 // CHECK15-NEXT:    store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !71
 // CHECK15-NEXT:    [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !71
-// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]), !llvm.access.group !71
+// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]), !llvm.access.group !71
 // CHECK15-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK15:       omp.inner.for.inc:
 // CHECK15-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !71
@@ -16664,7 +16622,7 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    ret void
 //
 //
-// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..30
+// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..35
 // CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK15-NEXT:  entry:
 // CHECK15-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -16747,7 +16705,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK15-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK15-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK15-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK15-NEXT:  entry:
 // CHECK15-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK15-NEXT:    ret void
@@ -16822,14 +16780,14 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4
 // CHECK16-NEXT:    [[TMP4:%.*]] = mul nuw i32 [[TMP0]], 4
 // CHECK16-NEXT:    [[TMP5:%.*]] = sext i32 [[TMP4]] to i64
-// CHECK16-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK16-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32*
-// CHECK16-NEXT:    store i32 [[TMP3]], i32* [[TMP7]], align 4
-// CHECK16-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK16-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i32*
-// CHECK16-NEXT:    store i32 [[TMP3]], i32* [[TMP9]], align 4
-// CHECK16-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK16-NEXT:    store i64 4, i64* [[TMP10]], align 4
+// CHECK16-NEXT:    [[TMP6:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK16-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP6]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes to i8*), i32 24, i1 false)
+// CHECK16-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK16-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i32*
+// CHECK16-NEXT:    store i32 [[TMP3]], i32* [[TMP8]], align 4
+// CHECK16-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK16-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i32*
+// CHECK16-NEXT:    store i32 [[TMP3]], i32* [[TMP10]], align 4
 // CHECK16-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
 // CHECK16-NEXT:    store i8* null, i8** [[TMP11]], align 4
 // CHECK16-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -16838,302 +16796,288 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK16-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i32*
 // CHECK16-NEXT:    store i32 [[TMP0]], i32* [[TMP15]], align 4
-// CHECK16-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK16-NEXT:    store i64 4, i64* [[TMP16]], align 4
-// CHECK16-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK16-NEXT:    store i8* null, i8** [[TMP17]], align 4
-// CHECK16-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK16-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
-// CHECK16-NEXT:    store i32* [[VLA]], i32** [[TMP19]], align 4
-// CHECK16-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK16-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32**
-// CHECK16-NEXT:    store i32* [[VLA]], i32** [[TMP21]], align 4
-// CHECK16-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK16-NEXT:    store i64 [[TMP5]], i64* [[TMP22]], align 4
-// CHECK16-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK16-NEXT:    store i8* null, i8** [[TMP23]], align 4
-// CHECK16-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK16-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK16-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK16-NEXT:    [[TMP27:%.*]] = load i32, i32* [[N]], align 4
-// CHECK16-NEXT:    store i32 [[TMP27]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK16-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK16-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP28]], 0
+// CHECK16-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK16-NEXT:    store i8* null, i8** [[TMP16]], align 4
+// CHECK16-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK16-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32**
+// CHECK16-NEXT:    store i32* [[VLA]], i32** [[TMP18]], align 4
+// CHECK16-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK16-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32**
+// CHECK16-NEXT:    store i32* [[VLA]], i32** [[TMP20]], align 4
+// CHECK16-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
+// CHECK16-NEXT:    store i64 [[TMP5]], i64* [[TMP21]], align 4
+// CHECK16-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK16-NEXT:    store i8* null, i8** [[TMP22]], align 4
+// CHECK16-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK16-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK16-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK16-NEXT:    [[TMP26:%.*]] = load i32, i32* [[N]], align 4
+// CHECK16-NEXT:    store i32 [[TMP26]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK16-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK16-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP27]], 0
 // CHECK16-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK16-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK16-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK16-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK16-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP29]], 1
-// CHECK16-NEXT:    [[TMP30:%.*]] = zext i32 [[ADD]] to i64
-// CHECK16-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP30]])
-// CHECK16-NEXT:    [[TMP31:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l139.region_id, i32 3, i8** [[TMP24]], i8** [[TMP25]], i64* [[TMP26]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK16-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
-// CHECK16-NEXT:    br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK16-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK16-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP28]], 1
+// CHECK16-NEXT:    [[TMP29:%.*]] = zext i32 [[ADD]] to i64
+// CHECK16-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP29]])
+// CHECK16-NEXT:    [[TMP30:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l139.region_id, i32 3, i8** [[TMP23]], i8** [[TMP24]], i64* [[TMP25]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK16-NEXT:    [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
+// CHECK16-NEXT:    br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK16:       omp_offload.failed:
 // CHECK16-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l139(i32 [[TMP3]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK16-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK16:       omp_offload.cont:
-// CHECK16-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N]], align 4
-// CHECK16-NEXT:    store i32 [[TMP33]], i32* [[N_CASTED3]], align 4
-// CHECK16-NEXT:    [[TMP34:%.*]] = load i32, i32* [[N_CASTED3]], align 4
-// CHECK16-NEXT:    [[TMP35:%.*]] = mul nuw i32 [[TMP0]], 4
-// CHECK16-NEXT:    [[TMP36:%.*]] = sext i32 [[TMP35]] to i64
+// CHECK16-NEXT:    [[TMP32:%.*]] = load i32, i32* [[N]], align 4
+// CHECK16-NEXT:    store i32 [[TMP32]], i32* [[N_CASTED3]], align 4
+// CHECK16-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N_CASTED3]], align 4
+// CHECK16-NEXT:    [[TMP34:%.*]] = mul nuw i32 [[TMP0]], 4
+// CHECK16-NEXT:    [[TMP35:%.*]] = sext i32 [[TMP34]] to i64
+// CHECK16-NEXT:    [[TMP36:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES7]] to i8*
+// CHECK16-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP36]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes.4 to i8*), i32 24, i1 false)
 // CHECK16-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
 // CHECK16-NEXT:    [[TMP38:%.*]] = bitcast i8** [[TMP37]] to i32*
-// CHECK16-NEXT:    store i32 [[TMP34]], i32* [[TMP38]], align 4
+// CHECK16-NEXT:    store i32 [[TMP33]], i32* [[TMP38]], align 4
 // CHECK16-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
 // CHECK16-NEXT:    [[TMP40:%.*]] = bitcast i8** [[TMP39]] to i32*
-// CHECK16-NEXT:    store i32 [[TMP34]], i32* [[TMP40]], align 4
-// CHECK16-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 0
-// CHECK16-NEXT:    store i64 4, i64* [[TMP41]], align 4
-// CHECK16-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 0
-// CHECK16-NEXT:    store i8* null, i8** [[TMP42]], align 4
-// CHECK16-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 1
-// CHECK16-NEXT:    [[TMP44:%.*]] = bitcast i8** [[TMP43]] to i32*
-// CHECK16-NEXT:    store i32 [[TMP0]], i32* [[TMP44]], align 4
-// CHECK16-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 1
-// CHECK16-NEXT:    [[TMP46:%.*]] = bitcast i8** [[TMP45]] to i32*
-// CHECK16-NEXT:    store i32 [[TMP0]], i32* [[TMP46]], align 4
-// CHECK16-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 1
-// CHECK16-NEXT:    store i64 4, i64* [[TMP47]], align 4
-// CHECK16-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 1
-// CHECK16-NEXT:    store i8* null, i8** [[TMP48]], align 4
-// CHECK16-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 2
+// CHECK16-NEXT:    store i32 [[TMP33]], i32* [[TMP40]], align 4
+// CHECK16-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 0
+// CHECK16-NEXT:    store i8* null, i8** [[TMP41]], align 4
+// CHECK16-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 1
+// CHECK16-NEXT:    [[TMP43:%.*]] = bitcast i8** [[TMP42]] to i32*
+// CHECK16-NEXT:    store i32 [[TMP0]], i32* [[TMP43]], align 4
+// CHECK16-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 1
+// CHECK16-NEXT:    [[TMP45:%.*]] = bitcast i8** [[TMP44]] to i32*
+// CHECK16-NEXT:    store i32 [[TMP0]], i32* [[TMP45]], align 4
+// CHECK16-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 1
+// CHECK16-NEXT:    store i8* null, i8** [[TMP46]], align 4
+// CHECK16-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 2
+// CHECK16-NEXT:    [[TMP48:%.*]] = bitcast i8** [[TMP47]] to i32**
+// CHECK16-NEXT:    store i32* [[VLA]], i32** [[TMP48]], align 4
+// CHECK16-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 2
 // CHECK16-NEXT:    [[TMP50:%.*]] = bitcast i8** [[TMP49]] to i32**
 // CHECK16-NEXT:    store i32* [[VLA]], i32** [[TMP50]], align 4
-// CHECK16-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 2
-// CHECK16-NEXT:    [[TMP52:%.*]] = bitcast i8** [[TMP51]] to i32**
-// CHECK16-NEXT:    store i32* [[VLA]], i32** [[TMP52]], align 4
-// CHECK16-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 2
-// CHECK16-NEXT:    store i64 [[TMP36]], i64* [[TMP53]], align 4
-// CHECK16-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 2
-// CHECK16-NEXT:    store i8* null, i8** [[TMP54]], align 4
-// CHECK16-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
-// CHECK16-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
-// CHECK16-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 0
-// CHECK16-NEXT:    [[TMP58:%.*]] = load i32, i32* [[N]], align 4
-// CHECK16-NEXT:    store i32 [[TMP58]], i32* [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK16-NEXT:    [[TMP59:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK16-NEXT:    [[SUB11:%.*]] = sub nsw i32 [[TMP59]], 0
+// CHECK16-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 2
+// CHECK16-NEXT:    store i64 [[TMP35]], i64* [[TMP51]], align 4
+// CHECK16-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 2
+// CHECK16-NEXT:    store i8* null, i8** [[TMP52]], align 4
+// CHECK16-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
+// CHECK16-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
+// CHECK16-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 0
+// CHECK16-NEXT:    [[TMP56:%.*]] = load i32, i32* [[N]], align 4
+// CHECK16-NEXT:    store i32 [[TMP56]], i32* [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK16-NEXT:    [[TMP57:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK16-NEXT:    [[SUB11:%.*]] = sub nsw i32 [[TMP57]], 0
 // CHECK16-NEXT:    [[DIV12:%.*]] = sdiv i32 [[SUB11]], 1
 // CHECK16-NEXT:    [[SUB13:%.*]] = sub nsw i32 [[DIV12]], 1
 // CHECK16-NEXT:    store i32 [[SUB13]], i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK16-NEXT:    [[TMP60:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK16-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP60]], 1
-// CHECK16-NEXT:    [[TMP61:%.*]] = zext i32 [[ADD14]] to i64
-// CHECK16-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP61]])
-// CHECK16-NEXT:    [[TMP62:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l143.region_id, i32 3, i8** [[TMP55]], i8** [[TMP56]], i64* [[TMP57]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK16-NEXT:    [[TMP63:%.*]] = icmp ne i32 [[TMP62]], 0
-// CHECK16-NEXT:    br i1 [[TMP63]], label [[OMP_OFFLOAD_FAILED15:%.*]], label [[OMP_OFFLOAD_CONT16:%.*]]
+// CHECK16-NEXT:    [[TMP58:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
+// CHECK16-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP58]], 1
+// CHECK16-NEXT:    [[TMP59:%.*]] = zext i32 [[ADD14]] to i64
+// CHECK16-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP59]])
+// CHECK16-NEXT:    [[TMP60:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l143.region_id, i32 3, i8** [[TMP53]], i8** [[TMP54]], i64* [[TMP55]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.5, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK16-NEXT:    [[TMP61:%.*]] = icmp ne i32 [[TMP60]], 0
+// CHECK16-NEXT:    br i1 [[TMP61]], label [[OMP_OFFLOAD_FAILED15:%.*]], label [[OMP_OFFLOAD_CONT16:%.*]]
 // CHECK16:       omp_offload.failed15:
-// CHECK16-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l143(i32 [[TMP34]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK16-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l143(i32 [[TMP33]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK16-NEXT:    br label [[OMP_OFFLOAD_CONT16]]
 // CHECK16:       omp_offload.cont16:
-// CHECK16-NEXT:    [[TMP64:%.*]] = load i32, i32* [[M]], align 4
-// CHECK16-NEXT:    store i32 [[TMP64]], i32* [[DOTCAPTURE_EXPR_17]], align 4
-// CHECK16-NEXT:    [[TMP65:%.*]] = load i32, i32* [[N]], align 4
-// CHECK16-NEXT:    store i32 [[TMP65]], i32* [[N_CASTED18]], align 4
-// CHECK16-NEXT:    [[TMP66:%.*]] = load i32, i32* [[N_CASTED18]], align 4
-// CHECK16-NEXT:    [[TMP67:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_17]], align 4
-// CHECK16-NEXT:    store i32 [[TMP67]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK16-NEXT:    [[TMP68:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK16-NEXT:    [[TMP69:%.*]] = mul nuw i32 [[TMP0]], 4
-// CHECK16-NEXT:    [[TMP70:%.*]] = sext i32 [[TMP69]] to i64
-// CHECK16-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 0
-// CHECK16-NEXT:    [[TMP72:%.*]] = bitcast i8** [[TMP71]] to i32*
-// CHECK16-NEXT:    store i32 [[TMP66]], i32* [[TMP72]], align 4
-// CHECK16-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 0
-// CHECK16-NEXT:    [[TMP74:%.*]] = bitcast i8** [[TMP73]] to i32*
-// CHECK16-NEXT:    store i32 [[TMP66]], i32* [[TMP74]], align 4
-// CHECK16-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 0
-// CHECK16-NEXT:    store i64 4, i64* [[TMP75]], align 4
-// CHECK16-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 0
-// CHECK16-NEXT:    store i8* null, i8** [[TMP76]], align 4
-// CHECK16-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 1
+// CHECK16-NEXT:    [[TMP62:%.*]] = load i32, i32* [[M]], align 4
+// CHECK16-NEXT:    store i32 [[TMP62]], i32* [[DOTCAPTURE_EXPR_17]], align 4
+// CHECK16-NEXT:    [[TMP63:%.*]] = load i32, i32* [[N]], align 4
+// CHECK16-NEXT:    store i32 [[TMP63]], i32* [[N_CASTED18]], align 4
+// CHECK16-NEXT:    [[TMP64:%.*]] = load i32, i32* [[N_CASTED18]], align 4
+// CHECK16-NEXT:    [[TMP65:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_17]], align 4
+// CHECK16-NEXT:    store i32 [[TMP65]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
+// CHECK16-NEXT:    [[TMP66:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
+// CHECK16-NEXT:    [[TMP67:%.*]] = mul nuw i32 [[TMP0]], 4
+// CHECK16-NEXT:    [[TMP68:%.*]] = sext i32 [[TMP67]] to i64
+// CHECK16-NEXT:    [[TMP69:%.*]] = bitcast [4 x i64]* [[DOTOFFLOAD_SIZES22]] to i8*
+// CHECK16-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP69]], i8* align 4 bitcast ([4 x i64]* @.offload_sizes.8 to i8*), i32 32, i1 false)
+// CHECK16-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 0
+// CHECK16-NEXT:    [[TMP71:%.*]] = bitcast i8** [[TMP70]] to i32*
+// CHECK16-NEXT:    store i32 [[TMP64]], i32* [[TMP71]], align 4
+// CHECK16-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 0
+// CHECK16-NEXT:    [[TMP73:%.*]] = bitcast i8** [[TMP72]] to i32*
+// CHECK16-NEXT:    store i32 [[TMP64]], i32* [[TMP73]], align 4
+// CHECK16-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 0
+// CHECK16-NEXT:    store i8* null, i8** [[TMP74]], align 4
+// CHECK16-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 1
+// CHECK16-NEXT:    [[TMP76:%.*]] = bitcast i8** [[TMP75]] to i32*
+// CHECK16-NEXT:    store i32 [[TMP0]], i32* [[TMP76]], align 4
+// CHECK16-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 1
 // CHECK16-NEXT:    [[TMP78:%.*]] = bitcast i8** [[TMP77]] to i32*
 // CHECK16-NEXT:    store i32 [[TMP0]], i32* [[TMP78]], align 4
-// CHECK16-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 1
-// CHECK16-NEXT:    [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i32*
-// CHECK16-NEXT:    store i32 [[TMP0]], i32* [[TMP80]], align 4
-// CHECK16-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 1
-// CHECK16-NEXT:    store i64 4, i64* [[TMP81]], align 4
-// CHECK16-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 1
-// CHECK16-NEXT:    store i8* null, i8** [[TMP82]], align 4
-// CHECK16-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 2
-// CHECK16-NEXT:    [[TMP84:%.*]] = bitcast i8** [[TMP83]] to i32**
-// CHECK16-NEXT:    store i32* [[VLA]], i32** [[TMP84]], align 4
-// CHECK16-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 2
-// CHECK16-NEXT:    [[TMP86:%.*]] = bitcast i8** [[TMP85]] to i32**
-// CHECK16-NEXT:    store i32* [[VLA]], i32** [[TMP86]], align 4
-// CHECK16-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 2
-// CHECK16-NEXT:    store i64 [[TMP70]], i64* [[TMP87]], align 4
-// CHECK16-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 2
-// CHECK16-NEXT:    store i8* null, i8** [[TMP88]], align 4
-// CHECK16-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 3
-// CHECK16-NEXT:    [[TMP90:%.*]] = bitcast i8** [[TMP89]] to i32*
-// CHECK16-NEXT:    store i32 [[TMP68]], i32* [[TMP90]], align 4
-// CHECK16-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 3
-// CHECK16-NEXT:    [[TMP92:%.*]] = bitcast i8** [[TMP91]] to i32*
-// CHECK16-NEXT:    store i32 [[TMP68]], i32* [[TMP92]], align 4
-// CHECK16-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 3
-// CHECK16-NEXT:    store i64 4, i64* [[TMP93]], align 4
-// CHECK16-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 3
-// CHECK16-NEXT:    store i8* null, i8** [[TMP94]], align 4
-// CHECK16-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 0
-// CHECK16-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 0
-// CHECK16-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 0
-// CHECK16-NEXT:    [[TMP98:%.*]] = load i32, i32* [[N]], align 4
-// CHECK16-NEXT:    store i32 [[TMP98]], i32* [[DOTCAPTURE_EXPR_24]], align 4
-// CHECK16-NEXT:    [[TMP99:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_24]], align 4
-// CHECK16-NEXT:    [[SUB26:%.*]] = sub nsw i32 [[TMP99]], 0
+// CHECK16-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 1
+// CHECK16-NEXT:    store i8* null, i8** [[TMP79]], align 4
+// CHECK16-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 2
+// CHECK16-NEXT:    [[TMP81:%.*]] = bitcast i8** [[TMP80]] to i32**
+// CHECK16-NEXT:    store i32* [[VLA]], i32** [[TMP81]], align 4
+// CHECK16-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 2
+// CHECK16-NEXT:    [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i32**
+// CHECK16-NEXT:    store i32* [[VLA]], i32** [[TMP83]], align 4
+// CHECK16-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 2
+// CHECK16-NEXT:    store i64 [[TMP68]], i64* [[TMP84]], align 4
+// CHECK16-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 2
+// CHECK16-NEXT:    store i8* null, i8** [[TMP85]], align 4
+// CHECK16-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 3
+// CHECK16-NEXT:    [[TMP87:%.*]] = bitcast i8** [[TMP86]] to i32*
+// CHECK16-NEXT:    store i32 [[TMP66]], i32* [[TMP87]], align 4
+// CHECK16-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 3
+// CHECK16-NEXT:    [[TMP89:%.*]] = bitcast i8** [[TMP88]] to i32*
+// CHECK16-NEXT:    store i32 [[TMP66]], i32* [[TMP89]], align 4
+// CHECK16-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 3
+// CHECK16-NEXT:    store i8* null, i8** [[TMP90]], align 4
+// CHECK16-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 0
+// CHECK16-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 0
+// CHECK16-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 0
+// CHECK16-NEXT:    [[TMP94:%.*]] = load i32, i32* [[N]], align 4
+// CHECK16-NEXT:    store i32 [[TMP94]], i32* [[DOTCAPTURE_EXPR_24]], align 4
+// CHECK16-NEXT:    [[TMP95:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_24]], align 4
+// CHECK16-NEXT:    [[SUB26:%.*]] = sub nsw i32 [[TMP95]], 0
 // CHECK16-NEXT:    [[DIV27:%.*]] = sdiv i32 [[SUB26]], 1
 // CHECK16-NEXT:    [[SUB28:%.*]] = sub nsw i32 [[DIV27]], 1
 // CHECK16-NEXT:    store i32 [[SUB28]], i32* [[DOTCAPTURE_EXPR_25]], align 4
-// CHECK16-NEXT:    [[TMP100:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_25]], align 4
-// CHECK16-NEXT:    [[ADD29:%.*]] = add nsw i32 [[TMP100]], 1
-// CHECK16-NEXT:    [[TMP101:%.*]] = zext i32 [[ADD29]] to i64
-// CHECK16-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP101]])
-// CHECK16-NEXT:    [[TMP102:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l147.region_id, i32 4, i8** [[TMP95]], i8** [[TMP96]], i64* [[TMP97]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.7, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK16-NEXT:    [[TMP103:%.*]] = icmp ne i32 [[TMP102]], 0
-// CHECK16-NEXT:    br i1 [[TMP103]], label [[OMP_OFFLOAD_FAILED30:%.*]], label [[OMP_OFFLOAD_CONT31:%.*]]
+// CHECK16-NEXT:    [[TMP96:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_25]], align 4
+// CHECK16-NEXT:    [[ADD29:%.*]] = add nsw i32 [[TMP96]], 1
+// CHECK16-NEXT:    [[TMP97:%.*]] = zext i32 [[ADD29]] to i64
+// CHECK16-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP97]])
+// CHECK16-NEXT:    [[TMP98:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l147.region_id, i32 4, i8** [[TMP91]], i8** [[TMP92]], i64* [[TMP93]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK16-NEXT:    [[TMP99:%.*]] = icmp ne i32 [[TMP98]], 0
+// CHECK16-NEXT:    br i1 [[TMP99]], label [[OMP_OFFLOAD_FAILED30:%.*]], label [[OMP_OFFLOAD_CONT31:%.*]]
 // CHECK16:       omp_offload.failed30:
-// CHECK16-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l147(i32 [[TMP66]], i32 [[TMP0]], i32* [[VLA]], i32 [[TMP68]]) #[[ATTR3]]
+// CHECK16-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l147(i32 [[TMP64]], i32 [[TMP0]], i32* [[VLA]], i32 [[TMP66]]) #[[ATTR3]]
 // CHECK16-NEXT:    br label [[OMP_OFFLOAD_CONT31]]
 // CHECK16:       omp_offload.cont31:
-// CHECK16-NEXT:    [[TMP104:%.*]] = load i32, i32* [[N]], align 4
-// CHECK16-NEXT:    store i32 [[TMP104]], i32* [[N_CASTED32]], align 4
-// CHECK16-NEXT:    [[TMP105:%.*]] = load i32, i32* [[N_CASTED32]], align 4
-// CHECK16-NEXT:    [[TMP106:%.*]] = mul nuw i32 [[TMP0]], 4
-// CHECK16-NEXT:    [[TMP107:%.*]] = sext i32 [[TMP106]] to i64
-// CHECK16-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS33]], i32 0, i32 0
-// CHECK16-NEXT:    [[TMP109:%.*]] = bitcast i8** [[TMP108]] to i32*
-// CHECK16-NEXT:    store i32 [[TMP105]], i32* [[TMP109]], align 4
-// CHECK16-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS34]], i32 0, i32 0
+// CHECK16-NEXT:    [[TMP100:%.*]] = load i32, i32* [[N]], align 4
+// CHECK16-NEXT:    store i32 [[TMP100]], i32* [[N_CASTED32]], align 4
+// CHECK16-NEXT:    [[TMP101:%.*]] = load i32, i32* [[N_CASTED32]], align 4
+// CHECK16-NEXT:    [[TMP102:%.*]] = mul nuw i32 [[TMP0]], 4
+// CHECK16-NEXT:    [[TMP103:%.*]] = sext i32 [[TMP102]] to i64
+// CHECK16-NEXT:    [[TMP104:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES36]] to i8*
+// CHECK16-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP104]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes.12 to i8*), i32 24, i1 false)
+// CHECK16-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS33]], i32 0, i32 0
+// CHECK16-NEXT:    [[TMP106:%.*]] = bitcast i8** [[TMP105]] to i32*
+// CHECK16-NEXT:    store i32 [[TMP101]], i32* [[TMP106]], align 4
+// CHECK16-NEXT:    [[TMP107:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS34]], i32 0, i32 0
+// CHECK16-NEXT:    [[TMP108:%.*]] = bitcast i8** [[TMP107]] to i32*
+// CHECK16-NEXT:    store i32 [[TMP101]], i32* [[TMP108]], align 4
+// CHECK16-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS35]], i32 0, i32 0
+// CHECK16-NEXT:    store i8* null, i8** [[TMP109]], align 4
+// CHECK16-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS33]], i32 0, i32 1
 // CHECK16-NEXT:    [[TMP111:%.*]] = bitcast i8** [[TMP110]] to i32*
-// CHECK16-NEXT:    store i32 [[TMP105]], i32* [[TMP111]], align 4
-// CHECK16-NEXT:    [[TMP112:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES36]], i32 0, i32 0
-// CHECK16-NEXT:    store i64 4, i64* [[TMP112]], align 4
-// CHECK16-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS35]], i32 0, i32 0
-// CHECK16-NEXT:    store i8* null, i8** [[TMP113]], align 4
-// CHECK16-NEXT:    [[TMP114:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS33]], i32 0, i32 1
-// CHECK16-NEXT:    [[TMP115:%.*]] = bitcast i8** [[TMP114]] to i32*
-// CHECK16-NEXT:    store i32 [[TMP0]], i32* [[TMP115]], align 4
-// CHECK16-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS34]], i32 0, i32 1
-// CHECK16-NEXT:    [[TMP117:%.*]] = bitcast i8** [[TMP116]] to i32*
-// CHECK16-NEXT:    store i32 [[TMP0]], i32* [[TMP117]], align 4
-// CHECK16-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES36]], i32 0, i32 1
-// CHECK16-NEXT:    store i64 4, i64* [[TMP118]], align 4
-// CHECK16-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS35]], i32 0, i32 1
-// CHECK16-NEXT:    store i8* null, i8** [[TMP119]], align 4
-// CHECK16-NEXT:    [[TMP120:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS33]], i32 0, i32 2
-// CHECK16-NEXT:    [[TMP121:%.*]] = bitcast i8** [[TMP120]] to i32**
-// CHECK16-NEXT:    store i32* [[VLA]], i32** [[TMP121]], align 4
-// CHECK16-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS34]], i32 0, i32 2
-// CHECK16-NEXT:    [[TMP123:%.*]] = bitcast i8** [[TMP122]] to i32**
-// CHECK16-NEXT:    store i32* [[VLA]], i32** [[TMP123]], align 4
-// CHECK16-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES36]], i32 0, i32 2
-// CHECK16-NEXT:    store i64 [[TMP107]], i64* [[TMP124]], align 4
-// CHECK16-NEXT:    [[TMP125:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS35]], i32 0, i32 2
-// CHECK16-NEXT:    store i8* null, i8** [[TMP125]], align 4
-// CHECK16-NEXT:    [[TMP126:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS33]], i32 0, i32 0
-// CHECK16-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS34]], i32 0, i32 0
-// CHECK16-NEXT:    [[TMP128:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES36]], i32 0, i32 0
-// CHECK16-NEXT:    [[TMP129:%.*]] = load i32, i32* [[N]], align 4
-// CHECK16-NEXT:    store i32 [[TMP129]], i32* [[DOTCAPTURE_EXPR_38]], align 4
-// CHECK16-NEXT:    [[TMP130:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_38]], align 4
-// CHECK16-NEXT:    [[SUB40:%.*]] = sub nsw i32 [[TMP130]], 0
+// CHECK16-NEXT:    store i32 [[TMP0]], i32* [[TMP111]], align 4
+// CHECK16-NEXT:    [[TMP112:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS34]], i32 0, i32 1
+// CHECK16-NEXT:    [[TMP113:%.*]] = bitcast i8** [[TMP112]] to i32*
+// CHECK16-NEXT:    store i32 [[TMP0]], i32* [[TMP113]], align 4
+// CHECK16-NEXT:    [[TMP114:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS35]], i32 0, i32 1
+// CHECK16-NEXT:    store i8* null, i8** [[TMP114]], align 4
+// CHECK16-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS33]], i32 0, i32 2
+// CHECK16-NEXT:    [[TMP116:%.*]] = bitcast i8** [[TMP115]] to i32**
+// CHECK16-NEXT:    store i32* [[VLA]], i32** [[TMP116]], align 4
+// CHECK16-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS34]], i32 0, i32 2
+// CHECK16-NEXT:    [[TMP118:%.*]] = bitcast i8** [[TMP117]] to i32**
+// CHECK16-NEXT:    store i32* [[VLA]], i32** [[TMP118]], align 4
+// CHECK16-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES36]], i32 0, i32 2
+// CHECK16-NEXT:    store i64 [[TMP103]], i64* [[TMP119]], align 4
+// CHECK16-NEXT:    [[TMP120:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS35]], i32 0, i32 2
+// CHECK16-NEXT:    store i8* null, i8** [[TMP120]], align 4
+// CHECK16-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS33]], i32 0, i32 0
+// CHECK16-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS34]], i32 0, i32 0
+// CHECK16-NEXT:    [[TMP123:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES36]], i32 0, i32 0
+// CHECK16-NEXT:    [[TMP124:%.*]] = load i32, i32* [[N]], align 4
+// CHECK16-NEXT:    store i32 [[TMP124]], i32* [[DOTCAPTURE_EXPR_38]], align 4
+// CHECK16-NEXT:    [[TMP125:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_38]], align 4
+// CHECK16-NEXT:    [[SUB40:%.*]] = sub nsw i32 [[TMP125]], 0
 // CHECK16-NEXT:    [[DIV41:%.*]] = sdiv i32 [[SUB40]], 1
 // CHECK16-NEXT:    [[SUB42:%.*]] = sub nsw i32 [[DIV41]], 1
 // CHECK16-NEXT:    store i32 [[SUB42]], i32* [[DOTCAPTURE_EXPR_39]], align 4
-// CHECK16-NEXT:    [[TMP131:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_39]], align 4
-// CHECK16-NEXT:    [[ADD43:%.*]] = add nsw i32 [[TMP131]], 1
-// CHECK16-NEXT:    [[TMP132:%.*]] = zext i32 [[ADD43]] to i64
-// CHECK16-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP132]])
-// CHECK16-NEXT:    [[TMP133:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l151.region_id, i32 3, i8** [[TMP126]], i8** [[TMP127]], i64* [[TMP128]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.10, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK16-NEXT:    [[TMP134:%.*]] = icmp ne i32 [[TMP133]], 0
-// CHECK16-NEXT:    br i1 [[TMP134]], label [[OMP_OFFLOAD_FAILED44:%.*]], label [[OMP_OFFLOAD_CONT45:%.*]]
+// CHECK16-NEXT:    [[TMP126:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_39]], align 4
+// CHECK16-NEXT:    [[ADD43:%.*]] = add nsw i32 [[TMP126]], 1
+// CHECK16-NEXT:    [[TMP127:%.*]] = zext i32 [[ADD43]] to i64
+// CHECK16-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP127]])
+// CHECK16-NEXT:    [[TMP128:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l151.region_id, i32 3, i8** [[TMP121]], i8** [[TMP122]], i64* [[TMP123]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK16-NEXT:    [[TMP129:%.*]] = icmp ne i32 [[TMP128]], 0
+// CHECK16-NEXT:    br i1 [[TMP129]], label [[OMP_OFFLOAD_FAILED44:%.*]], label [[OMP_OFFLOAD_CONT45:%.*]]
 // CHECK16:       omp_offload.failed44:
-// CHECK16-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l151(i32 [[TMP105]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK16-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l151(i32 [[TMP101]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK16-NEXT:    br label [[OMP_OFFLOAD_CONT45]]
 // CHECK16:       omp_offload.cont45:
-// CHECK16-NEXT:    [[TMP135:%.*]] = load i32, i32* [[M]], align 4
-// CHECK16-NEXT:    store i32 [[TMP135]], i32* [[DOTCAPTURE_EXPR_46]], align 4
-// CHECK16-NEXT:    [[TMP136:%.*]] = load i32, i32* [[N]], align 4
-// CHECK16-NEXT:    store i32 [[TMP136]], i32* [[N_CASTED47]], align 4
-// CHECK16-NEXT:    [[TMP137:%.*]] = load i32, i32* [[N_CASTED47]], align 4
-// CHECK16-NEXT:    [[TMP138:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_46]], align 4
-// CHECK16-NEXT:    store i32 [[TMP138]], i32* [[DOTCAPTURE_EXPR__CASTED48]], align 4
-// CHECK16-NEXT:    [[TMP139:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED48]], align 4
-// CHECK16-NEXT:    [[TMP140:%.*]] = mul nuw i32 [[TMP0]], 4
-// CHECK16-NEXT:    [[TMP141:%.*]] = sext i32 [[TMP140]] to i64
-// CHECK16-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS49]], i32 0, i32 0
-// CHECK16-NEXT:    [[TMP143:%.*]] = bitcast i8** [[TMP142]] to i32*
-// CHECK16-NEXT:    store i32 [[TMP137]], i32* [[TMP143]], align 4
-// CHECK16-NEXT:    [[TMP144:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS50]], i32 0, i32 0
-// CHECK16-NEXT:    [[TMP145:%.*]] = bitcast i8** [[TMP144]] to i32*
-// CHECK16-NEXT:    store i32 [[TMP137]], i32* [[TMP145]], align 4
-// CHECK16-NEXT:    [[TMP146:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES52]], i32 0, i32 0
-// CHECK16-NEXT:    store i64 4, i64* [[TMP146]], align 4
-// CHECK16-NEXT:    [[TMP147:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS51]], i32 0, i32 0
+// CHECK16-NEXT:    [[TMP130:%.*]] = load i32, i32* [[M]], align 4
+// CHECK16-NEXT:    store i32 [[TMP130]], i32* [[DOTCAPTURE_EXPR_46]], align 4
+// CHECK16-NEXT:    [[TMP131:%.*]] = load i32, i32* [[N]], align 4
+// CHECK16-NEXT:    store i32 [[TMP131]], i32* [[N_CASTED47]], align 4
+// CHECK16-NEXT:    [[TMP132:%.*]] = load i32, i32* [[N_CASTED47]], align 4
+// CHECK16-NEXT:    [[TMP133:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_46]], align 4
+// CHECK16-NEXT:    store i32 [[TMP133]], i32* [[DOTCAPTURE_EXPR__CASTED48]], align 4
+// CHECK16-NEXT:    [[TMP134:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED48]], align 4
+// CHECK16-NEXT:    [[TMP135:%.*]] = mul nuw i32 [[TMP0]], 4
+// CHECK16-NEXT:    [[TMP136:%.*]] = sext i32 [[TMP135]] to i64
+// CHECK16-NEXT:    [[TMP137:%.*]] = bitcast [4 x i64]* [[DOTOFFLOAD_SIZES52]] to i8*
+// CHECK16-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP137]], i8* align 4 bitcast ([4 x i64]* @.offload_sizes.16 to i8*), i32 32, i1 false)
+// CHECK16-NEXT:    [[TMP138:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS49]], i32 0, i32 0
+// CHECK16-NEXT:    [[TMP139:%.*]] = bitcast i8** [[TMP138]] to i32*
+// CHECK16-NEXT:    store i32 [[TMP132]], i32* [[TMP139]], align 4
+// CHECK16-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS50]], i32 0, i32 0
+// CHECK16-NEXT:    [[TMP141:%.*]] = bitcast i8** [[TMP140]] to i32*
+// CHECK16-NEXT:    store i32 [[TMP132]], i32* [[TMP141]], align 4
+// CHECK16-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS51]], i32 0, i32 0
+// CHECK16-NEXT:    store i8* null, i8** [[TMP142]], align 4
+// CHECK16-NEXT:    [[TMP143:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS49]], i32 0, i32 1
+// CHECK16-NEXT:    [[TMP144:%.*]] = bitcast i8** [[TMP143]] to i32*
+// CHECK16-NEXT:    store i32 [[TMP0]], i32* [[TMP144]], align 4
+// CHECK16-NEXT:    [[TMP145:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS50]], i32 0, i32 1
+// CHECK16-NEXT:    [[TMP146:%.*]] = bitcast i8** [[TMP145]] to i32*
+// CHECK16-NEXT:    store i32 [[TMP0]], i32* [[TMP146]], align 4
+// CHECK16-NEXT:    [[TMP147:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS51]], i32 0, i32 1
 // CHECK16-NEXT:    store i8* null, i8** [[TMP147]], align 4
-// CHECK16-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS49]], i32 0, i32 1
-// CHECK16-NEXT:    [[TMP149:%.*]] = bitcast i8** [[TMP148]] to i32*
-// CHECK16-NEXT:    store i32 [[TMP0]], i32* [[TMP149]], align 4
-// CHECK16-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS50]], i32 0, i32 1
-// CHECK16-NEXT:    [[TMP151:%.*]] = bitcast i8** [[TMP150]] to i32*
-// CHECK16-NEXT:    store i32 [[TMP0]], i32* [[TMP151]], align 4
-// CHECK16-NEXT:    [[TMP152:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES52]], i32 0, i32 1
-// CHECK16-NEXT:    store i64 4, i64* [[TMP152]], align 4
-// CHECK16-NEXT:    [[TMP153:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS51]], i32 0, i32 1
+// CHECK16-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS49]], i32 0, i32 2
+// CHECK16-NEXT:    [[TMP149:%.*]] = bitcast i8** [[TMP148]] to i32**
+// CHECK16-NEXT:    store i32* [[VLA]], i32** [[TMP149]], align 4
+// CHECK16-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS50]], i32 0, i32 2
+// CHECK16-NEXT:    [[TMP151:%.*]] = bitcast i8** [[TMP150]] to i32**
+// CHECK16-NEXT:    store i32* [[VLA]], i32** [[TMP151]], align 4
+// CHECK16-NEXT:    [[TMP152:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES52]], i32 0, i32 2
+// CHECK16-NEXT:    store i64 [[TMP136]], i64* [[TMP152]], align 4
+// CHECK16-NEXT:    [[TMP153:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS51]], i32 0, i32 2
 // CHECK16-NEXT:    store i8* null, i8** [[TMP153]], align 4
-// CHECK16-NEXT:    [[TMP154:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS49]], i32 0, i32 2
-// CHECK16-NEXT:    [[TMP155:%.*]] = bitcast i8** [[TMP154]] to i32**
-// CHECK16-NEXT:    store i32* [[VLA]], i32** [[TMP155]], align 4
-// CHECK16-NEXT:    [[TMP156:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS50]], i32 0, i32 2
-// CHECK16-NEXT:    [[TMP157:%.*]] = bitcast i8** [[TMP156]] to i32**
-// CHECK16-NEXT:    store i32* [[VLA]], i32** [[TMP157]], align 4
-// CHECK16-NEXT:    [[TMP158:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES52]], i32 0, i32 2
-// CHECK16-NEXT:    store i64 [[TMP141]], i64* [[TMP158]], align 4
-// CHECK16-NEXT:    [[TMP159:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS51]], i32 0, i32 2
-// CHECK16-NEXT:    store i8* null, i8** [[TMP159]], align 4
-// CHECK16-NEXT:    [[TMP160:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS49]], i32 0, i32 3
-// CHECK16-NEXT:    [[TMP161:%.*]] = bitcast i8** [[TMP160]] to i32*
-// CHECK16-NEXT:    store i32 [[TMP139]], i32* [[TMP161]], align 4
-// CHECK16-NEXT:    [[TMP162:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS50]], i32 0, i32 3
-// CHECK16-NEXT:    [[TMP163:%.*]] = bitcast i8** [[TMP162]] to i32*
-// CHECK16-NEXT:    store i32 [[TMP139]], i32* [[TMP163]], align 4
-// CHECK16-NEXT:    [[TMP164:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES52]], i32 0, i32 3
-// CHECK16-NEXT:    store i64 4, i64* [[TMP164]], align 4
-// CHECK16-NEXT:    [[TMP165:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS51]], i32 0, i32 3
-// CHECK16-NEXT:    store i8* null, i8** [[TMP165]], align 4
-// CHECK16-NEXT:    [[TMP166:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS49]], i32 0, i32 0
-// CHECK16-NEXT:    [[TMP167:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS50]], i32 0, i32 0
-// CHECK16-NEXT:    [[TMP168:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES52]], i32 0, i32 0
-// CHECK16-NEXT:    [[TMP169:%.*]] = load i32, i32* [[N]], align 4
-// CHECK16-NEXT:    store i32 [[TMP169]], i32* [[DOTCAPTURE_EXPR_54]], align 4
-// CHECK16-NEXT:    [[TMP170:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_54]], align 4
-// CHECK16-NEXT:    [[SUB56:%.*]] = sub nsw i32 [[TMP170]], 0
+// CHECK16-NEXT:    [[TMP154:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS49]], i32 0, i32 3
+// CHECK16-NEXT:    [[TMP155:%.*]] = bitcast i8** [[TMP154]] to i32*
+// CHECK16-NEXT:    store i32 [[TMP134]], i32* [[TMP155]], align 4
+// CHECK16-NEXT:    [[TMP156:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS50]], i32 0, i32 3
+// CHECK16-NEXT:    [[TMP157:%.*]] = bitcast i8** [[TMP156]] to i32*
+// CHECK16-NEXT:    store i32 [[TMP134]], i32* [[TMP157]], align 4
+// CHECK16-NEXT:    [[TMP158:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS51]], i32 0, i32 3
+// CHECK16-NEXT:    store i8* null, i8** [[TMP158]], align 4
+// CHECK16-NEXT:    [[TMP159:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS49]], i32 0, i32 0
+// CHECK16-NEXT:    [[TMP160:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS50]], i32 0, i32 0
+// CHECK16-NEXT:    [[TMP161:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES52]], i32 0, i32 0
+// CHECK16-NEXT:    [[TMP162:%.*]] = load i32, i32* [[N]], align 4
+// CHECK16-NEXT:    store i32 [[TMP162]], i32* [[DOTCAPTURE_EXPR_54]], align 4
+// CHECK16-NEXT:    [[TMP163:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_54]], align 4
+// CHECK16-NEXT:    [[SUB56:%.*]] = sub nsw i32 [[TMP163]], 0
 // CHECK16-NEXT:    [[DIV57:%.*]] = sdiv i32 [[SUB56]], 1
 // CHECK16-NEXT:    [[SUB58:%.*]] = sub nsw i32 [[DIV57]], 1
 // CHECK16-NEXT:    store i32 [[SUB58]], i32* [[DOTCAPTURE_EXPR_55]], align 4
-// CHECK16-NEXT:    [[TMP171:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_55]], align 4
-// CHECK16-NEXT:    [[ADD59:%.*]] = add nsw i32 [[TMP171]], 1
-// CHECK16-NEXT:    [[TMP172:%.*]] = zext i32 [[ADD59]] to i64
-// CHECK16-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP172]])
-// CHECK16-NEXT:    [[TMP173:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l155.region_id, i32 4, i8** [[TMP166]], i8** [[TMP167]], i64* [[TMP168]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK16-NEXT:    [[TMP174:%.*]] = icmp ne i32 [[TMP173]], 0
-// CHECK16-NEXT:    br i1 [[TMP174]], label [[OMP_OFFLOAD_FAILED60:%.*]], label [[OMP_OFFLOAD_CONT61:%.*]]
+// CHECK16-NEXT:    [[TMP164:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_55]], align 4
+// CHECK16-NEXT:    [[ADD59:%.*]] = add nsw i32 [[TMP164]], 1
+// CHECK16-NEXT:    [[TMP165:%.*]] = zext i32 [[ADD59]] to i64
+// CHECK16-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP165]])
+// CHECK16-NEXT:    [[TMP166:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l155.region_id, i32 4, i8** [[TMP159]], i8** [[TMP160]], i64* [[TMP161]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.17, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK16-NEXT:    [[TMP167:%.*]] = icmp ne i32 [[TMP166]], 0
+// CHECK16-NEXT:    br i1 [[TMP167]], label [[OMP_OFFLOAD_FAILED60:%.*]], label [[OMP_OFFLOAD_CONT61:%.*]]
 // CHECK16:       omp_offload.failed60:
-// CHECK16-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l155(i32 [[TMP137]], i32 [[TMP0]], i32* [[VLA]], i32 [[TMP139]]) #[[ATTR3]]
+// CHECK16-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l155(i32 [[TMP132]], i32 [[TMP0]], i32* [[VLA]], i32 [[TMP134]]) #[[ATTR3]]
 // CHECK16-NEXT:    br label [[OMP_OFFLOAD_CONT61]]
 // CHECK16:       omp_offload.cont61:
-// CHECK16-NEXT:    [[TMP175:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK16-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10EEiT_(i32 noundef [[TMP175]])
+// CHECK16-NEXT:    [[TMP168:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK16-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10EEiT_(i32 noundef [[TMP168]])
 // CHECK16-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK16-NEXT:    [[TMP176:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK16-NEXT:    call void @llvm.stackrestore(i8* [[TMP176]])
-// CHECK16-NEXT:    [[TMP177:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK16-NEXT:    ret i32 [[TMP177]]
+// CHECK16-NEXT:    [[TMP169:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK16-NEXT:    call void @llvm.stackrestore(i8* [[TMP169]])
+// CHECK16-NEXT:    [[TMP170:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK16-NEXT:    ret i32 [[TMP170]]
 //
 //
 // CHECK16-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l139
@@ -17629,11 +17573,11 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
 // CHECK16-NEXT:    store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK16-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32 [[TMP3]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP5]])
+// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32 [[TMP3]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP5]])
 // CHECK16-NEXT:    ret void
 //
 //
-// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..5
+// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..6
 // CHECK16-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK16-NEXT:  entry:
 // CHECK16-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -17714,7 +17658,7 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !29
 // CHECK16-NEXT:    store i32 [[TMP20]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !29
 // CHECK16-NEXT:    [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !29
-// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*, i32)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32 [[TMP19]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP21]]), !llvm.access.group !29
+// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*, i32)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32 [[TMP19]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP21]]), !llvm.access.group !29
 // CHECK16-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK16:       omp.inner.for.inc:
 // CHECK16-NEXT:    [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29
@@ -17768,7 +17712,7 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    ret void
 //
 //
-// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..6
+// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..7
 // CHECK16-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK16-NEXT:  entry:
 // CHECK16-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -17898,11 +17842,11 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4
 // CHECK16-NEXT:    store i32 [[TMP2]], i32* [[N_CASTED]], align 4
 // CHECK16-NEXT:    [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4
-// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), i32 [[TMP3]], i32 [[TMP0]], i32* [[TMP1]])
+// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), i32 [[TMP3]], i32 [[TMP0]], i32* [[TMP1]])
 // CHECK16-NEXT:    ret void
 //
 //
-// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..8
+// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..10
 // CHECK16-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] {
 // CHECK16-NEXT:  entry:
 // CHECK16-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -17975,7 +17919,7 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    [[TMP17:%.*]] = load i32, i32* [[N_ADDR]], align 4, !llvm.access.group !35
 // CHECK16-NEXT:    store i32 [[TMP17]], i32* [[N_CASTED]], align 4, !llvm.access.group !35
 // CHECK16-NEXT:    [[TMP18:%.*]] = load i32, i32* [[N_CASTED]], align 4, !llvm.access.group !35
-// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], i32* [[TMP1]]), !llvm.access.group !35
+// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], i32* [[TMP1]]), !llvm.access.group !35
 // CHECK16-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK16:       omp.inner.for.inc:
 // CHECK16-NEXT:    [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35
@@ -18006,7 +17950,7 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    ret void
 //
 //
-// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..9
+// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..11
 // CHECK16-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] {
 // CHECK16-NEXT:  entry:
 // CHECK16-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -18136,11 +18080,11 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
 // CHECK16-NEXT:    store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK16-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32 [[TMP3]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP5]])
+// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i32 [[TMP3]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP5]])
 // CHECK16-NEXT:    ret void
 //
 //
-// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..11
+// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..14
 // CHECK16-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK16-NEXT:  entry:
 // CHECK16-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -18219,7 +18163,7 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !41
 // CHECK16-NEXT:    store i32 [[TMP19]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !41
 // CHECK16-NEXT:    [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !41
-// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*, i32)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP20]]), !llvm.access.group !41
+// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*, i32)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP20]]), !llvm.access.group !41
 // CHECK16-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK16:       omp.inner.for.inc:
 // CHECK16-NEXT:    [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41
@@ -18250,7 +18194,7 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    ret void
 //
 //
-// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..12
+// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..15
 // CHECK16-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK16-NEXT:  entry:
 // CHECK16-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -18363,7 +18307,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK16-LABEL: define {{[^@]+}}@_Z5tmainIiLi10EEiT_
-// CHECK16-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK16-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK16-NEXT:  entry:
 // CHECK16-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK16-NEXT:    [[A:%.*]] = alloca [10 x i32], align 4
@@ -18405,7 +18349,7 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK16-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK16-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK16-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l112.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.16, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK16-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l112.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.20, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.21, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK16-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK16-NEXT:    br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK16:       omp_offload.failed:
@@ -18423,7 +18367,7 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK16-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
 // CHECK16-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK16-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l116.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.19, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.20, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK16-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l116.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.24, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.25, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK16-NEXT:    [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
 // CHECK16-NEXT:    br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]]
 // CHECK16:       omp_offload.failed5:
@@ -18454,7 +18398,7 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0
 // CHECK16-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS8]], i32 0, i32 0
 // CHECK16-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK16-NEXT:    [[TMP33:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l120.region_id, i32 2, i8** [[TMP31]], i8** [[TMP32]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.23, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.24, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK16-NEXT:    [[TMP33:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l120.region_id, i32 2, i8** [[TMP31]], i8** [[TMP32]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.28, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.29, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK16-NEXT:    [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0
 // CHECK16-NEXT:    br i1 [[TMP34]], label [[OMP_OFFLOAD_FAILED11:%.*]], label [[OMP_OFFLOAD_CONT12:%.*]]
 // CHECK16:       omp_offload.failed11:
@@ -18472,7 +18416,7 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0
 // CHECK16-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 0
 // CHECK16-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK16-NEXT:    [[TMP42:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l124.region_id, i32 1, i8** [[TMP40]], i8** [[TMP41]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.27, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.28, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK16-NEXT:    [[TMP42:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l124.region_id, i32 1, i8** [[TMP40]], i8** [[TMP41]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.32, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.33, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK16-NEXT:    [[TMP43:%.*]] = icmp ne i32 [[TMP42]], 0
 // CHECK16-NEXT:    br i1 [[TMP43]], label [[OMP_OFFLOAD_FAILED17:%.*]], label [[OMP_OFFLOAD_CONT18:%.*]]
 // CHECK16:       omp_offload.failed17:
@@ -18503,7 +18447,7 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 0
 // CHECK16-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 0
 // CHECK16-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK16-NEXT:    [[TMP59:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l128.region_id, i32 2, i8** [[TMP57]], i8** [[TMP58]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.31, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.32, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK16-NEXT:    [[TMP59:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l128.region_id, i32 2, i8** [[TMP57]], i8** [[TMP58]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.36, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.37, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK16-NEXT:    [[TMP60:%.*]] = icmp ne i32 [[TMP59]], 0
 // CHECK16-NEXT:    br i1 [[TMP60]], label [[OMP_OFFLOAD_FAILED25:%.*]], label [[OMP_OFFLOAD_CONT26:%.*]]
 // CHECK16:       omp_offload.failed25:
@@ -18519,11 +18463,11 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4
 // CHECK16-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4
 // CHECK16-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4
-// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK16-NEXT:    ret void
 //
 //
-// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..14
+// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..18
 // CHECK16-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK16-NEXT:  entry:
 // CHECK16-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -18569,7 +18513,7 @@ int main (int argc, char **argv) {
 // CHECK16:       omp.inner.for.body:
 // CHECK16-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !47
 // CHECK16-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !47
-// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !47
+// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !47
 // CHECK16-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK16:       omp.inner.for.inc:
 // CHECK16-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47
@@ -18591,7 +18535,7 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    ret void
 //
 //
-// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..15
+// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..19
 // CHECK16-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK16-NEXT:  entry:
 // CHECK16-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -18678,11 +18622,11 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4
 // CHECK16-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4
 // CHECK16-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4
-// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..17 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..22 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK16-NEXT:    ret void
 //
 //
-// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..17
+// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..22
 // CHECK16-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK16-NEXT:  entry:
 // CHECK16-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -18728,7 +18672,7 @@ int main (int argc, char **argv) {
 // CHECK16:       omp.inner.for.body:
 // CHECK16-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !53
 // CHECK16-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !53
-// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !53
+// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..23 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !53
 // CHECK16-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK16:       omp.inner.for.inc:
 // CHECK16-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53
@@ -18750,7 +18694,7 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    ret void
 //
 //
-// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..18
+// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..23
 // CHECK16-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK16-NEXT:  entry:
 // CHECK16-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -18843,11 +18787,11 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
 // CHECK16-NEXT:    store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK16-NEXT:    [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i32)* @.omp_outlined..21 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i32 [[TMP2]])
+// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i32)* @.omp_outlined..26 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i32 [[TMP2]])
 // CHECK16-NEXT:    ret void
 //
 //
-// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..21
+// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..26
 // CHECK16-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK16-NEXT:  entry:
 // CHECK16-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -18899,7 +18843,7 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !59
 // CHECK16-NEXT:    store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !59
 // CHECK16-NEXT:    [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !59
-// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..22 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]), !llvm.access.group !59
+// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..27 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]), !llvm.access.group !59
 // CHECK16-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK16:       omp.inner.for.inc:
 // CHECK16-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59
@@ -18921,7 +18865,7 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    ret void
 //
 //
-// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..22
+// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..27
 // CHECK16-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK16-NEXT:  entry:
 // CHECK16-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -19030,11 +18974,11 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4
 // CHECK16-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4
 // CHECK16-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4
-// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..25 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK16-NEXT:    ret void
 //
 //
-// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..25
+// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..30
 // CHECK16-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK16-NEXT:  entry:
 // CHECK16-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -19080,7 +19024,7 @@ int main (int argc, char **argv) {
 // CHECK16:       omp.inner.for.body:
 // CHECK16-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !65
 // CHECK16-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !65
-// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !65
+// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !65
 // CHECK16-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK16:       omp.inner.for.inc:
 // CHECK16-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !65
@@ -19102,7 +19046,7 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    ret void
 //
 //
-// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..26
+// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..31
 // CHECK16-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK16-NEXT:  entry:
 // CHECK16-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -19193,11 +19137,11 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
 // CHECK16-NEXT:    store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK16-NEXT:    [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i32)* @.omp_outlined..29 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i32 [[TMP2]])
+// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i32)* @.omp_outlined..34 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i32 [[TMP2]])
 // CHECK16-NEXT:    ret void
 //
 //
-// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..29
+// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..34
 // CHECK16-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK16-NEXT:  entry:
 // CHECK16-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -19249,7 +19193,7 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !71
 // CHECK16-NEXT:    store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !71
 // CHECK16-NEXT:    [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !71
-// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]), !llvm.access.group !71
+// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]), !llvm.access.group !71
 // CHECK16-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK16:       omp.inner.for.inc:
 // CHECK16-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !71
@@ -19271,7 +19215,7 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    ret void
 //
 //
-// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..30
+// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..35
 // CHECK16-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK16-NEXT:  entry:
 // CHECK16-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -19354,7 +19298,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK16-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK16-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK16-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK16-NEXT:  entry:
 // CHECK16-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK16-NEXT:    ret void
@@ -19430,14 +19374,14 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    store i32 [[TMP3]], i32* [[CONV]], align 4
 // CHECK17-NEXT:    [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8
 // CHECK17-NEXT:    [[TMP5:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK17-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP4]], i64* [[TMP7]], align 8
-// CHECK17-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP4]], i64* [[TMP9]], align 8
-// CHECK17-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK17-NEXT:    store i64 4, i64* [[TMP10]], align 8
+// CHECK17-NEXT:    [[TMP6:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK17-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP6]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes to i8*), i64 24, i1 false)
+// CHECK17-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i64*
+// CHECK17-NEXT:    store i64 [[TMP4]], i64* [[TMP8]], align 8
+// CHECK17-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i64*
+// CHECK17-NEXT:    store i64 [[TMP4]], i64* [[TMP10]], align 8
 // CHECK17-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
 // CHECK17-NEXT:    store i8* null, i8** [[TMP11]], align 8
 // CHECK17-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -19446,304 +19390,290 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK17-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i64*
 // CHECK17-NEXT:    store i64 [[TMP1]], i64* [[TMP15]], align 8
-// CHECK17-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK17-NEXT:    store i64 8, i64* [[TMP16]], align 8
-// CHECK17-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK17-NEXT:    store i8* null, i8** [[TMP17]], align 8
-// CHECK17-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK17-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
-// CHECK17-NEXT:    store i32* [[VLA]], i32** [[TMP19]], align 8
-// CHECK17-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK17-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32**
-// CHECK17-NEXT:    store i32* [[VLA]], i32** [[TMP21]], align 8
-// CHECK17-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK17-NEXT:    store i64 [[TMP5]], i64* [[TMP22]], align 8
-// CHECK17-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK17-NEXT:    store i8* null, i8** [[TMP23]], align 8
-// CHECK17-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP27:%.*]] = load i32, i32* [[N]], align 4
-// CHECK17-NEXT:    store i32 [[TMP27]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK17-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK17-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP28]], 0
+// CHECK17-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK17-NEXT:    store i8* null, i8** [[TMP16]], align 8
+// CHECK17-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK17-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32**
+// CHECK17-NEXT:    store i32* [[VLA]], i32** [[TMP18]], align 8
+// CHECK17-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK17-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32**
+// CHECK17-NEXT:    store i32* [[VLA]], i32** [[TMP20]], align 8
+// CHECK17-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
+// CHECK17-NEXT:    store i64 [[TMP5]], i64* [[TMP21]], align 8
+// CHECK17-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK17-NEXT:    store i8* null, i8** [[TMP22]], align 8
+// CHECK17-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP26:%.*]] = load i32, i32* [[N]], align 4
+// CHECK17-NEXT:    store i32 [[TMP26]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK17-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK17-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP27]], 0
 // CHECK17-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK17-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK17-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK17-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK17-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP29]], 1
-// CHECK17-NEXT:    [[TMP30:%.*]] = zext i32 [[ADD]] to i64
-// CHECK17-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP30]])
-// CHECK17-NEXT:    [[TMP31:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l139.region_id, i32 3, i8** [[TMP24]], i8** [[TMP25]], i64* [[TMP26]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK17-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
-// CHECK17-NEXT:    br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK17-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK17-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP28]], 1
+// CHECK17-NEXT:    [[TMP29:%.*]] = zext i32 [[ADD]] to i64
+// CHECK17-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP29]])
+// CHECK17-NEXT:    [[TMP30:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l139.region_id, i32 3, i8** [[TMP23]], i8** [[TMP24]], i64* [[TMP25]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK17-NEXT:    [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
+// CHECK17-NEXT:    br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK17:       omp_offload.failed:
 // CHECK17-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l139(i64 [[TMP4]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK17-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK17:       omp_offload.cont:
-// CHECK17-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N]], align 4
+// CHECK17-NEXT:    [[TMP32:%.*]] = load i32, i32* [[N]], align 4
 // CHECK17-NEXT:    [[CONV4:%.*]] = bitcast i64* [[N_CASTED3]] to i32*
-// CHECK17-NEXT:    store i32 [[TMP33]], i32* [[CONV4]], align 4
-// CHECK17-NEXT:    [[TMP34:%.*]] = load i64, i64* [[N_CASTED3]], align 8
-// CHECK17-NEXT:    [[TMP35:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK17-NEXT:    store i32 [[TMP32]], i32* [[CONV4]], align 4
+// CHECK17-NEXT:    [[TMP33:%.*]] = load i64, i64* [[N_CASTED3]], align 8
+// CHECK17-NEXT:    [[TMP34:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK17-NEXT:    [[TMP35:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES8]] to i8*
+// CHECK17-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP35]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes.4 to i8*), i64 24, i1 false)
 // CHECK17-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
 // CHECK17-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP34]], i64* [[TMP37]], align 8
+// CHECK17-NEXT:    store i64 [[TMP33]], i64* [[TMP37]], align 8
 // CHECK17-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
 // CHECK17-NEXT:    [[TMP39:%.*]] = bitcast i8** [[TMP38]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP34]], i64* [[TMP39]], align 8
-// CHECK17-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 0
-// CHECK17-NEXT:    store i64 4, i64* [[TMP40]], align 8
-// CHECK17-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 0
-// CHECK17-NEXT:    store i8* null, i8** [[TMP41]], align 8
-// CHECK17-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 1
-// CHECK17-NEXT:    [[TMP43:%.*]] = bitcast i8** [[TMP42]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP1]], i64* [[TMP43]], align 8
-// CHECK17-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 1
-// CHECK17-NEXT:    [[TMP45:%.*]] = bitcast i8** [[TMP44]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP1]], i64* [[TMP45]], align 8
-// CHECK17-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 1
-// CHECK17-NEXT:    store i64 8, i64* [[TMP46]], align 8
-// CHECK17-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 1
-// CHECK17-NEXT:    store i8* null, i8** [[TMP47]], align 8
-// CHECK17-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 2
+// CHECK17-NEXT:    store i64 [[TMP33]], i64* [[TMP39]], align 8
+// CHECK17-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 0
+// CHECK17-NEXT:    store i8* null, i8** [[TMP40]], align 8
+// CHECK17-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 1
+// CHECK17-NEXT:    [[TMP42:%.*]] = bitcast i8** [[TMP41]] to i64*
+// CHECK17-NEXT:    store i64 [[TMP1]], i64* [[TMP42]], align 8
+// CHECK17-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 1
+// CHECK17-NEXT:    [[TMP44:%.*]] = bitcast i8** [[TMP43]] to i64*
+// CHECK17-NEXT:    store i64 [[TMP1]], i64* [[TMP44]], align 8
+// CHECK17-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 1
+// CHECK17-NEXT:    store i8* null, i8** [[TMP45]], align 8
+// CHECK17-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 2
+// CHECK17-NEXT:    [[TMP47:%.*]] = bitcast i8** [[TMP46]] to i32**
+// CHECK17-NEXT:    store i32* [[VLA]], i32** [[TMP47]], align 8
+// CHECK17-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 2
 // CHECK17-NEXT:    [[TMP49:%.*]] = bitcast i8** [[TMP48]] to i32**
 // CHECK17-NEXT:    store i32* [[VLA]], i32** [[TMP49]], align 8
-// CHECK17-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 2
-// CHECK17-NEXT:    [[TMP51:%.*]] = bitcast i8** [[TMP50]] to i32**
-// CHECK17-NEXT:    store i32* [[VLA]], i32** [[TMP51]], align 8
-// CHECK17-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 2
-// CHECK17-NEXT:    store i64 [[TMP35]], i64* [[TMP52]], align 8
-// CHECK17-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 2
-// CHECK17-NEXT:    store i8* null, i8** [[TMP53]], align 8
-// CHECK17-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP57:%.*]] = load i32, i32* [[N]], align 4
-// CHECK17-NEXT:    store i32 [[TMP57]], i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK17-NEXT:    [[TMP58:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK17-NEXT:    [[SUB12:%.*]] = sub nsw i32 [[TMP58]], 0
+// CHECK17-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 2
+// CHECK17-NEXT:    store i64 [[TMP34]], i64* [[TMP50]], align 8
+// CHECK17-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 2
+// CHECK17-NEXT:    store i8* null, i8** [[TMP51]], align 8
+// CHECK17-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP55:%.*]] = load i32, i32* [[N]], align 4
+// CHECK17-NEXT:    store i32 [[TMP55]], i32* [[DOTCAPTURE_EXPR_10]], align 4
+// CHECK17-NEXT:    [[TMP56:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
+// CHECK17-NEXT:    [[SUB12:%.*]] = sub nsw i32 [[TMP56]], 0
 // CHECK17-NEXT:    [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1
 // CHECK17-NEXT:    [[SUB14:%.*]] = sub nsw i32 [[DIV13]], 1
 // CHECK17-NEXT:    store i32 [[SUB14]], i32* [[DOTCAPTURE_EXPR_11]], align 4
-// CHECK17-NEXT:    [[TMP59:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_11]], align 4
-// CHECK17-NEXT:    [[ADD15:%.*]] = add nsw i32 [[TMP59]], 1
-// CHECK17-NEXT:    [[TMP60:%.*]] = zext i32 [[ADD15]] to i64
-// CHECK17-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP60]])
-// CHECK17-NEXT:    [[TMP61:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l143.region_id, i32 3, i8** [[TMP54]], i8** [[TMP55]], i64* [[TMP56]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK17-NEXT:    [[TMP62:%.*]] = icmp ne i32 [[TMP61]], 0
-// CHECK17-NEXT:    br i1 [[TMP62]], label [[OMP_OFFLOAD_FAILED16:%.*]], label [[OMP_OFFLOAD_CONT17:%.*]]
+// CHECK17-NEXT:    [[TMP57:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_11]], align 4
+// CHECK17-NEXT:    [[ADD15:%.*]] = add nsw i32 [[TMP57]], 1
+// CHECK17-NEXT:    [[TMP58:%.*]] = zext i32 [[ADD15]] to i64
+// CHECK17-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP58]])
+// CHECK17-NEXT:    [[TMP59:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l143.region_id, i32 3, i8** [[TMP52]], i8** [[TMP53]], i64* [[TMP54]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.5, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK17-NEXT:    [[TMP60:%.*]] = icmp ne i32 [[TMP59]], 0
+// CHECK17-NEXT:    br i1 [[TMP60]], label [[OMP_OFFLOAD_FAILED16:%.*]], label [[OMP_OFFLOAD_CONT17:%.*]]
 // CHECK17:       omp_offload.failed16:
-// CHECK17-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l143(i64 [[TMP34]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK17-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l143(i64 [[TMP33]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK17-NEXT:    br label [[OMP_OFFLOAD_CONT17]]
 // CHECK17:       omp_offload.cont17:
-// CHECK17-NEXT:    [[TMP63:%.*]] = load i32, i32* [[M]], align 4
-// CHECK17-NEXT:    store i32 [[TMP63]], i32* [[DOTCAPTURE_EXPR_18]], align 4
-// CHECK17-NEXT:    [[TMP64:%.*]] = load i32, i32* [[N]], align 4
+// CHECK17-NEXT:    [[TMP61:%.*]] = load i32, i32* [[M]], align 4
+// CHECK17-NEXT:    store i32 [[TMP61]], i32* [[DOTCAPTURE_EXPR_18]], align 4
+// CHECK17-NEXT:    [[TMP62:%.*]] = load i32, i32* [[N]], align 4
 // CHECK17-NEXT:    [[CONV20:%.*]] = bitcast i64* [[N_CASTED19]] to i32*
-// CHECK17-NEXT:    store i32 [[TMP64]], i32* [[CONV20]], align 4
-// CHECK17-NEXT:    [[TMP65:%.*]] = load i64, i64* [[N_CASTED19]], align 8
-// CHECK17-NEXT:    [[TMP66:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4
+// CHECK17-NEXT:    store i32 [[TMP62]], i32* [[CONV20]], align 4
+// CHECK17-NEXT:    [[TMP63:%.*]] = load i64, i64* [[N_CASTED19]], align 8
+// CHECK17-NEXT:    [[TMP64:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4
 // CHECK17-NEXT:    [[CONV21:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
-// CHECK17-NEXT:    store i32 [[TMP66]], i32* [[CONV21]], align 4
-// CHECK17-NEXT:    [[TMP67:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK17-NEXT:    [[TMP68:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK17-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP70:%.*]] = bitcast i8** [[TMP69]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP65]], i64* [[TMP70]], align 8
-// CHECK17-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP72:%.*]] = bitcast i8** [[TMP71]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP65]], i64* [[TMP72]], align 8
-// CHECK17-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 0
-// CHECK17-NEXT:    store i64 4, i64* [[TMP73]], align 8
-// CHECK17-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 0
-// CHECK17-NEXT:    store i8* null, i8** [[TMP74]], align 8
-// CHECK17-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 1
+// CHECK17-NEXT:    store i32 [[TMP64]], i32* [[CONV21]], align 4
+// CHECK17-NEXT:    [[TMP65:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
+// CHECK17-NEXT:    [[TMP66:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK17-NEXT:    [[TMP67:%.*]] = bitcast [4 x i64]* [[DOTOFFLOAD_SIZES25]] to i8*
+// CHECK17-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP67]], i8* align 8 bitcast ([4 x i64]* @.offload_sizes.8 to i8*), i64 32, i1 false)
+// CHECK17-NEXT:    [[TMP68:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP69:%.*]] = bitcast i8** [[TMP68]] to i64*
+// CHECK17-NEXT:    store i64 [[TMP63]], i64* [[TMP69]], align 8
+// CHECK17-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP71:%.*]] = bitcast i8** [[TMP70]] to i64*
+// CHECK17-NEXT:    store i64 [[TMP63]], i64* [[TMP71]], align 8
+// CHECK17-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 0
+// CHECK17-NEXT:    store i8* null, i8** [[TMP72]], align 8
+// CHECK17-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 1
+// CHECK17-NEXT:    [[TMP74:%.*]] = bitcast i8** [[TMP73]] to i64*
+// CHECK17-NEXT:    store i64 [[TMP1]], i64* [[TMP74]], align 8
+// CHECK17-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 1
 // CHECK17-NEXT:    [[TMP76:%.*]] = bitcast i8** [[TMP75]] to i64*
 // CHECK17-NEXT:    store i64 [[TMP1]], i64* [[TMP76]], align 8
-// CHECK17-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 1
-// CHECK17-NEXT:    [[TMP78:%.*]] = bitcast i8** [[TMP77]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP1]], i64* [[TMP78]], align 8
-// CHECK17-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 1
-// CHECK17-NEXT:    store i64 8, i64* [[TMP79]], align 8
-// CHECK17-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 1
-// CHECK17-NEXT:    store i8* null, i8** [[TMP80]], align 8
-// CHECK17-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 2
-// CHECK17-NEXT:    [[TMP82:%.*]] = bitcast i8** [[TMP81]] to i32**
-// CHECK17-NEXT:    store i32* [[VLA]], i32** [[TMP82]], align 8
-// CHECK17-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 2
-// CHECK17-NEXT:    [[TMP84:%.*]] = bitcast i8** [[TMP83]] to i32**
-// CHECK17-NEXT:    store i32* [[VLA]], i32** [[TMP84]], align 8
-// CHECK17-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 2
-// CHECK17-NEXT:    store i64 [[TMP68]], i64* [[TMP85]], align 8
-// CHECK17-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 2
-// CHECK17-NEXT:    store i8* null, i8** [[TMP86]], align 8
-// CHECK17-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 3
-// CHECK17-NEXT:    [[TMP88:%.*]] = bitcast i8** [[TMP87]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP67]], i64* [[TMP88]], align 8
-// CHECK17-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 3
-// CHECK17-NEXT:    [[TMP90:%.*]] = bitcast i8** [[TMP89]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP67]], i64* [[TMP90]], align 8
-// CHECK17-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 3
-// CHECK17-NEXT:    store i64 4, i64* [[TMP91]], align 8
-// CHECK17-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 3
-// CHECK17-NEXT:    store i8* null, i8** [[TMP92]], align 8
-// CHECK17-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP96:%.*]] = load i32, i32* [[N]], align 4
-// CHECK17-NEXT:    store i32 [[TMP96]], i32* [[DOTCAPTURE_EXPR_27]], align 4
-// CHECK17-NEXT:    [[TMP97:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_27]], align 4
-// CHECK17-NEXT:    [[SUB29:%.*]] = sub nsw i32 [[TMP97]], 0
+// CHECK17-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 1
+// CHECK17-NEXT:    store i8* null, i8** [[TMP77]], align 8
+// CHECK17-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 2
+// CHECK17-NEXT:    [[TMP79:%.*]] = bitcast i8** [[TMP78]] to i32**
+// CHECK17-NEXT:    store i32* [[VLA]], i32** [[TMP79]], align 8
+// CHECK17-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 2
+// CHECK17-NEXT:    [[TMP81:%.*]] = bitcast i8** [[TMP80]] to i32**
+// CHECK17-NEXT:    store i32* [[VLA]], i32** [[TMP81]], align 8
+// CHECK17-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 2
+// CHECK17-NEXT:    store i64 [[TMP66]], i64* [[TMP82]], align 8
+// CHECK17-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 2
+// CHECK17-NEXT:    store i8* null, i8** [[TMP83]], align 8
+// CHECK17-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 3
+// CHECK17-NEXT:    [[TMP85:%.*]] = bitcast i8** [[TMP84]] to i64*
+// CHECK17-NEXT:    store i64 [[TMP65]], i64* [[TMP85]], align 8
+// CHECK17-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 3
+// CHECK17-NEXT:    [[TMP87:%.*]] = bitcast i8** [[TMP86]] to i64*
+// CHECK17-NEXT:    store i64 [[TMP65]], i64* [[TMP87]], align 8
+// CHECK17-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 3
+// CHECK17-NEXT:    store i8* null, i8** [[TMP88]], align 8
+// CHECK17-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP92:%.*]] = load i32, i32* [[N]], align 4
+// CHECK17-NEXT:    store i32 [[TMP92]], i32* [[DOTCAPTURE_EXPR_27]], align 4
+// CHECK17-NEXT:    [[TMP93:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_27]], align 4
+// CHECK17-NEXT:    [[SUB29:%.*]] = sub nsw i32 [[TMP93]], 0
 // CHECK17-NEXT:    [[DIV30:%.*]] = sdiv i32 [[SUB29]], 1
 // CHECK17-NEXT:    [[SUB31:%.*]] = sub nsw i32 [[DIV30]], 1
 // CHECK17-NEXT:    store i32 [[SUB31]], i32* [[DOTCAPTURE_EXPR_28]], align 4
-// CHECK17-NEXT:    [[TMP98:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_28]], align 4
-// CHECK17-NEXT:    [[ADD32:%.*]] = add nsw i32 [[TMP98]], 1
-// CHECK17-NEXT:    [[TMP99:%.*]] = zext i32 [[ADD32]] to i64
-// CHECK17-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP99]])
-// CHECK17-NEXT:    [[TMP100:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l147.region_id, i32 4, i8** [[TMP93]], i8** [[TMP94]], i64* [[TMP95]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.7, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK17-NEXT:    [[TMP101:%.*]] = icmp ne i32 [[TMP100]], 0
-// CHECK17-NEXT:    br i1 [[TMP101]], label [[OMP_OFFLOAD_FAILED33:%.*]], label [[OMP_OFFLOAD_CONT34:%.*]]
+// CHECK17-NEXT:    [[TMP94:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_28]], align 4
+// CHECK17-NEXT:    [[ADD32:%.*]] = add nsw i32 [[TMP94]], 1
+// CHECK17-NEXT:    [[TMP95:%.*]] = zext i32 [[ADD32]] to i64
+// CHECK17-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP95]])
+// CHECK17-NEXT:    [[TMP96:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l147.region_id, i32 4, i8** [[TMP89]], i8** [[TMP90]], i64* [[TMP91]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK17-NEXT:    [[TMP97:%.*]] = icmp ne i32 [[TMP96]], 0
+// CHECK17-NEXT:    br i1 [[TMP97]], label [[OMP_OFFLOAD_FAILED33:%.*]], label [[OMP_OFFLOAD_CONT34:%.*]]
 // CHECK17:       omp_offload.failed33:
-// CHECK17-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l147(i64 [[TMP65]], i64 [[TMP1]], i32* [[VLA]], i64 [[TMP67]]) #[[ATTR3]]
+// CHECK17-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l147(i64 [[TMP63]], i64 [[TMP1]], i32* [[VLA]], i64 [[TMP65]]) #[[ATTR3]]
 // CHECK17-NEXT:    br label [[OMP_OFFLOAD_CONT34]]
 // CHECK17:       omp_offload.cont34:
-// CHECK17-NEXT:    [[TMP102:%.*]] = load i32, i32* [[N]], align 4
+// CHECK17-NEXT:    [[TMP98:%.*]] = load i32, i32* [[N]], align 4
 // CHECK17-NEXT:    [[CONV36:%.*]] = bitcast i64* [[N_CASTED35]] to i32*
-// CHECK17-NEXT:    store i32 [[TMP102]], i32* [[CONV36]], align 4
-// CHECK17-NEXT:    [[TMP103:%.*]] = load i64, i64* [[N_CASTED35]], align 8
-// CHECK17-NEXT:    [[TMP104:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK17-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS37]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP106:%.*]] = bitcast i8** [[TMP105]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP103]], i64* [[TMP106]], align 8
-// CHECK17-NEXT:    [[TMP107:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS38]], i32 0, i32 0
+// CHECK17-NEXT:    store i32 [[TMP98]], i32* [[CONV36]], align 4
+// CHECK17-NEXT:    [[TMP99:%.*]] = load i64, i64* [[N_CASTED35]], align 8
+// CHECK17-NEXT:    [[TMP100:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK17-NEXT:    [[TMP101:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES40]] to i8*
+// CHECK17-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP101]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes.12 to i8*), i64 24, i1 false)
+// CHECK17-NEXT:    [[TMP102:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS37]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP103:%.*]] = bitcast i8** [[TMP102]] to i64*
+// CHECK17-NEXT:    store i64 [[TMP99]], i64* [[TMP103]], align 8
+// CHECK17-NEXT:    [[TMP104:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS38]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP105:%.*]] = bitcast i8** [[TMP104]] to i64*
+// CHECK17-NEXT:    store i64 [[TMP99]], i64* [[TMP105]], align 8
+// CHECK17-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS39]], i64 0, i64 0
+// CHECK17-NEXT:    store i8* null, i8** [[TMP106]], align 8
+// CHECK17-NEXT:    [[TMP107:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS37]], i32 0, i32 1
 // CHECK17-NEXT:    [[TMP108:%.*]] = bitcast i8** [[TMP107]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP103]], i64* [[TMP108]], align 8
-// CHECK17-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES40]], i32 0, i32 0
-// CHECK17-NEXT:    store i64 4, i64* [[TMP109]], align 8
-// CHECK17-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS39]], i64 0, i64 0
-// CHECK17-NEXT:    store i8* null, i8** [[TMP110]], align 8
-// CHECK17-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS37]], i32 0, i32 1
-// CHECK17-NEXT:    [[TMP112:%.*]] = bitcast i8** [[TMP111]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP1]], i64* [[TMP112]], align 8
-// CHECK17-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS38]], i32 0, i32 1
-// CHECK17-NEXT:    [[TMP114:%.*]] = bitcast i8** [[TMP113]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP1]], i64* [[TMP114]], align 8
-// CHECK17-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES40]], i32 0, i32 1
-// CHECK17-NEXT:    store i64 8, i64* [[TMP115]], align 8
-// CHECK17-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS39]], i64 0, i64 1
-// CHECK17-NEXT:    store i8* null, i8** [[TMP116]], align 8
-// CHECK17-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS37]], i32 0, i32 2
-// CHECK17-NEXT:    [[TMP118:%.*]] = bitcast i8** [[TMP117]] to i32**
-// CHECK17-NEXT:    store i32* [[VLA]], i32** [[TMP118]], align 8
-// CHECK17-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS38]], i32 0, i32 2
-// CHECK17-NEXT:    [[TMP120:%.*]] = bitcast i8** [[TMP119]] to i32**
-// CHECK17-NEXT:    store i32* [[VLA]], i32** [[TMP120]], align 8
-// CHECK17-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES40]], i32 0, i32 2
-// CHECK17-NEXT:    store i64 [[TMP104]], i64* [[TMP121]], align 8
-// CHECK17-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS39]], i64 0, i64 2
-// CHECK17-NEXT:    store i8* null, i8** [[TMP122]], align 8
-// CHECK17-NEXT:    [[TMP123:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS37]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS38]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP125:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES40]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP126:%.*]] = load i32, i32* [[N]], align 4
-// CHECK17-NEXT:    store i32 [[TMP126]], i32* [[DOTCAPTURE_EXPR_42]], align 4
-// CHECK17-NEXT:    [[TMP127:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_42]], align 4
-// CHECK17-NEXT:    [[SUB44:%.*]] = sub nsw i32 [[TMP127]], 0
+// CHECK17-NEXT:    store i64 [[TMP1]], i64* [[TMP108]], align 8
+// CHECK17-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS38]], i32 0, i32 1
+// CHECK17-NEXT:    [[TMP110:%.*]] = bitcast i8** [[TMP109]] to i64*
+// CHECK17-NEXT:    store i64 [[TMP1]], i64* [[TMP110]], align 8
+// CHECK17-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS39]], i64 0, i64 1
+// CHECK17-NEXT:    store i8* null, i8** [[TMP111]], align 8
+// CHECK17-NEXT:    [[TMP112:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS37]], i32 0, i32 2
+// CHECK17-NEXT:    [[TMP113:%.*]] = bitcast i8** [[TMP112]] to i32**
+// CHECK17-NEXT:    store i32* [[VLA]], i32** [[TMP113]], align 8
+// CHECK17-NEXT:    [[TMP114:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS38]], i32 0, i32 2
+// CHECK17-NEXT:    [[TMP115:%.*]] = bitcast i8** [[TMP114]] to i32**
+// CHECK17-NEXT:    store i32* [[VLA]], i32** [[TMP115]], align 8
+// CHECK17-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES40]], i32 0, i32 2
+// CHECK17-NEXT:    store i64 [[TMP100]], i64* [[TMP116]], align 8
+// CHECK17-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS39]], i64 0, i64 2
+// CHECK17-NEXT:    store i8* null, i8** [[TMP117]], align 8
+// CHECK17-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS37]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS38]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP120:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES40]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP121:%.*]] = load i32, i32* [[N]], align 4
+// CHECK17-NEXT:    store i32 [[TMP121]], i32* [[DOTCAPTURE_EXPR_42]], align 4
+// CHECK17-NEXT:    [[TMP122:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_42]], align 4
+// CHECK17-NEXT:    [[SUB44:%.*]] = sub nsw i32 [[TMP122]], 0
 // CHECK17-NEXT:    [[DIV45:%.*]] = sdiv i32 [[SUB44]], 1
 // CHECK17-NEXT:    [[SUB46:%.*]] = sub nsw i32 [[DIV45]], 1
 // CHECK17-NEXT:    store i32 [[SUB46]], i32* [[DOTCAPTURE_EXPR_43]], align 4
-// CHECK17-NEXT:    [[TMP128:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_43]], align 4
-// CHECK17-NEXT:    [[ADD47:%.*]] = add nsw i32 [[TMP128]], 1
-// CHECK17-NEXT:    [[TMP129:%.*]] = zext i32 [[ADD47]] to i64
-// CHECK17-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP129]])
-// CHECK17-NEXT:    [[TMP130:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l151.region_id, i32 3, i8** [[TMP123]], i8** [[TMP124]], i64* [[TMP125]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.10, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK17-NEXT:    [[TMP131:%.*]] = icmp ne i32 [[TMP130]], 0
-// CHECK17-NEXT:    br i1 [[TMP131]], label [[OMP_OFFLOAD_FAILED48:%.*]], label [[OMP_OFFLOAD_CONT49:%.*]]
+// CHECK17-NEXT:    [[TMP123:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_43]], align 4
+// CHECK17-NEXT:    [[ADD47:%.*]] = add nsw i32 [[TMP123]], 1
+// CHECK17-NEXT:    [[TMP124:%.*]] = zext i32 [[ADD47]] to i64
+// CHECK17-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP124]])
+// CHECK17-NEXT:    [[TMP125:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l151.region_id, i32 3, i8** [[TMP118]], i8** [[TMP119]], i64* [[TMP120]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK17-NEXT:    [[TMP126:%.*]] = icmp ne i32 [[TMP125]], 0
+// CHECK17-NEXT:    br i1 [[TMP126]], label [[OMP_OFFLOAD_FAILED48:%.*]], label [[OMP_OFFLOAD_CONT49:%.*]]
 // CHECK17:       omp_offload.failed48:
-// CHECK17-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l151(i64 [[TMP103]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK17-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l151(i64 [[TMP99]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK17-NEXT:    br label [[OMP_OFFLOAD_CONT49]]
 // CHECK17:       omp_offload.cont49:
-// CHECK17-NEXT:    [[TMP132:%.*]] = load i32, i32* [[M]], align 4
-// CHECK17-NEXT:    store i32 [[TMP132]], i32* [[DOTCAPTURE_EXPR_50]], align 4
-// CHECK17-NEXT:    [[TMP133:%.*]] = load i32, i32* [[N]], align 4
+// CHECK17-NEXT:    [[TMP127:%.*]] = load i32, i32* [[M]], align 4
+// CHECK17-NEXT:    store i32 [[TMP127]], i32* [[DOTCAPTURE_EXPR_50]], align 4
+// CHECK17-NEXT:    [[TMP128:%.*]] = load i32, i32* [[N]], align 4
 // CHECK17-NEXT:    [[CONV52:%.*]] = bitcast i64* [[N_CASTED51]] to i32*
-// CHECK17-NEXT:    store i32 [[TMP133]], i32* [[CONV52]], align 4
-// CHECK17-NEXT:    [[TMP134:%.*]] = load i64, i64* [[N_CASTED51]], align 8
-// CHECK17-NEXT:    [[TMP135:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_50]], align 4
+// CHECK17-NEXT:    store i32 [[TMP128]], i32* [[CONV52]], align 4
+// CHECK17-NEXT:    [[TMP129:%.*]] = load i64, i64* [[N_CASTED51]], align 8
+// CHECK17-NEXT:    [[TMP130:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_50]], align 4
 // CHECK17-NEXT:    [[CONV54:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED53]] to i32*
-// CHECK17-NEXT:    store i32 [[TMP135]], i32* [[CONV54]], align 4
-// CHECK17-NEXT:    [[TMP136:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED53]], align 8
-// CHECK17-NEXT:    [[TMP137:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK17-NEXT:    [[TMP138:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS55]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP139:%.*]] = bitcast i8** [[TMP138]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP134]], i64* [[TMP139]], align 8
-// CHECK17-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS56]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP141:%.*]] = bitcast i8** [[TMP140]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP134]], i64* [[TMP141]], align 8
-// CHECK17-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES58]], i32 0, i32 0
-// CHECK17-NEXT:    store i64 4, i64* [[TMP142]], align 8
-// CHECK17-NEXT:    [[TMP143:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS57]], i64 0, i64 0
+// CHECK17-NEXT:    store i32 [[TMP130]], i32* [[CONV54]], align 4
+// CHECK17-NEXT:    [[TMP131:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED53]], align 8
+// CHECK17-NEXT:    [[TMP132:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK17-NEXT:    [[TMP133:%.*]] = bitcast [4 x i64]* [[DOTOFFLOAD_SIZES58]] to i8*
+// CHECK17-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP133]], i8* align 8 bitcast ([4 x i64]* @.offload_sizes.16 to i8*), i64 32, i1 false)
+// CHECK17-NEXT:    [[TMP134:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS55]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP135:%.*]] = bitcast i8** [[TMP134]] to i64*
+// CHECK17-NEXT:    store i64 [[TMP129]], i64* [[TMP135]], align 8
+// CHECK17-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS56]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP137:%.*]] = bitcast i8** [[TMP136]] to i64*
+// CHECK17-NEXT:    store i64 [[TMP129]], i64* [[TMP137]], align 8
+// CHECK17-NEXT:    [[TMP138:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS57]], i64 0, i64 0
+// CHECK17-NEXT:    store i8* null, i8** [[TMP138]], align 8
+// CHECK17-NEXT:    [[TMP139:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS55]], i32 0, i32 1
+// CHECK17-NEXT:    [[TMP140:%.*]] = bitcast i8** [[TMP139]] to i64*
+// CHECK17-NEXT:    store i64 [[TMP1]], i64* [[TMP140]], align 8
+// CHECK17-NEXT:    [[TMP141:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS56]], i32 0, i32 1
+// CHECK17-NEXT:    [[TMP142:%.*]] = bitcast i8** [[TMP141]] to i64*
+// CHECK17-NEXT:    store i64 [[TMP1]], i64* [[TMP142]], align 8
+// CHECK17-NEXT:    [[TMP143:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS57]], i64 0, i64 1
 // CHECK17-NEXT:    store i8* null, i8** [[TMP143]], align 8
-// CHECK17-NEXT:    [[TMP144:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS55]], i32 0, i32 1
-// CHECK17-NEXT:    [[TMP145:%.*]] = bitcast i8** [[TMP144]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP1]], i64* [[TMP145]], align 8
-// CHECK17-NEXT:    [[TMP146:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS56]], i32 0, i32 1
-// CHECK17-NEXT:    [[TMP147:%.*]] = bitcast i8** [[TMP146]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP1]], i64* [[TMP147]], align 8
-// CHECK17-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES58]], i32 0, i32 1
-// CHECK17-NEXT:    store i64 8, i64* [[TMP148]], align 8
-// CHECK17-NEXT:    [[TMP149:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS57]], i64 0, i64 1
+// CHECK17-NEXT:    [[TMP144:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS55]], i32 0, i32 2
+// CHECK17-NEXT:    [[TMP145:%.*]] = bitcast i8** [[TMP144]] to i32**
+// CHECK17-NEXT:    store i32* [[VLA]], i32** [[TMP145]], align 8
+// CHECK17-NEXT:    [[TMP146:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS56]], i32 0, i32 2
+// CHECK17-NEXT:    [[TMP147:%.*]] = bitcast i8** [[TMP146]] to i32**
+// CHECK17-NEXT:    store i32* [[VLA]], i32** [[TMP147]], align 8
+// CHECK17-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES58]], i32 0, i32 2
+// CHECK17-NEXT:    store i64 [[TMP132]], i64* [[TMP148]], align 8
+// CHECK17-NEXT:    [[TMP149:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS57]], i64 0, i64 2
 // CHECK17-NEXT:    store i8* null, i8** [[TMP149]], align 8
-// CHECK17-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS55]], i32 0, i32 2
-// CHECK17-NEXT:    [[TMP151:%.*]] = bitcast i8** [[TMP150]] to i32**
-// CHECK17-NEXT:    store i32* [[VLA]], i32** [[TMP151]], align 8
-// CHECK17-NEXT:    [[TMP152:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS56]], i32 0, i32 2
-// CHECK17-NEXT:    [[TMP153:%.*]] = bitcast i8** [[TMP152]] to i32**
-// CHECK17-NEXT:    store i32* [[VLA]], i32** [[TMP153]], align 8
-// CHECK17-NEXT:    [[TMP154:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES58]], i32 0, i32 2
-// CHECK17-NEXT:    store i64 [[TMP137]], i64* [[TMP154]], align 8
-// CHECK17-NEXT:    [[TMP155:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS57]], i64 0, i64 2
-// CHECK17-NEXT:    store i8* null, i8** [[TMP155]], align 8
-// CHECK17-NEXT:    [[TMP156:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS55]], i32 0, i32 3
-// CHECK17-NEXT:    [[TMP157:%.*]] = bitcast i8** [[TMP156]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP136]], i64* [[TMP157]], align 8
-// CHECK17-NEXT:    [[TMP158:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS56]], i32 0, i32 3
-// CHECK17-NEXT:    [[TMP159:%.*]] = bitcast i8** [[TMP158]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP136]], i64* [[TMP159]], align 8
-// CHECK17-NEXT:    [[TMP160:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES58]], i32 0, i32 3
-// CHECK17-NEXT:    store i64 4, i64* [[TMP160]], align 8
-// CHECK17-NEXT:    [[TMP161:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS57]], i64 0, i64 3
-// CHECK17-NEXT:    store i8* null, i8** [[TMP161]], align 8
-// CHECK17-NEXT:    [[TMP162:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS55]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP163:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS56]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP164:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES58]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP165:%.*]] = load i32, i32* [[N]], align 4
-// CHECK17-NEXT:    store i32 [[TMP165]], i32* [[DOTCAPTURE_EXPR_60]], align 4
-// CHECK17-NEXT:    [[TMP166:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_60]], align 4
-// CHECK17-NEXT:    [[SUB62:%.*]] = sub nsw i32 [[TMP166]], 0
+// CHECK17-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS55]], i32 0, i32 3
+// CHECK17-NEXT:    [[TMP151:%.*]] = bitcast i8** [[TMP150]] to i64*
+// CHECK17-NEXT:    store i64 [[TMP131]], i64* [[TMP151]], align 8
+// CHECK17-NEXT:    [[TMP152:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS56]], i32 0, i32 3
+// CHECK17-NEXT:    [[TMP153:%.*]] = bitcast i8** [[TMP152]] to i64*
+// CHECK17-NEXT:    store i64 [[TMP131]], i64* [[TMP153]], align 8
+// CHECK17-NEXT:    [[TMP154:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS57]], i64 0, i64 3
+// CHECK17-NEXT:    store i8* null, i8** [[TMP154]], align 8
+// CHECK17-NEXT:    [[TMP155:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS55]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP156:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS56]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP157:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES58]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP158:%.*]] = load i32, i32* [[N]], align 4
+// CHECK17-NEXT:    store i32 [[TMP158]], i32* [[DOTCAPTURE_EXPR_60]], align 4
+// CHECK17-NEXT:    [[TMP159:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_60]], align 4
+// CHECK17-NEXT:    [[SUB62:%.*]] = sub nsw i32 [[TMP159]], 0
 // CHECK17-NEXT:    [[DIV63:%.*]] = sdiv i32 [[SUB62]], 1
 // CHECK17-NEXT:    [[SUB64:%.*]] = sub nsw i32 [[DIV63]], 1
 // CHECK17-NEXT:    store i32 [[SUB64]], i32* [[DOTCAPTURE_EXPR_61]], align 4
-// CHECK17-NEXT:    [[TMP167:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_61]], align 4
-// CHECK17-NEXT:    [[ADD65:%.*]] = add nsw i32 [[TMP167]], 1
-// CHECK17-NEXT:    [[TMP168:%.*]] = zext i32 [[ADD65]] to i64
-// CHECK17-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP168]])
-// CHECK17-NEXT:    [[TMP169:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l155.region_id, i32 4, i8** [[TMP162]], i8** [[TMP163]], i64* [[TMP164]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK17-NEXT:    [[TMP170:%.*]] = icmp ne i32 [[TMP169]], 0
-// CHECK17-NEXT:    br i1 [[TMP170]], label [[OMP_OFFLOAD_FAILED66:%.*]], label [[OMP_OFFLOAD_CONT67:%.*]]
+// CHECK17-NEXT:    [[TMP160:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_61]], align 4
+// CHECK17-NEXT:    [[ADD65:%.*]] = add nsw i32 [[TMP160]], 1
+// CHECK17-NEXT:    [[TMP161:%.*]] = zext i32 [[ADD65]] to i64
+// CHECK17-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP161]])
+// CHECK17-NEXT:    [[TMP162:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l155.region_id, i32 4, i8** [[TMP155]], i8** [[TMP156]], i64* [[TMP157]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.17, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK17-NEXT:    [[TMP163:%.*]] = icmp ne i32 [[TMP162]], 0
+// CHECK17-NEXT:    br i1 [[TMP163]], label [[OMP_OFFLOAD_FAILED66:%.*]], label [[OMP_OFFLOAD_CONT67:%.*]]
 // CHECK17:       omp_offload.failed66:
-// CHECK17-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l155(i64 [[TMP134]], i64 [[TMP1]], i32* [[VLA]], i64 [[TMP136]]) #[[ATTR3]]
+// CHECK17-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l155(i64 [[TMP129]], i64 [[TMP1]], i32* [[VLA]], i64 [[TMP131]]) #[[ATTR3]]
 // CHECK17-NEXT:    br label [[OMP_OFFLOAD_CONT67]]
 // CHECK17:       omp_offload.cont67:
-// CHECK17-NEXT:    [[TMP171:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK17-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10EEiT_(i32 noundef signext [[TMP171]])
+// CHECK17-NEXT:    [[TMP164:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK17-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10EEiT_(i32 noundef signext [[TMP164]])
 // CHECK17-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK17-NEXT:    [[TMP172:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK17-NEXT:    call void @llvm.stackrestore(i8* [[TMP172]])
-// CHECK17-NEXT:    [[TMP173:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK17-NEXT:    ret i32 [[TMP173]]
+// CHECK17-NEXT:    [[TMP165:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK17-NEXT:    call void @llvm.stackrestore(i8* [[TMP165]])
+// CHECK17-NEXT:    [[TMP166:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK17-NEXT:    ret i32 [[TMP166]]
 //
 //
 // CHECK17-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l139
@@ -20263,11 +20193,11 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK17-NEXT:    store i32 [[TMP4]], i32* [[CONV3]], align 4
 // CHECK17-NEXT:    [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP3]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP5]])
+// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i64 [[TMP3]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP5]])
 // CHECK17-NEXT:    ret void
 //
 //
-// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..5
+// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..6
 // CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK17-NEXT:  entry:
 // CHECK17-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -20354,7 +20284,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[CONV9:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK17-NEXT:    store i32 [[TMP22]], i32* [[CONV9]], align 4, !llvm.access.group !28
 // CHECK17-NEXT:    [[TMP23:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !28
-// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i64 [[TMP21]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP23]]), !llvm.access.group !28
+// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*, i64)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i64 [[TMP21]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP23]]), !llvm.access.group !28
 // CHECK17-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK17:       omp.inner.for.inc:
 // CHECK17-NEXT:    [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28
@@ -20408,7 +20338,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    ret void
 //
 //
-// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..6
+// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..7
 // CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK17-NEXT:  entry:
 // CHECK17-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -20545,11 +20475,11 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32*
 // CHECK17-NEXT:    store i32 [[TMP2]], i32* [[CONV1]], align 4
 // CHECK17-NEXT:    [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8
-// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), i64 [[TMP3]], i64 [[TMP0]], i32* [[TMP1]])
+// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), i64 [[TMP3]], i64 [[TMP0]], i32* [[TMP1]])
 // CHECK17-NEXT:    ret void
 //
 //
-// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..8
+// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..10
 // CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] {
 // CHECK17-NEXT:  entry:
 // CHECK17-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -20626,7 +20556,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32*
 // CHECK17-NEXT:    store i32 [[TMP19]], i32* [[CONV6]], align 4, !llvm.access.group !34
 // CHECK17-NEXT:    [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !34
-// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], i32* [[TMP1]]), !llvm.access.group !34
+// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], i32* [[TMP1]]), !llvm.access.group !34
 // CHECK17-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK17:       omp.inner.for.inc:
 // CHECK17-NEXT:    [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34
@@ -20657,7 +20587,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    ret void
 //
 //
-// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..9
+// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..11
 // CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] {
 // CHECK17-NEXT:  entry:
 // CHECK17-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -20795,11 +20725,11 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK17-NEXT:    store i32 [[TMP4]], i32* [[CONV3]], align 4
 // CHECK17-NEXT:    [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP3]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP5]])
+// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i64 [[TMP3]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP5]])
 // CHECK17-NEXT:    ret void
 //
 //
-// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..11
+// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..14
 // CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK17-NEXT:  entry:
 // CHECK17-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -20884,7 +20814,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[CONV9:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK17-NEXT:    store i32 [[TMP21]], i32* [[CONV9]], align 4, !llvm.access.group !40
 // CHECK17-NEXT:    [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !40
-// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*, i64)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP22]]), !llvm.access.group !40
+// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP22]]), !llvm.access.group !40
 // CHECK17-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK17:       omp.inner.for.inc:
 // CHECK17-NEXT:    [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40
@@ -20915,7 +20845,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    ret void
 //
 //
-// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..12
+// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..15
 // CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK17-NEXT:  entry:
 // CHECK17-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -21033,7 +20963,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK17-LABEL: define {{[^@]+}}@_Z5tmainIiLi10EEiT_
-// CHECK17-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK17-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK17-NEXT:  entry:
 // CHECK17-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK17-NEXT:    [[A:%.*]] = alloca [10 x i32], align 4
@@ -21075,7 +21005,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK17-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK17-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK17-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l112.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.16, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK17-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l112.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.20, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.21, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK17-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK17-NEXT:    br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK17:       omp_offload.failed:
@@ -21093,7 +21023,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK17-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
 // CHECK17-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK17-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l116.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.19, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.20, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK17-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l116.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.24, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.25, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK17-NEXT:    [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
 // CHECK17-NEXT:    br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]]
 // CHECK17:       omp_offload.failed5:
@@ -21125,7 +21055,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0
 // CHECK17-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS8]], i32 0, i32 0
 // CHECK17-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK17-NEXT:    [[TMP33:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l120.region_id, i32 2, i8** [[TMP31]], i8** [[TMP32]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.23, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.24, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK17-NEXT:    [[TMP33:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l120.region_id, i32 2, i8** [[TMP31]], i8** [[TMP32]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.28, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.29, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK17-NEXT:    [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0
 // CHECK17-NEXT:    br i1 [[TMP34]], label [[OMP_OFFLOAD_FAILED11:%.*]], label [[OMP_OFFLOAD_CONT12:%.*]]
 // CHECK17:       omp_offload.failed11:
@@ -21143,7 +21073,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0
 // CHECK17-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 0
 // CHECK17-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK17-NEXT:    [[TMP42:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l124.region_id, i32 1, i8** [[TMP40]], i8** [[TMP41]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.27, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.28, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK17-NEXT:    [[TMP42:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l124.region_id, i32 1, i8** [[TMP40]], i8** [[TMP41]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.32, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.33, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK17-NEXT:    [[TMP43:%.*]] = icmp ne i32 [[TMP42]], 0
 // CHECK17-NEXT:    br i1 [[TMP43]], label [[OMP_OFFLOAD_FAILED17:%.*]], label [[OMP_OFFLOAD_CONT18:%.*]]
 // CHECK17:       omp_offload.failed17:
@@ -21175,7 +21105,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 0
 // CHECK17-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 0
 // CHECK17-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK17-NEXT:    [[TMP59:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l128.region_id, i32 2, i8** [[TMP57]], i8** [[TMP58]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.31, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.32, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK17-NEXT:    [[TMP59:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l128.region_id, i32 2, i8** [[TMP57]], i8** [[TMP58]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.36, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.37, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK17-NEXT:    [[TMP60:%.*]] = icmp ne i32 [[TMP59]], 0
 // CHECK17-NEXT:    br i1 [[TMP60]], label [[OMP_OFFLOAD_FAILED26:%.*]], label [[OMP_OFFLOAD_CONT27:%.*]]
 // CHECK17:       omp_offload.failed26:
@@ -21191,11 +21121,11 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8
 // CHECK17-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8
 // CHECK17-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8
-// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK17-NEXT:    ret void
 //
 //
-// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..14
+// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..18
 // CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK17-NEXT:  entry:
 // CHECK17-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -21243,7 +21173,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
 // CHECK17-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !46
 // CHECK17-NEXT:    [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
-// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !46
+// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !46
 // CHECK17-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK17:       omp.inner.for.inc:
 // CHECK17-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46
@@ -21265,7 +21195,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    ret void
 //
 //
-// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..15
+// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..19
 // CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK17-NEXT:  entry:
 // CHECK17-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -21355,11 +21285,11 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8
 // CHECK17-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8
 // CHECK17-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8
-// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..17 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..22 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK17-NEXT:    ret void
 //
 //
-// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..17
+// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..22
 // CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK17-NEXT:  entry:
 // CHECK17-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -21407,7 +21337,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
 // CHECK17-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !52
 // CHECK17-NEXT:    [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
-// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !52
+// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..23 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !52
 // CHECK17-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK17:       omp.inner.for.inc:
 // CHECK17-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52
@@ -21429,7 +21359,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    ret void
 //
 //
-// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..18
+// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..23
 // CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK17-NEXT:  entry:
 // CHECK17-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -21527,11 +21457,11 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK17-NEXT:    store i32 [[TMP1]], i32* [[CONV1]], align 4
 // CHECK17-NEXT:    [[TMP2:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i64)* @.omp_outlined..21 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i64 [[TMP2]])
+// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i64)* @.omp_outlined..26 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i64 [[TMP2]])
 // CHECK17-NEXT:    ret void
 //
 //
-// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..21
+// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..26
 // CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK17-NEXT:  entry:
 // CHECK17-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -21587,7 +21517,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK17-NEXT:    store i32 [[TMP12]], i32* [[CONV2]], align 4, !llvm.access.group !58
 // CHECK17-NEXT:    [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !58
-// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..22 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]), !llvm.access.group !58
+// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..27 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]), !llvm.access.group !58
 // CHECK17-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK17:       omp.inner.for.inc:
 // CHECK17-NEXT:    [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58
@@ -21609,7 +21539,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    ret void
 //
 //
-// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..22
+// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..27
 // CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK17-NEXT:  entry:
 // CHECK17-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -21724,11 +21654,11 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8
 // CHECK17-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8
 // CHECK17-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8
-// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..25 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK17-NEXT:    ret void
 //
 //
-// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..25
+// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..30
 // CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK17-NEXT:  entry:
 // CHECK17-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -21776,7 +21706,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
 // CHECK17-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !64
 // CHECK17-NEXT:    [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
-// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !64
+// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !64
 // CHECK17-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK17:       omp.inner.for.inc:
 // CHECK17-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !64
@@ -21798,7 +21728,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    ret void
 //
 //
-// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..26
+// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..31
 // CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK17-NEXT:  entry:
 // CHECK17-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -21894,11 +21824,11 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK17-NEXT:    store i32 [[TMP1]], i32* [[CONV1]], align 4
 // CHECK17-NEXT:    [[TMP2:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i64)* @.omp_outlined..29 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i64 [[TMP2]])
+// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i64)* @.omp_outlined..34 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i64 [[TMP2]])
 // CHECK17-NEXT:    ret void
 //
 //
-// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..29
+// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..34
 // CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK17-NEXT:  entry:
 // CHECK17-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -21954,7 +21884,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK17-NEXT:    store i32 [[TMP12]], i32* [[CONV2]], align 4, !llvm.access.group !70
 // CHECK17-NEXT:    [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !70
-// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]), !llvm.access.group !70
+// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]), !llvm.access.group !70
 // CHECK17-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK17:       omp.inner.for.inc:
 // CHECK17-NEXT:    [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !70
@@ -21976,7 +21906,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    ret void
 //
 //
-// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..30
+// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..35
 // CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK17-NEXT:  entry:
 // CHECK17-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -22063,7 +21993,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK17-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK17-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK17-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK17-NEXT:  entry:
 // CHECK17-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK17-NEXT:    ret void
@@ -22139,14 +22069,14 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    store i32 [[TMP3]], i32* [[CONV]], align 4
 // CHECK18-NEXT:    [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8
 // CHECK18-NEXT:    [[TMP5:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK18-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP4]], i64* [[TMP7]], align 8
-// CHECK18-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP4]], i64* [[TMP9]], align 8
-// CHECK18-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK18-NEXT:    store i64 4, i64* [[TMP10]], align 8
+// CHECK18-NEXT:    [[TMP6:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK18-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP6]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes to i8*), i64 24, i1 false)
+// CHECK18-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i64*
+// CHECK18-NEXT:    store i64 [[TMP4]], i64* [[TMP8]], align 8
+// CHECK18-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i64*
+// CHECK18-NEXT:    store i64 [[TMP4]], i64* [[TMP10]], align 8
 // CHECK18-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
 // CHECK18-NEXT:    store i8* null, i8** [[TMP11]], align 8
 // CHECK18-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -22155,304 +22085,290 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK18-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i64*
 // CHECK18-NEXT:    store i64 [[TMP1]], i64* [[TMP15]], align 8
-// CHECK18-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK18-NEXT:    store i64 8, i64* [[TMP16]], align 8
-// CHECK18-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK18-NEXT:    store i8* null, i8** [[TMP17]], align 8
-// CHECK18-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK18-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
-// CHECK18-NEXT:    store i32* [[VLA]], i32** [[TMP19]], align 8
-// CHECK18-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK18-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32**
-// CHECK18-NEXT:    store i32* [[VLA]], i32** [[TMP21]], align 8
-// CHECK18-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK18-NEXT:    store i64 [[TMP5]], i64* [[TMP22]], align 8
-// CHECK18-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK18-NEXT:    store i8* null, i8** [[TMP23]], align 8
-// CHECK18-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP27:%.*]] = load i32, i32* [[N]], align 4
-// CHECK18-NEXT:    store i32 [[TMP27]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK18-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK18-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP28]], 0
+// CHECK18-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK18-NEXT:    store i8* null, i8** [[TMP16]], align 8
+// CHECK18-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK18-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32**
+// CHECK18-NEXT:    store i32* [[VLA]], i32** [[TMP18]], align 8
+// CHECK18-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK18-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32**
+// CHECK18-NEXT:    store i32* [[VLA]], i32** [[TMP20]], align 8
+// CHECK18-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
+// CHECK18-NEXT:    store i64 [[TMP5]], i64* [[TMP21]], align 8
+// CHECK18-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK18-NEXT:    store i8* null, i8** [[TMP22]], align 8
+// CHECK18-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP26:%.*]] = load i32, i32* [[N]], align 4
+// CHECK18-NEXT:    store i32 [[TMP26]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK18-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK18-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP27]], 0
 // CHECK18-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK18-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK18-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK18-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK18-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP29]], 1
-// CHECK18-NEXT:    [[TMP30:%.*]] = zext i32 [[ADD]] to i64
-// CHECK18-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP30]])
-// CHECK18-NEXT:    [[TMP31:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l139.region_id, i32 3, i8** [[TMP24]], i8** [[TMP25]], i64* [[TMP26]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK18-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
-// CHECK18-NEXT:    br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK18-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK18-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP28]], 1
+// CHECK18-NEXT:    [[TMP29:%.*]] = zext i32 [[ADD]] to i64
+// CHECK18-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP29]])
+// CHECK18-NEXT:    [[TMP30:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l139.region_id, i32 3, i8** [[TMP23]], i8** [[TMP24]], i64* [[TMP25]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK18-NEXT:    [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
+// CHECK18-NEXT:    br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK18:       omp_offload.failed:
 // CHECK18-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l139(i64 [[TMP4]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK18-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK18:       omp_offload.cont:
-// CHECK18-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N]], align 4
+// CHECK18-NEXT:    [[TMP32:%.*]] = load i32, i32* [[N]], align 4
 // CHECK18-NEXT:    [[CONV4:%.*]] = bitcast i64* [[N_CASTED3]] to i32*
-// CHECK18-NEXT:    store i32 [[TMP33]], i32* [[CONV4]], align 4
-// CHECK18-NEXT:    [[TMP34:%.*]] = load i64, i64* [[N_CASTED3]], align 8
-// CHECK18-NEXT:    [[TMP35:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK18-NEXT:    store i32 [[TMP32]], i32* [[CONV4]], align 4
+// CHECK18-NEXT:    [[TMP33:%.*]] = load i64, i64* [[N_CASTED3]], align 8
+// CHECK18-NEXT:    [[TMP34:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK18-NEXT:    [[TMP35:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES8]] to i8*
+// CHECK18-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP35]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes.4 to i8*), i64 24, i1 false)
 // CHECK18-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
 // CHECK18-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP34]], i64* [[TMP37]], align 8
+// CHECK18-NEXT:    store i64 [[TMP33]], i64* [[TMP37]], align 8
 // CHECK18-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
 // CHECK18-NEXT:    [[TMP39:%.*]] = bitcast i8** [[TMP38]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP34]], i64* [[TMP39]], align 8
-// CHECK18-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 0
-// CHECK18-NEXT:    store i64 4, i64* [[TMP40]], align 8
-// CHECK18-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 0
-// CHECK18-NEXT:    store i8* null, i8** [[TMP41]], align 8
-// CHECK18-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 1
-// CHECK18-NEXT:    [[TMP43:%.*]] = bitcast i8** [[TMP42]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP1]], i64* [[TMP43]], align 8
-// CHECK18-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 1
-// CHECK18-NEXT:    [[TMP45:%.*]] = bitcast i8** [[TMP44]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP1]], i64* [[TMP45]], align 8
-// CHECK18-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 1
-// CHECK18-NEXT:    store i64 8, i64* [[TMP46]], align 8
-// CHECK18-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 1
-// CHECK18-NEXT:    store i8* null, i8** [[TMP47]], align 8
-// CHECK18-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 2
+// CHECK18-NEXT:    store i64 [[TMP33]], i64* [[TMP39]], align 8
+// CHECK18-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 0
+// CHECK18-NEXT:    store i8* null, i8** [[TMP40]], align 8
+// CHECK18-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 1
+// CHECK18-NEXT:    [[TMP42:%.*]] = bitcast i8** [[TMP41]] to i64*
+// CHECK18-NEXT:    store i64 [[TMP1]], i64* [[TMP42]], align 8
+// CHECK18-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 1
+// CHECK18-NEXT:    [[TMP44:%.*]] = bitcast i8** [[TMP43]] to i64*
+// CHECK18-NEXT:    store i64 [[TMP1]], i64* [[TMP44]], align 8
+// CHECK18-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 1
+// CHECK18-NEXT:    store i8* null, i8** [[TMP45]], align 8
+// CHECK18-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 2
+// CHECK18-NEXT:    [[TMP47:%.*]] = bitcast i8** [[TMP46]] to i32**
+// CHECK18-NEXT:    store i32* [[VLA]], i32** [[TMP47]], align 8
+// CHECK18-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 2
 // CHECK18-NEXT:    [[TMP49:%.*]] = bitcast i8** [[TMP48]] to i32**
 // CHECK18-NEXT:    store i32* [[VLA]], i32** [[TMP49]], align 8
-// CHECK18-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 2
-// CHECK18-NEXT:    [[TMP51:%.*]] = bitcast i8** [[TMP50]] to i32**
-// CHECK18-NEXT:    store i32* [[VLA]], i32** [[TMP51]], align 8
-// CHECK18-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 2
-// CHECK18-NEXT:    store i64 [[TMP35]], i64* [[TMP52]], align 8
-// CHECK18-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 2
-// CHECK18-NEXT:    store i8* null, i8** [[TMP53]], align 8
-// CHECK18-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP57:%.*]] = load i32, i32* [[N]], align 4
-// CHECK18-NEXT:    store i32 [[TMP57]], i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK18-NEXT:    [[TMP58:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK18-NEXT:    [[SUB12:%.*]] = sub nsw i32 [[TMP58]], 0
+// CHECK18-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 2
+// CHECK18-NEXT:    store i64 [[TMP34]], i64* [[TMP50]], align 8
+// CHECK18-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 2
+// CHECK18-NEXT:    store i8* null, i8** [[TMP51]], align 8
+// CHECK18-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP55:%.*]] = load i32, i32* [[N]], align 4
+// CHECK18-NEXT:    store i32 [[TMP55]], i32* [[DOTCAPTURE_EXPR_10]], align 4
+// CHECK18-NEXT:    [[TMP56:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
+// CHECK18-NEXT:    [[SUB12:%.*]] = sub nsw i32 [[TMP56]], 0
 // CHECK18-NEXT:    [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1
 // CHECK18-NEXT:    [[SUB14:%.*]] = sub nsw i32 [[DIV13]], 1
 // CHECK18-NEXT:    store i32 [[SUB14]], i32* [[DOTCAPTURE_EXPR_11]], align 4
-// CHECK18-NEXT:    [[TMP59:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_11]], align 4
-// CHECK18-NEXT:    [[ADD15:%.*]] = add nsw i32 [[TMP59]], 1
-// CHECK18-NEXT:    [[TMP60:%.*]] = zext i32 [[ADD15]] to i64
-// CHECK18-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP60]])
-// CHECK18-NEXT:    [[TMP61:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l143.region_id, i32 3, i8** [[TMP54]], i8** [[TMP55]], i64* [[TMP56]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK18-NEXT:    [[TMP62:%.*]] = icmp ne i32 [[TMP61]], 0
-// CHECK18-NEXT:    br i1 [[TMP62]], label [[OMP_OFFLOAD_FAILED16:%.*]], label [[OMP_OFFLOAD_CONT17:%.*]]
+// CHECK18-NEXT:    [[TMP57:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_11]], align 4
+// CHECK18-NEXT:    [[ADD15:%.*]] = add nsw i32 [[TMP57]], 1
+// CHECK18-NEXT:    [[TMP58:%.*]] = zext i32 [[ADD15]] to i64
+// CHECK18-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP58]])
+// CHECK18-NEXT:    [[TMP59:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l143.region_id, i32 3, i8** [[TMP52]], i8** [[TMP53]], i64* [[TMP54]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.5, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK18-NEXT:    [[TMP60:%.*]] = icmp ne i32 [[TMP59]], 0
+// CHECK18-NEXT:    br i1 [[TMP60]], label [[OMP_OFFLOAD_FAILED16:%.*]], label [[OMP_OFFLOAD_CONT17:%.*]]
 // CHECK18:       omp_offload.failed16:
-// CHECK18-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l143(i64 [[TMP34]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK18-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l143(i64 [[TMP33]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK18-NEXT:    br label [[OMP_OFFLOAD_CONT17]]
 // CHECK18:       omp_offload.cont17:
-// CHECK18-NEXT:    [[TMP63:%.*]] = load i32, i32* [[M]], align 4
-// CHECK18-NEXT:    store i32 [[TMP63]], i32* [[DOTCAPTURE_EXPR_18]], align 4
-// CHECK18-NEXT:    [[TMP64:%.*]] = load i32, i32* [[N]], align 4
+// CHECK18-NEXT:    [[TMP61:%.*]] = load i32, i32* [[M]], align 4
+// CHECK18-NEXT:    store i32 [[TMP61]], i32* [[DOTCAPTURE_EXPR_18]], align 4
+// CHECK18-NEXT:    [[TMP62:%.*]] = load i32, i32* [[N]], align 4
 // CHECK18-NEXT:    [[CONV20:%.*]] = bitcast i64* [[N_CASTED19]] to i32*
-// CHECK18-NEXT:    store i32 [[TMP64]], i32* [[CONV20]], align 4
-// CHECK18-NEXT:    [[TMP65:%.*]] = load i64, i64* [[N_CASTED19]], align 8
-// CHECK18-NEXT:    [[TMP66:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4
+// CHECK18-NEXT:    store i32 [[TMP62]], i32* [[CONV20]], align 4
+// CHECK18-NEXT:    [[TMP63:%.*]] = load i64, i64* [[N_CASTED19]], align 8
+// CHECK18-NEXT:    [[TMP64:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4
 // CHECK18-NEXT:    [[CONV21:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
-// CHECK18-NEXT:    store i32 [[TMP66]], i32* [[CONV21]], align 4
-// CHECK18-NEXT:    [[TMP67:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK18-NEXT:    [[TMP68:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK18-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP70:%.*]] = bitcast i8** [[TMP69]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP65]], i64* [[TMP70]], align 8
-// CHECK18-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP72:%.*]] = bitcast i8** [[TMP71]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP65]], i64* [[TMP72]], align 8
-// CHECK18-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 0
-// CHECK18-NEXT:    store i64 4, i64* [[TMP73]], align 8
-// CHECK18-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 0
-// CHECK18-NEXT:    store i8* null, i8** [[TMP74]], align 8
-// CHECK18-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 1
+// CHECK18-NEXT:    store i32 [[TMP64]], i32* [[CONV21]], align 4
+// CHECK18-NEXT:    [[TMP65:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
+// CHECK18-NEXT:    [[TMP66:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK18-NEXT:    [[TMP67:%.*]] = bitcast [4 x i64]* [[DOTOFFLOAD_SIZES25]] to i8*
+// CHECK18-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP67]], i8* align 8 bitcast ([4 x i64]* @.offload_sizes.8 to i8*), i64 32, i1 false)
+// CHECK18-NEXT:    [[TMP68:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP69:%.*]] = bitcast i8** [[TMP68]] to i64*
+// CHECK18-NEXT:    store i64 [[TMP63]], i64* [[TMP69]], align 8
+// CHECK18-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP71:%.*]] = bitcast i8** [[TMP70]] to i64*
+// CHECK18-NEXT:    store i64 [[TMP63]], i64* [[TMP71]], align 8
+// CHECK18-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 0
+// CHECK18-NEXT:    store i8* null, i8** [[TMP72]], align 8
+// CHECK18-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 1
+// CHECK18-NEXT:    [[TMP74:%.*]] = bitcast i8** [[TMP73]] to i64*
+// CHECK18-NEXT:    store i64 [[TMP1]], i64* [[TMP74]], align 8
+// CHECK18-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 1
 // CHECK18-NEXT:    [[TMP76:%.*]] = bitcast i8** [[TMP75]] to i64*
 // CHECK18-NEXT:    store i64 [[TMP1]], i64* [[TMP76]], align 8
-// CHECK18-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 1
-// CHECK18-NEXT:    [[TMP78:%.*]] = bitcast i8** [[TMP77]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP1]], i64* [[TMP78]], align 8
-// CHECK18-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 1
-// CHECK18-NEXT:    store i64 8, i64* [[TMP79]], align 8
-// CHECK18-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 1
-// CHECK18-NEXT:    store i8* null, i8** [[TMP80]], align 8
-// CHECK18-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 2
-// CHECK18-NEXT:    [[TMP82:%.*]] = bitcast i8** [[TMP81]] to i32**
-// CHECK18-NEXT:    store i32* [[VLA]], i32** [[TMP82]], align 8
-// CHECK18-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 2
-// CHECK18-NEXT:    [[TMP84:%.*]] = bitcast i8** [[TMP83]] to i32**
-// CHECK18-NEXT:    store i32* [[VLA]], i32** [[TMP84]], align 8
-// CHECK18-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 2
-// CHECK18-NEXT:    store i64 [[TMP68]], i64* [[TMP85]], align 8
-// CHECK18-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 2
-// CHECK18-NEXT:    store i8* null, i8** [[TMP86]], align 8
-// CHECK18-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 3
-// CHECK18-NEXT:    [[TMP88:%.*]] = bitcast i8** [[TMP87]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP67]], i64* [[TMP88]], align 8
-// CHECK18-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 3
-// CHECK18-NEXT:    [[TMP90:%.*]] = bitcast i8** [[TMP89]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP67]], i64* [[TMP90]], align 8
-// CHECK18-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 3
-// CHECK18-NEXT:    store i64 4, i64* [[TMP91]], align 8
-// CHECK18-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 3
-// CHECK18-NEXT:    store i8* null, i8** [[TMP92]], align 8
-// CHECK18-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP96:%.*]] = load i32, i32* [[N]], align 4
-// CHECK18-NEXT:    store i32 [[TMP96]], i32* [[DOTCAPTURE_EXPR_27]], align 4
-// CHECK18-NEXT:    [[TMP97:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_27]], align 4
-// CHECK18-NEXT:    [[SUB29:%.*]] = sub nsw i32 [[TMP97]], 0
+// CHECK18-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 1
+// CHECK18-NEXT:    store i8* null, i8** [[TMP77]], align 8
+// CHECK18-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 2
+// CHECK18-NEXT:    [[TMP79:%.*]] = bitcast i8** [[TMP78]] to i32**
+// CHECK18-NEXT:    store i32* [[VLA]], i32** [[TMP79]], align 8
+// CHECK18-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 2
+// CHECK18-NEXT:    [[TMP81:%.*]] = bitcast i8** [[TMP80]] to i32**
+// CHECK18-NEXT:    store i32* [[VLA]], i32** [[TMP81]], align 8
+// CHECK18-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 2
+// CHECK18-NEXT:    store i64 [[TMP66]], i64* [[TMP82]], align 8
+// CHECK18-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 2
+// CHECK18-NEXT:    store i8* null, i8** [[TMP83]], align 8
+// CHECK18-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 3
+// CHECK18-NEXT:    [[TMP85:%.*]] = bitcast i8** [[TMP84]] to i64*
+// CHECK18-NEXT:    store i64 [[TMP65]], i64* [[TMP85]], align 8
+// CHECK18-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 3
+// CHECK18-NEXT:    [[TMP87:%.*]] = bitcast i8** [[TMP86]] to i64*
+// CHECK18-NEXT:    store i64 [[TMP65]], i64* [[TMP87]], align 8
+// CHECK18-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 3
+// CHECK18-NEXT:    store i8* null, i8** [[TMP88]], align 8
+// CHECK18-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP92:%.*]] = load i32, i32* [[N]], align 4
+// CHECK18-NEXT:    store i32 [[TMP92]], i32* [[DOTCAPTURE_EXPR_27]], align 4
+// CHECK18-NEXT:    [[TMP93:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_27]], align 4
+// CHECK18-NEXT:    [[SUB29:%.*]] = sub nsw i32 [[TMP93]], 0
 // CHECK18-NEXT:    [[DIV30:%.*]] = sdiv i32 [[SUB29]], 1
 // CHECK18-NEXT:    [[SUB31:%.*]] = sub nsw i32 [[DIV30]], 1
 // CHECK18-NEXT:    store i32 [[SUB31]], i32* [[DOTCAPTURE_EXPR_28]], align 4
-// CHECK18-NEXT:    [[TMP98:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_28]], align 4
-// CHECK18-NEXT:    [[ADD32:%.*]] = add nsw i32 [[TMP98]], 1
-// CHECK18-NEXT:    [[TMP99:%.*]] = zext i32 [[ADD32]] to i64
-// CHECK18-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP99]])
-// CHECK18-NEXT:    [[TMP100:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l147.region_id, i32 4, i8** [[TMP93]], i8** [[TMP94]], i64* [[TMP95]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.7, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK18-NEXT:    [[TMP101:%.*]] = icmp ne i32 [[TMP100]], 0
-// CHECK18-NEXT:    br i1 [[TMP101]], label [[OMP_OFFLOAD_FAILED33:%.*]], label [[OMP_OFFLOAD_CONT34:%.*]]
+// CHECK18-NEXT:    [[TMP94:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_28]], align 4
+// CHECK18-NEXT:    [[ADD32:%.*]] = add nsw i32 [[TMP94]], 1
+// CHECK18-NEXT:    [[TMP95:%.*]] = zext i32 [[ADD32]] to i64
+// CHECK18-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP95]])
+// CHECK18-NEXT:    [[TMP96:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l147.region_id, i32 4, i8** [[TMP89]], i8** [[TMP90]], i64* [[TMP91]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK18-NEXT:    [[TMP97:%.*]] = icmp ne i32 [[TMP96]], 0
+// CHECK18-NEXT:    br i1 [[TMP97]], label [[OMP_OFFLOAD_FAILED33:%.*]], label [[OMP_OFFLOAD_CONT34:%.*]]
 // CHECK18:       omp_offload.failed33:
-// CHECK18-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l147(i64 [[TMP65]], i64 [[TMP1]], i32* [[VLA]], i64 [[TMP67]]) #[[ATTR3]]
+// CHECK18-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l147(i64 [[TMP63]], i64 [[TMP1]], i32* [[VLA]], i64 [[TMP65]]) #[[ATTR3]]
 // CHECK18-NEXT:    br label [[OMP_OFFLOAD_CONT34]]
 // CHECK18:       omp_offload.cont34:
-// CHECK18-NEXT:    [[TMP102:%.*]] = load i32, i32* [[N]], align 4
+// CHECK18-NEXT:    [[TMP98:%.*]] = load i32, i32* [[N]], align 4
 // CHECK18-NEXT:    [[CONV36:%.*]] = bitcast i64* [[N_CASTED35]] to i32*
-// CHECK18-NEXT:    store i32 [[TMP102]], i32* [[CONV36]], align 4
-// CHECK18-NEXT:    [[TMP103:%.*]] = load i64, i64* [[N_CASTED35]], align 8
-// CHECK18-NEXT:    [[TMP104:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK18-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS37]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP106:%.*]] = bitcast i8** [[TMP105]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP103]], i64* [[TMP106]], align 8
-// CHECK18-NEXT:    [[TMP107:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS38]], i32 0, i32 0
+// CHECK18-NEXT:    store i32 [[TMP98]], i32* [[CONV36]], align 4
+// CHECK18-NEXT:    [[TMP99:%.*]] = load i64, i64* [[N_CASTED35]], align 8
+// CHECK18-NEXT:    [[TMP100:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK18-NEXT:    [[TMP101:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES40]] to i8*
+// CHECK18-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP101]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes.12 to i8*), i64 24, i1 false)
+// CHECK18-NEXT:    [[TMP102:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS37]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP103:%.*]] = bitcast i8** [[TMP102]] to i64*
+// CHECK18-NEXT:    store i64 [[TMP99]], i64* [[TMP103]], align 8
+// CHECK18-NEXT:    [[TMP104:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS38]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP105:%.*]] = bitcast i8** [[TMP104]] to i64*
+// CHECK18-NEXT:    store i64 [[TMP99]], i64* [[TMP105]], align 8
+// CHECK18-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS39]], i64 0, i64 0
+// CHECK18-NEXT:    store i8* null, i8** [[TMP106]], align 8
+// CHECK18-NEXT:    [[TMP107:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS37]], i32 0, i32 1
 // CHECK18-NEXT:    [[TMP108:%.*]] = bitcast i8** [[TMP107]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP103]], i64* [[TMP108]], align 8
-// CHECK18-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES40]], i32 0, i32 0
-// CHECK18-NEXT:    store i64 4, i64* [[TMP109]], align 8
-// CHECK18-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS39]], i64 0, i64 0
-// CHECK18-NEXT:    store i8* null, i8** [[TMP110]], align 8
-// CHECK18-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS37]], i32 0, i32 1
-// CHECK18-NEXT:    [[TMP112:%.*]] = bitcast i8** [[TMP111]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP1]], i64* [[TMP112]], align 8
-// CHECK18-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS38]], i32 0, i32 1
-// CHECK18-NEXT:    [[TMP114:%.*]] = bitcast i8** [[TMP113]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP1]], i64* [[TMP114]], align 8
-// CHECK18-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES40]], i32 0, i32 1
-// CHECK18-NEXT:    store i64 8, i64* [[TMP115]], align 8
-// CHECK18-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS39]], i64 0, i64 1
-// CHECK18-NEXT:    store i8* null, i8** [[TMP116]], align 8
-// CHECK18-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS37]], i32 0, i32 2
-// CHECK18-NEXT:    [[TMP118:%.*]] = bitcast i8** [[TMP117]] to i32**
-// CHECK18-NEXT:    store i32* [[VLA]], i32** [[TMP118]], align 8
-// CHECK18-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS38]], i32 0, i32 2
-// CHECK18-NEXT:    [[TMP120:%.*]] = bitcast i8** [[TMP119]] to i32**
-// CHECK18-NEXT:    store i32* [[VLA]], i32** [[TMP120]], align 8
-// CHECK18-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES40]], i32 0, i32 2
-// CHECK18-NEXT:    store i64 [[TMP104]], i64* [[TMP121]], align 8
-// CHECK18-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS39]], i64 0, i64 2
-// CHECK18-NEXT:    store i8* null, i8** [[TMP122]], align 8
-// CHECK18-NEXT:    [[TMP123:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS37]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS38]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP125:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES40]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP126:%.*]] = load i32, i32* [[N]], align 4
-// CHECK18-NEXT:    store i32 [[TMP126]], i32* [[DOTCAPTURE_EXPR_42]], align 4
-// CHECK18-NEXT:    [[TMP127:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_42]], align 4
-// CHECK18-NEXT:    [[SUB44:%.*]] = sub nsw i32 [[TMP127]], 0
+// CHECK18-NEXT:    store i64 [[TMP1]], i64* [[TMP108]], align 8
+// CHECK18-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS38]], i32 0, i32 1
+// CHECK18-NEXT:    [[TMP110:%.*]] = bitcast i8** [[TMP109]] to i64*
+// CHECK18-NEXT:    store i64 [[TMP1]], i64* [[TMP110]], align 8
+// CHECK18-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS39]], i64 0, i64 1
+// CHECK18-NEXT:    store i8* null, i8** [[TMP111]], align 8
+// CHECK18-NEXT:    [[TMP112:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS37]], i32 0, i32 2
+// CHECK18-NEXT:    [[TMP113:%.*]] = bitcast i8** [[TMP112]] to i32**
+// CHECK18-NEXT:    store i32* [[VLA]], i32** [[TMP113]], align 8
+// CHECK18-NEXT:    [[TMP114:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS38]], i32 0, i32 2
+// CHECK18-NEXT:    [[TMP115:%.*]] = bitcast i8** [[TMP114]] to i32**
+// CHECK18-NEXT:    store i32* [[VLA]], i32** [[TMP115]], align 8
+// CHECK18-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES40]], i32 0, i32 2
+// CHECK18-NEXT:    store i64 [[TMP100]], i64* [[TMP116]], align 8
+// CHECK18-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS39]], i64 0, i64 2
+// CHECK18-NEXT:    store i8* null, i8** [[TMP117]], align 8
+// CHECK18-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS37]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS38]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP120:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES40]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP121:%.*]] = load i32, i32* [[N]], align 4
+// CHECK18-NEXT:    store i32 [[TMP121]], i32* [[DOTCAPTURE_EXPR_42]], align 4
+// CHECK18-NEXT:    [[TMP122:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_42]], align 4
+// CHECK18-NEXT:    [[SUB44:%.*]] = sub nsw i32 [[TMP122]], 0
 // CHECK18-NEXT:    [[DIV45:%.*]] = sdiv i32 [[SUB44]], 1
 // CHECK18-NEXT:    [[SUB46:%.*]] = sub nsw i32 [[DIV45]], 1
 // CHECK18-NEXT:    store i32 [[SUB46]], i32* [[DOTCAPTURE_EXPR_43]], align 4
-// CHECK18-NEXT:    [[TMP128:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_43]], align 4
-// CHECK18-NEXT:    [[ADD47:%.*]] = add nsw i32 [[TMP128]], 1
-// CHECK18-NEXT:    [[TMP129:%.*]] = zext i32 [[ADD47]] to i64
-// CHECK18-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP129]])
-// CHECK18-NEXT:    [[TMP130:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l151.region_id, i32 3, i8** [[TMP123]], i8** [[TMP124]], i64* [[TMP125]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.10, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK18-NEXT:    [[TMP131:%.*]] = icmp ne i32 [[TMP130]], 0
-// CHECK18-NEXT:    br i1 [[TMP131]], label [[OMP_OFFLOAD_FAILED48:%.*]], label [[OMP_OFFLOAD_CONT49:%.*]]
+// CHECK18-NEXT:    [[TMP123:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_43]], align 4
+// CHECK18-NEXT:    [[ADD47:%.*]] = add nsw i32 [[TMP123]], 1
+// CHECK18-NEXT:    [[TMP124:%.*]] = zext i32 [[ADD47]] to i64
+// CHECK18-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP124]])
+// CHECK18-NEXT:    [[TMP125:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l151.region_id, i32 3, i8** [[TMP118]], i8** [[TMP119]], i64* [[TMP120]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK18-NEXT:    [[TMP126:%.*]] = icmp ne i32 [[TMP125]], 0
+// CHECK18-NEXT:    br i1 [[TMP126]], label [[OMP_OFFLOAD_FAILED48:%.*]], label [[OMP_OFFLOAD_CONT49:%.*]]
 // CHECK18:       omp_offload.failed48:
-// CHECK18-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l151(i64 [[TMP103]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK18-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l151(i64 [[TMP99]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK18-NEXT:    br label [[OMP_OFFLOAD_CONT49]]
 // CHECK18:       omp_offload.cont49:
-// CHECK18-NEXT:    [[TMP132:%.*]] = load i32, i32* [[M]], align 4
-// CHECK18-NEXT:    store i32 [[TMP132]], i32* [[DOTCAPTURE_EXPR_50]], align 4
-// CHECK18-NEXT:    [[TMP133:%.*]] = load i32, i32* [[N]], align 4
+// CHECK18-NEXT:    [[TMP127:%.*]] = load i32, i32* [[M]], align 4
+// CHECK18-NEXT:    store i32 [[TMP127]], i32* [[DOTCAPTURE_EXPR_50]], align 4
+// CHECK18-NEXT:    [[TMP128:%.*]] = load i32, i32* [[N]], align 4
 // CHECK18-NEXT:    [[CONV52:%.*]] = bitcast i64* [[N_CASTED51]] to i32*
-// CHECK18-NEXT:    store i32 [[TMP133]], i32* [[CONV52]], align 4
-// CHECK18-NEXT:    [[TMP134:%.*]] = load i64, i64* [[N_CASTED51]], align 8
-// CHECK18-NEXT:    [[TMP135:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_50]], align 4
+// CHECK18-NEXT:    store i32 [[TMP128]], i32* [[CONV52]], align 4
+// CHECK18-NEXT:    [[TMP129:%.*]] = load i64, i64* [[N_CASTED51]], align 8
+// CHECK18-NEXT:    [[TMP130:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_50]], align 4
 // CHECK18-NEXT:    [[CONV54:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED53]] to i32*
-// CHECK18-NEXT:    store i32 [[TMP135]], i32* [[CONV54]], align 4
-// CHECK18-NEXT:    [[TMP136:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED53]], align 8
-// CHECK18-NEXT:    [[TMP137:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK18-NEXT:    [[TMP138:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS55]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP139:%.*]] = bitcast i8** [[TMP138]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP134]], i64* [[TMP139]], align 8
-// CHECK18-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS56]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP141:%.*]] = bitcast i8** [[TMP140]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP134]], i64* [[TMP141]], align 8
-// CHECK18-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES58]], i32 0, i32 0
-// CHECK18-NEXT:    store i64 4, i64* [[TMP142]], align 8
-// CHECK18-NEXT:    [[TMP143:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS57]], i64 0, i64 0
+// CHECK18-NEXT:    store i32 [[TMP130]], i32* [[CONV54]], align 4
+// CHECK18-NEXT:    [[TMP131:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED53]], align 8
+// CHECK18-NEXT:    [[TMP132:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK18-NEXT:    [[TMP133:%.*]] = bitcast [4 x i64]* [[DOTOFFLOAD_SIZES58]] to i8*
+// CHECK18-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP133]], i8* align 8 bitcast ([4 x i64]* @.offload_sizes.16 to i8*), i64 32, i1 false)
+// CHECK18-NEXT:    [[TMP134:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS55]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP135:%.*]] = bitcast i8** [[TMP134]] to i64*
+// CHECK18-NEXT:    store i64 [[TMP129]], i64* [[TMP135]], align 8
+// CHECK18-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS56]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP137:%.*]] = bitcast i8** [[TMP136]] to i64*
+// CHECK18-NEXT:    store i64 [[TMP129]], i64* [[TMP137]], align 8
+// CHECK18-NEXT:    [[TMP138:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS57]], i64 0, i64 0
+// CHECK18-NEXT:    store i8* null, i8** [[TMP138]], align 8
+// CHECK18-NEXT:    [[TMP139:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS55]], i32 0, i32 1
+// CHECK18-NEXT:    [[TMP140:%.*]] = bitcast i8** [[TMP139]] to i64*
+// CHECK18-NEXT:    store i64 [[TMP1]], i64* [[TMP140]], align 8
+// CHECK18-NEXT:    [[TMP141:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS56]], i32 0, i32 1
+// CHECK18-NEXT:    [[TMP142:%.*]] = bitcast i8** [[TMP141]] to i64*
+// CHECK18-NEXT:    store i64 [[TMP1]], i64* [[TMP142]], align 8
+// CHECK18-NEXT:    [[TMP143:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS57]], i64 0, i64 1
 // CHECK18-NEXT:    store i8* null, i8** [[TMP143]], align 8
-// CHECK18-NEXT:    [[TMP144:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS55]], i32 0, i32 1
-// CHECK18-NEXT:    [[TMP145:%.*]] = bitcast i8** [[TMP144]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP1]], i64* [[TMP145]], align 8
-// CHECK18-NEXT:    [[TMP146:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS56]], i32 0, i32 1
-// CHECK18-NEXT:    [[TMP147:%.*]] = bitcast i8** [[TMP146]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP1]], i64* [[TMP147]], align 8
-// CHECK18-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES58]], i32 0, i32 1
-// CHECK18-NEXT:    store i64 8, i64* [[TMP148]], align 8
-// CHECK18-NEXT:    [[TMP149:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS57]], i64 0, i64 1
+// CHECK18-NEXT:    [[TMP144:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS55]], i32 0, i32 2
+// CHECK18-NEXT:    [[TMP145:%.*]] = bitcast i8** [[TMP144]] to i32**
+// CHECK18-NEXT:    store i32* [[VLA]], i32** [[TMP145]], align 8
+// CHECK18-NEXT:    [[TMP146:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS56]], i32 0, i32 2
+// CHECK18-NEXT:    [[TMP147:%.*]] = bitcast i8** [[TMP146]] to i32**
+// CHECK18-NEXT:    store i32* [[VLA]], i32** [[TMP147]], align 8
+// CHECK18-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES58]], i32 0, i32 2
+// CHECK18-NEXT:    store i64 [[TMP132]], i64* [[TMP148]], align 8
+// CHECK18-NEXT:    [[TMP149:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS57]], i64 0, i64 2
 // CHECK18-NEXT:    store i8* null, i8** [[TMP149]], align 8
-// CHECK18-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS55]], i32 0, i32 2
-// CHECK18-NEXT:    [[TMP151:%.*]] = bitcast i8** [[TMP150]] to i32**
-// CHECK18-NEXT:    store i32* [[VLA]], i32** [[TMP151]], align 8
-// CHECK18-NEXT:    [[TMP152:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS56]], i32 0, i32 2
-// CHECK18-NEXT:    [[TMP153:%.*]] = bitcast i8** [[TMP152]] to i32**
-// CHECK18-NEXT:    store i32* [[VLA]], i32** [[TMP153]], align 8
-// CHECK18-NEXT:    [[TMP154:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES58]], i32 0, i32 2
-// CHECK18-NEXT:    store i64 [[TMP137]], i64* [[TMP154]], align 8
-// CHECK18-NEXT:    [[TMP155:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS57]], i64 0, i64 2
-// CHECK18-NEXT:    store i8* null, i8** [[TMP155]], align 8
-// CHECK18-NEXT:    [[TMP156:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS55]], i32 0, i32 3
-// CHECK18-NEXT:    [[TMP157:%.*]] = bitcast i8** [[TMP156]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP136]], i64* [[TMP157]], align 8
-// CHECK18-NEXT:    [[TMP158:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS56]], i32 0, i32 3
-// CHECK18-NEXT:    [[TMP159:%.*]] = bitcast i8** [[TMP158]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP136]], i64* [[TMP159]], align 8
-// CHECK18-NEXT:    [[TMP160:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES58]], i32 0, i32 3
-// CHECK18-NEXT:    store i64 4, i64* [[TMP160]], align 8
-// CHECK18-NEXT:    [[TMP161:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS57]], i64 0, i64 3
-// CHECK18-NEXT:    store i8* null, i8** [[TMP161]], align 8
-// CHECK18-NEXT:    [[TMP162:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS55]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP163:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS56]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP164:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES58]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP165:%.*]] = load i32, i32* [[N]], align 4
-// CHECK18-NEXT:    store i32 [[TMP165]], i32* [[DOTCAPTURE_EXPR_60]], align 4
-// CHECK18-NEXT:    [[TMP166:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_60]], align 4
-// CHECK18-NEXT:    [[SUB62:%.*]] = sub nsw i32 [[TMP166]], 0
+// CHECK18-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS55]], i32 0, i32 3
+// CHECK18-NEXT:    [[TMP151:%.*]] = bitcast i8** [[TMP150]] to i64*
+// CHECK18-NEXT:    store i64 [[TMP131]], i64* [[TMP151]], align 8
+// CHECK18-NEXT:    [[TMP152:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS56]], i32 0, i32 3
+// CHECK18-NEXT:    [[TMP153:%.*]] = bitcast i8** [[TMP152]] to i64*
+// CHECK18-NEXT:    store i64 [[TMP131]], i64* [[TMP153]], align 8
+// CHECK18-NEXT:    [[TMP154:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS57]], i64 0, i64 3
+// CHECK18-NEXT:    store i8* null, i8** [[TMP154]], align 8
+// CHECK18-NEXT:    [[TMP155:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS55]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP156:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS56]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP157:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES58]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP158:%.*]] = load i32, i32* [[N]], align 4
+// CHECK18-NEXT:    store i32 [[TMP158]], i32* [[DOTCAPTURE_EXPR_60]], align 4
+// CHECK18-NEXT:    [[TMP159:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_60]], align 4
+// CHECK18-NEXT:    [[SUB62:%.*]] = sub nsw i32 [[TMP159]], 0
 // CHECK18-NEXT:    [[DIV63:%.*]] = sdiv i32 [[SUB62]], 1
 // CHECK18-NEXT:    [[SUB64:%.*]] = sub nsw i32 [[DIV63]], 1
 // CHECK18-NEXT:    store i32 [[SUB64]], i32* [[DOTCAPTURE_EXPR_61]], align 4
-// CHECK18-NEXT:    [[TMP167:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_61]], align 4
-// CHECK18-NEXT:    [[ADD65:%.*]] = add nsw i32 [[TMP167]], 1
-// CHECK18-NEXT:    [[TMP168:%.*]] = zext i32 [[ADD65]] to i64
-// CHECK18-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP168]])
-// CHECK18-NEXT:    [[TMP169:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l155.region_id, i32 4, i8** [[TMP162]], i8** [[TMP163]], i64* [[TMP164]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK18-NEXT:    [[TMP170:%.*]] = icmp ne i32 [[TMP169]], 0
-// CHECK18-NEXT:    br i1 [[TMP170]], label [[OMP_OFFLOAD_FAILED66:%.*]], label [[OMP_OFFLOAD_CONT67:%.*]]
+// CHECK18-NEXT:    [[TMP160:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_61]], align 4
+// CHECK18-NEXT:    [[ADD65:%.*]] = add nsw i32 [[TMP160]], 1
+// CHECK18-NEXT:    [[TMP161:%.*]] = zext i32 [[ADD65]] to i64
+// CHECK18-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP161]])
+// CHECK18-NEXT:    [[TMP162:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l155.region_id, i32 4, i8** [[TMP155]], i8** [[TMP156]], i64* [[TMP157]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.17, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK18-NEXT:    [[TMP163:%.*]] = icmp ne i32 [[TMP162]], 0
+// CHECK18-NEXT:    br i1 [[TMP163]], label [[OMP_OFFLOAD_FAILED66:%.*]], label [[OMP_OFFLOAD_CONT67:%.*]]
 // CHECK18:       omp_offload.failed66:
-// CHECK18-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l155(i64 [[TMP134]], i64 [[TMP1]], i32* [[VLA]], i64 [[TMP136]]) #[[ATTR3]]
+// CHECK18-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l155(i64 [[TMP129]], i64 [[TMP1]], i32* [[VLA]], i64 [[TMP131]]) #[[ATTR3]]
 // CHECK18-NEXT:    br label [[OMP_OFFLOAD_CONT67]]
 // CHECK18:       omp_offload.cont67:
-// CHECK18-NEXT:    [[TMP171:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK18-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10EEiT_(i32 noundef signext [[TMP171]])
+// CHECK18-NEXT:    [[TMP164:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK18-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10EEiT_(i32 noundef signext [[TMP164]])
 // CHECK18-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK18-NEXT:    [[TMP172:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK18-NEXT:    call void @llvm.stackrestore(i8* [[TMP172]])
-// CHECK18-NEXT:    [[TMP173:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK18-NEXT:    ret i32 [[TMP173]]
+// CHECK18-NEXT:    [[TMP165:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK18-NEXT:    call void @llvm.stackrestore(i8* [[TMP165]])
+// CHECK18-NEXT:    [[TMP166:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK18-NEXT:    ret i32 [[TMP166]]
 //
 //
 // CHECK18-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l139
@@ -22972,11 +22888,11 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK18-NEXT:    store i32 [[TMP4]], i32* [[CONV3]], align 4
 // CHECK18-NEXT:    [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP3]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP5]])
+// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i64 [[TMP3]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP5]])
 // CHECK18-NEXT:    ret void
 //
 //
-// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..5
+// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..6
 // CHECK18-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK18-NEXT:  entry:
 // CHECK18-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -23063,7 +22979,7 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    [[CONV9:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK18-NEXT:    store i32 [[TMP22]], i32* [[CONV9]], align 4, !llvm.access.group !28
 // CHECK18-NEXT:    [[TMP23:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !28
-// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i64 [[TMP21]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP23]]), !llvm.access.group !28
+// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*, i64)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i64 [[TMP21]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP23]]), !llvm.access.group !28
 // CHECK18-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK18:       omp.inner.for.inc:
 // CHECK18-NEXT:    [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28
@@ -23117,7 +23033,7 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    ret void
 //
 //
-// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..6
+// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..7
 // CHECK18-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK18-NEXT:  entry:
 // CHECK18-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -23254,11 +23170,11 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32*
 // CHECK18-NEXT:    store i32 [[TMP2]], i32* [[CONV1]], align 4
 // CHECK18-NEXT:    [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8
-// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), i64 [[TMP3]], i64 [[TMP0]], i32* [[TMP1]])
+// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), i64 [[TMP3]], i64 [[TMP0]], i32* [[TMP1]])
 // CHECK18-NEXT:    ret void
 //
 //
-// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..8
+// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..10
 // CHECK18-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] {
 // CHECK18-NEXT:  entry:
 // CHECK18-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -23335,7 +23251,7 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32*
 // CHECK18-NEXT:    store i32 [[TMP19]], i32* [[CONV6]], align 4, !llvm.access.group !34
 // CHECK18-NEXT:    [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !34
-// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], i32* [[TMP1]]), !llvm.access.group !34
+// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], i32* [[TMP1]]), !llvm.access.group !34
 // CHECK18-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK18:       omp.inner.for.inc:
 // CHECK18-NEXT:    [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34
@@ -23366,7 +23282,7 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    ret void
 //
 //
-// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..9
+// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..11
 // CHECK18-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] {
 // CHECK18-NEXT:  entry:
 // CHECK18-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -23504,11 +23420,11 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK18-NEXT:    store i32 [[TMP4]], i32* [[CONV3]], align 4
 // CHECK18-NEXT:    [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP3]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP5]])
+// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i64 [[TMP3]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP5]])
 // CHECK18-NEXT:    ret void
 //
 //
-// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..11
+// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..14
 // CHECK18-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK18-NEXT:  entry:
 // CHECK18-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -23593,7 +23509,7 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    [[CONV9:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK18-NEXT:    store i32 [[TMP21]], i32* [[CONV9]], align 4, !llvm.access.group !40
 // CHECK18-NEXT:    [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !40
-// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*, i64)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP22]]), !llvm.access.group !40
+// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP22]]), !llvm.access.group !40
 // CHECK18-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK18:       omp.inner.for.inc:
 // CHECK18-NEXT:    [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40
@@ -23624,7 +23540,7 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    ret void
 //
 //
-// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..12
+// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..15
 // CHECK18-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK18-NEXT:  entry:
 // CHECK18-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -23742,7 +23658,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK18-LABEL: define {{[^@]+}}@_Z5tmainIiLi10EEiT_
-// CHECK18-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK18-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK18-NEXT:  entry:
 // CHECK18-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK18-NEXT:    [[A:%.*]] = alloca [10 x i32], align 4
@@ -23784,7 +23700,7 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK18-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK18-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK18-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l112.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.16, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK18-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l112.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.20, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.21, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK18-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK18-NEXT:    br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK18:       omp_offload.failed:
@@ -23802,7 +23718,7 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK18-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
 // CHECK18-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK18-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l116.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.19, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.20, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK18-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l116.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.24, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.25, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK18-NEXT:    [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
 // CHECK18-NEXT:    br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]]
 // CHECK18:       omp_offload.failed5:
@@ -23834,7 +23750,7 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0
 // CHECK18-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS8]], i32 0, i32 0
 // CHECK18-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK18-NEXT:    [[TMP33:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l120.region_id, i32 2, i8** [[TMP31]], i8** [[TMP32]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.23, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.24, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK18-NEXT:    [[TMP33:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l120.region_id, i32 2, i8** [[TMP31]], i8** [[TMP32]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.28, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.29, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK18-NEXT:    [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0
 // CHECK18-NEXT:    br i1 [[TMP34]], label [[OMP_OFFLOAD_FAILED11:%.*]], label [[OMP_OFFLOAD_CONT12:%.*]]
 // CHECK18:       omp_offload.failed11:
@@ -23852,7 +23768,7 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0
 // CHECK18-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 0
 // CHECK18-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK18-NEXT:    [[TMP42:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l124.region_id, i32 1, i8** [[TMP40]], i8** [[TMP41]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.27, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.28, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK18-NEXT:    [[TMP42:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l124.region_id, i32 1, i8** [[TMP40]], i8** [[TMP41]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.32, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.33, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK18-NEXT:    [[TMP43:%.*]] = icmp ne i32 [[TMP42]], 0
 // CHECK18-NEXT:    br i1 [[TMP43]], label [[OMP_OFFLOAD_FAILED17:%.*]], label [[OMP_OFFLOAD_CONT18:%.*]]
 // CHECK18:       omp_offload.failed17:
@@ -23884,7 +23800,7 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 0
 // CHECK18-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 0
 // CHECK18-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK18-NEXT:    [[TMP59:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l128.region_id, i32 2, i8** [[TMP57]], i8** [[TMP58]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.31, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.32, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK18-NEXT:    [[TMP59:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l128.region_id, i32 2, i8** [[TMP57]], i8** [[TMP58]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.36, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.37, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK18-NEXT:    [[TMP60:%.*]] = icmp ne i32 [[TMP59]], 0
 // CHECK18-NEXT:    br i1 [[TMP60]], label [[OMP_OFFLOAD_FAILED26:%.*]], label [[OMP_OFFLOAD_CONT27:%.*]]
 // CHECK18:       omp_offload.failed26:
@@ -23900,11 +23816,11 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8
 // CHECK18-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8
 // CHECK18-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8
-// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK18-NEXT:    ret void
 //
 //
-// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..14
+// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..18
 // CHECK18-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK18-NEXT:  entry:
 // CHECK18-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -23952,7 +23868,7 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
 // CHECK18-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !46
 // CHECK18-NEXT:    [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
-// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !46
+// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !46
 // CHECK18-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK18:       omp.inner.for.inc:
 // CHECK18-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46
@@ -23974,7 +23890,7 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    ret void
 //
 //
-// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..15
+// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..19
 // CHECK18-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK18-NEXT:  entry:
 // CHECK18-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -24064,11 +23980,11 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8
 // CHECK18-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8
 // CHECK18-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8
-// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..17 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..22 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK18-NEXT:    ret void
 //
 //
-// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..17
+// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..22
 // CHECK18-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK18-NEXT:  entry:
 // CHECK18-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -24116,7 +24032,7 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
 // CHECK18-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !52
 // CHECK18-NEXT:    [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
-// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !52
+// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..23 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !52
 // CHECK18-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK18:       omp.inner.for.inc:
 // CHECK18-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52
@@ -24138,7 +24054,7 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    ret void
 //
 //
-// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..18
+// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..23
 // CHECK18-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK18-NEXT:  entry:
 // CHECK18-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -24236,11 +24152,11 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK18-NEXT:    store i32 [[TMP1]], i32* [[CONV1]], align 4
 // CHECK18-NEXT:    [[TMP2:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i64)* @.omp_outlined..21 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i64 [[TMP2]])
+// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i64)* @.omp_outlined..26 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i64 [[TMP2]])
 // CHECK18-NEXT:    ret void
 //
 //
-// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..21
+// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..26
 // CHECK18-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK18-NEXT:  entry:
 // CHECK18-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -24296,7 +24212,7 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK18-NEXT:    store i32 [[TMP12]], i32* [[CONV2]], align 4, !llvm.access.group !58
 // CHECK18-NEXT:    [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !58
-// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..22 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]), !llvm.access.group !58
+// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..27 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]), !llvm.access.group !58
 // CHECK18-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK18:       omp.inner.for.inc:
 // CHECK18-NEXT:    [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58
@@ -24318,7 +24234,7 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    ret void
 //
 //
-// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..22
+// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..27
 // CHECK18-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK18-NEXT:  entry:
 // CHECK18-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -24433,11 +24349,11 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8
 // CHECK18-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8
 // CHECK18-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8
-// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..25 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK18-NEXT:    ret void
 //
 //
-// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..25
+// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..30
 // CHECK18-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK18-NEXT:  entry:
 // CHECK18-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -24485,7 +24401,7 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
 // CHECK18-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !64
 // CHECK18-NEXT:    [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
-// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !64
+// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !64
 // CHECK18-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK18:       omp.inner.for.inc:
 // CHECK18-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !64
@@ -24507,7 +24423,7 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    ret void
 //
 //
-// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..26
+// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..31
 // CHECK18-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK18-NEXT:  entry:
 // CHECK18-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -24603,11 +24519,11 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK18-NEXT:    store i32 [[TMP1]], i32* [[CONV1]], align 4
 // CHECK18-NEXT:    [[TMP2:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i64)* @.omp_outlined..29 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i64 [[TMP2]])
+// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i64)* @.omp_outlined..34 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i64 [[TMP2]])
 // CHECK18-NEXT:    ret void
 //
 //
-// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..29
+// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..34
 // CHECK18-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK18-NEXT:  entry:
 // CHECK18-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -24663,7 +24579,7 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK18-NEXT:    store i32 [[TMP12]], i32* [[CONV2]], align 4, !llvm.access.group !70
 // CHECK18-NEXT:    [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !70
-// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]), !llvm.access.group !70
+// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]), !llvm.access.group !70
 // CHECK18-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK18:       omp.inner.for.inc:
 // CHECK18-NEXT:    [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !70
@@ -24685,7 +24601,7 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    ret void
 //
 //
-// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..30
+// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..35
 // CHECK18-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK18-NEXT:  entry:
 // CHECK18-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -24772,7 +24688,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK18-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK18-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK18-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK18-NEXT:  entry:
 // CHECK18-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK18-NEXT:    ret void
@@ -24847,14 +24763,14 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4
 // CHECK19-NEXT:    [[TMP4:%.*]] = mul nuw i32 [[TMP0]], 4
 // CHECK19-NEXT:    [[TMP5:%.*]] = sext i32 [[TMP4]] to i64
-// CHECK19-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP3]], i32* [[TMP7]], align 4
-// CHECK19-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP3]], i32* [[TMP9]], align 4
-// CHECK19-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK19-NEXT:    store i64 4, i64* [[TMP10]], align 4
+// CHECK19-NEXT:    [[TMP6:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK19-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP6]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes to i8*), i32 24, i1 false)
+// CHECK19-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP3]], i32* [[TMP8]], align 4
+// CHECK19-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP3]], i32* [[TMP10]], align 4
 // CHECK19-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
 // CHECK19-NEXT:    store i8* null, i8** [[TMP11]], align 4
 // CHECK19-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -24863,302 +24779,288 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK19-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i32*
 // CHECK19-NEXT:    store i32 [[TMP0]], i32* [[TMP15]], align 4
-// CHECK19-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK19-NEXT:    store i64 4, i64* [[TMP16]], align 4
-// CHECK19-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK19-NEXT:    store i8* null, i8** [[TMP17]], align 4
-// CHECK19-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK19-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
-// CHECK19-NEXT:    store i32* [[VLA]], i32** [[TMP19]], align 4
-// CHECK19-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK19-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32**
-// CHECK19-NEXT:    store i32* [[VLA]], i32** [[TMP21]], align 4
-// CHECK19-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK19-NEXT:    store i64 [[TMP5]], i64* [[TMP22]], align 4
-// CHECK19-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK19-NEXT:    store i8* null, i8** [[TMP23]], align 4
-// CHECK19-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP27:%.*]] = load i32, i32* [[N]], align 4
-// CHECK19-NEXT:    store i32 [[TMP27]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK19-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK19-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP28]], 0
+// CHECK19-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK19-NEXT:    store i8* null, i8** [[TMP16]], align 4
+// CHECK19-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK19-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32**
+// CHECK19-NEXT:    store i32* [[VLA]], i32** [[TMP18]], align 4
+// CHECK19-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK19-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32**
+// CHECK19-NEXT:    store i32* [[VLA]], i32** [[TMP20]], align 4
+// CHECK19-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
+// CHECK19-NEXT:    store i64 [[TMP5]], i64* [[TMP21]], align 4
+// CHECK19-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK19-NEXT:    store i8* null, i8** [[TMP22]], align 4
+// CHECK19-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP26:%.*]] = load i32, i32* [[N]], align 4
+// CHECK19-NEXT:    store i32 [[TMP26]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK19-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK19-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP27]], 0
 // CHECK19-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK19-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK19-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK19-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK19-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP29]], 1
-// CHECK19-NEXT:    [[TMP30:%.*]] = zext i32 [[ADD]] to i64
-// CHECK19-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP30]])
-// CHECK19-NEXT:    [[TMP31:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l139.region_id, i32 3, i8** [[TMP24]], i8** [[TMP25]], i64* [[TMP26]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK19-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
-// CHECK19-NEXT:    br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK19-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK19-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP28]], 1
+// CHECK19-NEXT:    [[TMP29:%.*]] = zext i32 [[ADD]] to i64
+// CHECK19-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP29]])
+// CHECK19-NEXT:    [[TMP30:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l139.region_id, i32 3, i8** [[TMP23]], i8** [[TMP24]], i64* [[TMP25]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK19-NEXT:    [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
+// CHECK19-NEXT:    br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK19:       omp_offload.failed:
 // CHECK19-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l139(i32 [[TMP3]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK19-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK19:       omp_offload.cont:
-// CHECK19-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N]], align 4
-// CHECK19-NEXT:    store i32 [[TMP33]], i32* [[N_CASTED3]], align 4
-// CHECK19-NEXT:    [[TMP34:%.*]] = load i32, i32* [[N_CASTED3]], align 4
-// CHECK19-NEXT:    [[TMP35:%.*]] = mul nuw i32 [[TMP0]], 4
-// CHECK19-NEXT:    [[TMP36:%.*]] = sext i32 [[TMP35]] to i64
+// CHECK19-NEXT:    [[TMP32:%.*]] = load i32, i32* [[N]], align 4
+// CHECK19-NEXT:    store i32 [[TMP32]], i32* [[N_CASTED3]], align 4
+// CHECK19-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N_CASTED3]], align 4
+// CHECK19-NEXT:    [[TMP34:%.*]] = mul nuw i32 [[TMP0]], 4
+// CHECK19-NEXT:    [[TMP35:%.*]] = sext i32 [[TMP34]] to i64
+// CHECK19-NEXT:    [[TMP36:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES7]] to i8*
+// CHECK19-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP36]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes.4 to i8*), i32 24, i1 false)
 // CHECK19-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
 // CHECK19-NEXT:    [[TMP38:%.*]] = bitcast i8** [[TMP37]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP34]], i32* [[TMP38]], align 4
+// CHECK19-NEXT:    store i32 [[TMP33]], i32* [[TMP38]], align 4
 // CHECK19-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
 // CHECK19-NEXT:    [[TMP40:%.*]] = bitcast i8** [[TMP39]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP34]], i32* [[TMP40]], align 4
-// CHECK19-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 0
-// CHECK19-NEXT:    store i64 4, i64* [[TMP41]], align 4
-// CHECK19-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 0
-// CHECK19-NEXT:    store i8* null, i8** [[TMP42]], align 4
-// CHECK19-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 1
-// CHECK19-NEXT:    [[TMP44:%.*]] = bitcast i8** [[TMP43]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP0]], i32* [[TMP44]], align 4
-// CHECK19-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 1
-// CHECK19-NEXT:    [[TMP46:%.*]] = bitcast i8** [[TMP45]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP0]], i32* [[TMP46]], align 4
-// CHECK19-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 1
-// CHECK19-NEXT:    store i64 4, i64* [[TMP47]], align 4
-// CHECK19-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 1
-// CHECK19-NEXT:    store i8* null, i8** [[TMP48]], align 4
-// CHECK19-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 2
+// CHECK19-NEXT:    store i32 [[TMP33]], i32* [[TMP40]], align 4
+// CHECK19-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 0
+// CHECK19-NEXT:    store i8* null, i8** [[TMP41]], align 4
+// CHECK19-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 1
+// CHECK19-NEXT:    [[TMP43:%.*]] = bitcast i8** [[TMP42]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP0]], i32* [[TMP43]], align 4
+// CHECK19-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 1
+// CHECK19-NEXT:    [[TMP45:%.*]] = bitcast i8** [[TMP44]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP0]], i32* [[TMP45]], align 4
+// CHECK19-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 1
+// CHECK19-NEXT:    store i8* null, i8** [[TMP46]], align 4
+// CHECK19-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 2
+// CHECK19-NEXT:    [[TMP48:%.*]] = bitcast i8** [[TMP47]] to i32**
+// CHECK19-NEXT:    store i32* [[VLA]], i32** [[TMP48]], align 4
+// CHECK19-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 2
 // CHECK19-NEXT:    [[TMP50:%.*]] = bitcast i8** [[TMP49]] to i32**
 // CHECK19-NEXT:    store i32* [[VLA]], i32** [[TMP50]], align 4
-// CHECK19-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 2
-// CHECK19-NEXT:    [[TMP52:%.*]] = bitcast i8** [[TMP51]] to i32**
-// CHECK19-NEXT:    store i32* [[VLA]], i32** [[TMP52]], align 4
-// CHECK19-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 2
-// CHECK19-NEXT:    store i64 [[TMP36]], i64* [[TMP53]], align 4
-// CHECK19-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 2
-// CHECK19-NEXT:    store i8* null, i8** [[TMP54]], align 4
-// CHECK19-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP58:%.*]] = load i32, i32* [[N]], align 4
-// CHECK19-NEXT:    store i32 [[TMP58]], i32* [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK19-NEXT:    [[TMP59:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK19-NEXT:    [[SUB11:%.*]] = sub nsw i32 [[TMP59]], 0
+// CHECK19-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 2
+// CHECK19-NEXT:    store i64 [[TMP35]], i64* [[TMP51]], align 4
+// CHECK19-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 2
+// CHECK19-NEXT:    store i8* null, i8** [[TMP52]], align 4
+// CHECK19-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP56:%.*]] = load i32, i32* [[N]], align 4
+// CHECK19-NEXT:    store i32 [[TMP56]], i32* [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK19-NEXT:    [[TMP57:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK19-NEXT:    [[SUB11:%.*]] = sub nsw i32 [[TMP57]], 0
 // CHECK19-NEXT:    [[DIV12:%.*]] = sdiv i32 [[SUB11]], 1
 // CHECK19-NEXT:    [[SUB13:%.*]] = sub nsw i32 [[DIV12]], 1
 // CHECK19-NEXT:    store i32 [[SUB13]], i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK19-NEXT:    [[TMP60:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK19-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP60]], 1
-// CHECK19-NEXT:    [[TMP61:%.*]] = zext i32 [[ADD14]] to i64
-// CHECK19-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP61]])
-// CHECK19-NEXT:    [[TMP62:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l143.region_id, i32 3, i8** [[TMP55]], i8** [[TMP56]], i64* [[TMP57]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK19-NEXT:    [[TMP63:%.*]] = icmp ne i32 [[TMP62]], 0
-// CHECK19-NEXT:    br i1 [[TMP63]], label [[OMP_OFFLOAD_FAILED15:%.*]], label [[OMP_OFFLOAD_CONT16:%.*]]
+// CHECK19-NEXT:    [[TMP58:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
+// CHECK19-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP58]], 1
+// CHECK19-NEXT:    [[TMP59:%.*]] = zext i32 [[ADD14]] to i64
+// CHECK19-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP59]])
+// CHECK19-NEXT:    [[TMP60:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l143.region_id, i32 3, i8** [[TMP53]], i8** [[TMP54]], i64* [[TMP55]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.5, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK19-NEXT:    [[TMP61:%.*]] = icmp ne i32 [[TMP60]], 0
+// CHECK19-NEXT:    br i1 [[TMP61]], label [[OMP_OFFLOAD_FAILED15:%.*]], label [[OMP_OFFLOAD_CONT16:%.*]]
 // CHECK19:       omp_offload.failed15:
-// CHECK19-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l143(i32 [[TMP34]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK19-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l143(i32 [[TMP33]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK19-NEXT:    br label [[OMP_OFFLOAD_CONT16]]
 // CHECK19:       omp_offload.cont16:
-// CHECK19-NEXT:    [[TMP64:%.*]] = load i32, i32* [[M]], align 4
-// CHECK19-NEXT:    store i32 [[TMP64]], i32* [[DOTCAPTURE_EXPR_17]], align 4
-// CHECK19-NEXT:    [[TMP65:%.*]] = load i32, i32* [[N]], align 4
-// CHECK19-NEXT:    store i32 [[TMP65]], i32* [[N_CASTED18]], align 4
-// CHECK19-NEXT:    [[TMP66:%.*]] = load i32, i32* [[N_CASTED18]], align 4
-// CHECK19-NEXT:    [[TMP67:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_17]], align 4
-// CHECK19-NEXT:    store i32 [[TMP67]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK19-NEXT:    [[TMP68:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK19-NEXT:    [[TMP69:%.*]] = mul nuw i32 [[TMP0]], 4
-// CHECK19-NEXT:    [[TMP70:%.*]] = sext i32 [[TMP69]] to i64
-// CHECK19-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP72:%.*]] = bitcast i8** [[TMP71]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP66]], i32* [[TMP72]], align 4
-// CHECK19-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP74:%.*]] = bitcast i8** [[TMP73]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP66]], i32* [[TMP74]], align 4
-// CHECK19-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 0
-// CHECK19-NEXT:    store i64 4, i64* [[TMP75]], align 4
-// CHECK19-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 0
-// CHECK19-NEXT:    store i8* null, i8** [[TMP76]], align 4
-// CHECK19-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 1
+// CHECK19-NEXT:    [[TMP62:%.*]] = load i32, i32* [[M]], align 4
+// CHECK19-NEXT:    store i32 [[TMP62]], i32* [[DOTCAPTURE_EXPR_17]], align 4
+// CHECK19-NEXT:    [[TMP63:%.*]] = load i32, i32* [[N]], align 4
+// CHECK19-NEXT:    store i32 [[TMP63]], i32* [[N_CASTED18]], align 4
+// CHECK19-NEXT:    [[TMP64:%.*]] = load i32, i32* [[N_CASTED18]], align 4
+// CHECK19-NEXT:    [[TMP65:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_17]], align 4
+// CHECK19-NEXT:    store i32 [[TMP65]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
+// CHECK19-NEXT:    [[TMP66:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
+// CHECK19-NEXT:    [[TMP67:%.*]] = mul nuw i32 [[TMP0]], 4
+// CHECK19-NEXT:    [[TMP68:%.*]] = sext i32 [[TMP67]] to i64
+// CHECK19-NEXT:    [[TMP69:%.*]] = bitcast [4 x i64]* [[DOTOFFLOAD_SIZES22]] to i8*
+// CHECK19-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP69]], i8* align 4 bitcast ([4 x i64]* @.offload_sizes.8 to i8*), i32 32, i1 false)
+// CHECK19-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP71:%.*]] = bitcast i8** [[TMP70]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP64]], i32* [[TMP71]], align 4
+// CHECK19-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP73:%.*]] = bitcast i8** [[TMP72]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP64]], i32* [[TMP73]], align 4
+// CHECK19-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 0
+// CHECK19-NEXT:    store i8* null, i8** [[TMP74]], align 4
+// CHECK19-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 1
+// CHECK19-NEXT:    [[TMP76:%.*]] = bitcast i8** [[TMP75]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP0]], i32* [[TMP76]], align 4
+// CHECK19-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 1
 // CHECK19-NEXT:    [[TMP78:%.*]] = bitcast i8** [[TMP77]] to i32*
 // CHECK19-NEXT:    store i32 [[TMP0]], i32* [[TMP78]], align 4
-// CHECK19-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 1
-// CHECK19-NEXT:    [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP0]], i32* [[TMP80]], align 4
-// CHECK19-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 1
-// CHECK19-NEXT:    store i64 4, i64* [[TMP81]], align 4
-// CHECK19-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 1
-// CHECK19-NEXT:    store i8* null, i8** [[TMP82]], align 4
-// CHECK19-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 2
-// CHECK19-NEXT:    [[TMP84:%.*]] = bitcast i8** [[TMP83]] to i32**
-// CHECK19-NEXT:    store i32* [[VLA]], i32** [[TMP84]], align 4
-// CHECK19-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 2
-// CHECK19-NEXT:    [[TMP86:%.*]] = bitcast i8** [[TMP85]] to i32**
-// CHECK19-NEXT:    store i32* [[VLA]], i32** [[TMP86]], align 4
-// CHECK19-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 2
-// CHECK19-NEXT:    store i64 [[TMP70]], i64* [[TMP87]], align 4
-// CHECK19-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 2
-// CHECK19-NEXT:    store i8* null, i8** [[TMP88]], align 4
-// CHECK19-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 3
-// CHECK19-NEXT:    [[TMP90:%.*]] = bitcast i8** [[TMP89]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP68]], i32* [[TMP90]], align 4
-// CHECK19-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 3
-// CHECK19-NEXT:    [[TMP92:%.*]] = bitcast i8** [[TMP91]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP68]], i32* [[TMP92]], align 4
-// CHECK19-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 3
-// CHECK19-NEXT:    store i64 4, i64* [[TMP93]], align 4
-// CHECK19-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 3
-// CHECK19-NEXT:    store i8* null, i8** [[TMP94]], align 4
-// CHECK19-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP98:%.*]] = load i32, i32* [[N]], align 4
-// CHECK19-NEXT:    store i32 [[TMP98]], i32* [[DOTCAPTURE_EXPR_24]], align 4
-// CHECK19-NEXT:    [[TMP99:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_24]], align 4
-// CHECK19-NEXT:    [[SUB26:%.*]] = sub nsw i32 [[TMP99]], 0
+// CHECK19-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 1
+// CHECK19-NEXT:    store i8* null, i8** [[TMP79]], align 4
+// CHECK19-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 2
+// CHECK19-NEXT:    [[TMP81:%.*]] = bitcast i8** [[TMP80]] to i32**
+// CHECK19-NEXT:    store i32* [[VLA]], i32** [[TMP81]], align 4
+// CHECK19-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 2
+// CHECK19-NEXT:    [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i32**
+// CHECK19-NEXT:    store i32* [[VLA]], i32** [[TMP83]], align 4
+// CHECK19-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 2
+// CHECK19-NEXT:    store i64 [[TMP68]], i64* [[TMP84]], align 4
+// CHECK19-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 2
+// CHECK19-NEXT:    store i8* null, i8** [[TMP85]], align 4
+// CHECK19-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 3
+// CHECK19-NEXT:    [[TMP87:%.*]] = bitcast i8** [[TMP86]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP66]], i32* [[TMP87]], align 4
+// CHECK19-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 3
+// CHECK19-NEXT:    [[TMP89:%.*]] = bitcast i8** [[TMP88]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP66]], i32* [[TMP89]], align 4
+// CHECK19-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 3
+// CHECK19-NEXT:    store i8* null, i8** [[TMP90]], align 4
+// CHECK19-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP94:%.*]] = load i32, i32* [[N]], align 4
+// CHECK19-NEXT:    store i32 [[TMP94]], i32* [[DOTCAPTURE_EXPR_24]], align 4
+// CHECK19-NEXT:    [[TMP95:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_24]], align 4
+// CHECK19-NEXT:    [[SUB26:%.*]] = sub nsw i32 [[TMP95]], 0
 // CHECK19-NEXT:    [[DIV27:%.*]] = sdiv i32 [[SUB26]], 1
 // CHECK19-NEXT:    [[SUB28:%.*]] = sub nsw i32 [[DIV27]], 1
 // CHECK19-NEXT:    store i32 [[SUB28]], i32* [[DOTCAPTURE_EXPR_25]], align 4
-// CHECK19-NEXT:    [[TMP100:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_25]], align 4
-// CHECK19-NEXT:    [[ADD29:%.*]] = add nsw i32 [[TMP100]], 1
-// CHECK19-NEXT:    [[TMP101:%.*]] = zext i32 [[ADD29]] to i64
-// CHECK19-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP101]])
-// CHECK19-NEXT:    [[TMP102:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l147.region_id, i32 4, i8** [[TMP95]], i8** [[TMP96]], i64* [[TMP97]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.7, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK19-NEXT:    [[TMP103:%.*]] = icmp ne i32 [[TMP102]], 0
-// CHECK19-NEXT:    br i1 [[TMP103]], label [[OMP_OFFLOAD_FAILED30:%.*]], label [[OMP_OFFLOAD_CONT31:%.*]]
+// CHECK19-NEXT:    [[TMP96:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_25]], align 4
+// CHECK19-NEXT:    [[ADD29:%.*]] = add nsw i32 [[TMP96]], 1
+// CHECK19-NEXT:    [[TMP97:%.*]] = zext i32 [[ADD29]] to i64
+// CHECK19-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP97]])
+// CHECK19-NEXT:    [[TMP98:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l147.region_id, i32 4, i8** [[TMP91]], i8** [[TMP92]], i64* [[TMP93]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK19-NEXT:    [[TMP99:%.*]] = icmp ne i32 [[TMP98]], 0
+// CHECK19-NEXT:    br i1 [[TMP99]], label [[OMP_OFFLOAD_FAILED30:%.*]], label [[OMP_OFFLOAD_CONT31:%.*]]
 // CHECK19:       omp_offload.failed30:
-// CHECK19-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l147(i32 [[TMP66]], i32 [[TMP0]], i32* [[VLA]], i32 [[TMP68]]) #[[ATTR3]]
+// CHECK19-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l147(i32 [[TMP64]], i32 [[TMP0]], i32* [[VLA]], i32 [[TMP66]]) #[[ATTR3]]
 // CHECK19-NEXT:    br label [[OMP_OFFLOAD_CONT31]]
 // CHECK19:       omp_offload.cont31:
-// CHECK19-NEXT:    [[TMP104:%.*]] = load i32, i32* [[N]], align 4
-// CHECK19-NEXT:    store i32 [[TMP104]], i32* [[N_CASTED32]], align 4
-// CHECK19-NEXT:    [[TMP105:%.*]] = load i32, i32* [[N_CASTED32]], align 4
-// CHECK19-NEXT:    [[TMP106:%.*]] = mul nuw i32 [[TMP0]], 4
-// CHECK19-NEXT:    [[TMP107:%.*]] = sext i32 [[TMP106]] to i64
-// CHECK19-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS33]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP109:%.*]] = bitcast i8** [[TMP108]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP105]], i32* [[TMP109]], align 4
-// CHECK19-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS34]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP100:%.*]] = load i32, i32* [[N]], align 4
+// CHECK19-NEXT:    store i32 [[TMP100]], i32* [[N_CASTED32]], align 4
+// CHECK19-NEXT:    [[TMP101:%.*]] = load i32, i32* [[N_CASTED32]], align 4
+// CHECK19-NEXT:    [[TMP102:%.*]] = mul nuw i32 [[TMP0]], 4
+// CHECK19-NEXT:    [[TMP103:%.*]] = sext i32 [[TMP102]] to i64
+// CHECK19-NEXT:    [[TMP104:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES36]] to i8*
+// CHECK19-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP104]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes.12 to i8*), i32 24, i1 false)
+// CHECK19-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS33]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP106:%.*]] = bitcast i8** [[TMP105]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP101]], i32* [[TMP106]], align 4
+// CHECK19-NEXT:    [[TMP107:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS34]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP108:%.*]] = bitcast i8** [[TMP107]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP101]], i32* [[TMP108]], align 4
+// CHECK19-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS35]], i32 0, i32 0
+// CHECK19-NEXT:    store i8* null, i8** [[TMP109]], align 4
+// CHECK19-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS33]], i32 0, i32 1
 // CHECK19-NEXT:    [[TMP111:%.*]] = bitcast i8** [[TMP110]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP105]], i32* [[TMP111]], align 4
-// CHECK19-NEXT:    [[TMP112:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES36]], i32 0, i32 0
-// CHECK19-NEXT:    store i64 4, i64* [[TMP112]], align 4
-// CHECK19-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS35]], i32 0, i32 0
-// CHECK19-NEXT:    store i8* null, i8** [[TMP113]], align 4
-// CHECK19-NEXT:    [[TMP114:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS33]], i32 0, i32 1
-// CHECK19-NEXT:    [[TMP115:%.*]] = bitcast i8** [[TMP114]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP0]], i32* [[TMP115]], align 4
-// CHECK19-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS34]], i32 0, i32 1
-// CHECK19-NEXT:    [[TMP117:%.*]] = bitcast i8** [[TMP116]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP0]], i32* [[TMP117]], align 4
-// CHECK19-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES36]], i32 0, i32 1
-// CHECK19-NEXT:    store i64 4, i64* [[TMP118]], align 4
-// CHECK19-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS35]], i32 0, i32 1
-// CHECK19-NEXT:    store i8* null, i8** [[TMP119]], align 4
-// CHECK19-NEXT:    [[TMP120:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS33]], i32 0, i32 2
-// CHECK19-NEXT:    [[TMP121:%.*]] = bitcast i8** [[TMP120]] to i32**
-// CHECK19-NEXT:    store i32* [[VLA]], i32** [[TMP121]], align 4
-// CHECK19-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS34]], i32 0, i32 2
-// CHECK19-NEXT:    [[TMP123:%.*]] = bitcast i8** [[TMP122]] to i32**
-// CHECK19-NEXT:    store i32* [[VLA]], i32** [[TMP123]], align 4
-// CHECK19-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES36]], i32 0, i32 2
-// CHECK19-NEXT:    store i64 [[TMP107]], i64* [[TMP124]], align 4
-// CHECK19-NEXT:    [[TMP125:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS35]], i32 0, i32 2
-// CHECK19-NEXT:    store i8* null, i8** [[TMP125]], align 4
-// CHECK19-NEXT:    [[TMP126:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS33]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS34]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP128:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES36]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP129:%.*]] = load i32, i32* [[N]], align 4
-// CHECK19-NEXT:    store i32 [[TMP129]], i32* [[DOTCAPTURE_EXPR_38]], align 4
-// CHECK19-NEXT:    [[TMP130:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_38]], align 4
-// CHECK19-NEXT:    [[SUB40:%.*]] = sub nsw i32 [[TMP130]], 0
+// CHECK19-NEXT:    store i32 [[TMP0]], i32* [[TMP111]], align 4
+// CHECK19-NEXT:    [[TMP112:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS34]], i32 0, i32 1
+// CHECK19-NEXT:    [[TMP113:%.*]] = bitcast i8** [[TMP112]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP0]], i32* [[TMP113]], align 4
+// CHECK19-NEXT:    [[TMP114:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS35]], i32 0, i32 1
+// CHECK19-NEXT:    store i8* null, i8** [[TMP114]], align 4
+// CHECK19-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS33]], i32 0, i32 2
+// CHECK19-NEXT:    [[TMP116:%.*]] = bitcast i8** [[TMP115]] to i32**
+// CHECK19-NEXT:    store i32* [[VLA]], i32** [[TMP116]], align 4
+// CHECK19-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS34]], i32 0, i32 2
+// CHECK19-NEXT:    [[TMP118:%.*]] = bitcast i8** [[TMP117]] to i32**
+// CHECK19-NEXT:    store i32* [[VLA]], i32** [[TMP118]], align 4
+// CHECK19-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES36]], i32 0, i32 2
+// CHECK19-NEXT:    store i64 [[TMP103]], i64* [[TMP119]], align 4
+// CHECK19-NEXT:    [[TMP120:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS35]], i32 0, i32 2
+// CHECK19-NEXT:    store i8* null, i8** [[TMP120]], align 4
+// CHECK19-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS33]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS34]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP123:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES36]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP124:%.*]] = load i32, i32* [[N]], align 4
+// CHECK19-NEXT:    store i32 [[TMP124]], i32* [[DOTCAPTURE_EXPR_38]], align 4
+// CHECK19-NEXT:    [[TMP125:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_38]], align 4
+// CHECK19-NEXT:    [[SUB40:%.*]] = sub nsw i32 [[TMP125]], 0
 // CHECK19-NEXT:    [[DIV41:%.*]] = sdiv i32 [[SUB40]], 1
 // CHECK19-NEXT:    [[SUB42:%.*]] = sub nsw i32 [[DIV41]], 1
 // CHECK19-NEXT:    store i32 [[SUB42]], i32* [[DOTCAPTURE_EXPR_39]], align 4
-// CHECK19-NEXT:    [[TMP131:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_39]], align 4
-// CHECK19-NEXT:    [[ADD43:%.*]] = add nsw i32 [[TMP131]], 1
-// CHECK19-NEXT:    [[TMP132:%.*]] = zext i32 [[ADD43]] to i64
-// CHECK19-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP132]])
-// CHECK19-NEXT:    [[TMP133:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l151.region_id, i32 3, i8** [[TMP126]], i8** [[TMP127]], i64* [[TMP128]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.10, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK19-NEXT:    [[TMP134:%.*]] = icmp ne i32 [[TMP133]], 0
-// CHECK19-NEXT:    br i1 [[TMP134]], label [[OMP_OFFLOAD_FAILED44:%.*]], label [[OMP_OFFLOAD_CONT45:%.*]]
+// CHECK19-NEXT:    [[TMP126:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_39]], align 4
+// CHECK19-NEXT:    [[ADD43:%.*]] = add nsw i32 [[TMP126]], 1
+// CHECK19-NEXT:    [[TMP127:%.*]] = zext i32 [[ADD43]] to i64
+// CHECK19-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP127]])
+// CHECK19-NEXT:    [[TMP128:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l151.region_id, i32 3, i8** [[TMP121]], i8** [[TMP122]], i64* [[TMP123]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK19-NEXT:    [[TMP129:%.*]] = icmp ne i32 [[TMP128]], 0
+// CHECK19-NEXT:    br i1 [[TMP129]], label [[OMP_OFFLOAD_FAILED44:%.*]], label [[OMP_OFFLOAD_CONT45:%.*]]
 // CHECK19:       omp_offload.failed44:
-// CHECK19-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l151(i32 [[TMP105]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK19-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l151(i32 [[TMP101]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK19-NEXT:    br label [[OMP_OFFLOAD_CONT45]]
 // CHECK19:       omp_offload.cont45:
-// CHECK19-NEXT:    [[TMP135:%.*]] = load i32, i32* [[M]], align 4
-// CHECK19-NEXT:    store i32 [[TMP135]], i32* [[DOTCAPTURE_EXPR_46]], align 4
-// CHECK19-NEXT:    [[TMP136:%.*]] = load i32, i32* [[N]], align 4
-// CHECK19-NEXT:    store i32 [[TMP136]], i32* [[N_CASTED47]], align 4
-// CHECK19-NEXT:    [[TMP137:%.*]] = load i32, i32* [[N_CASTED47]], align 4
-// CHECK19-NEXT:    [[TMP138:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_46]], align 4
-// CHECK19-NEXT:    store i32 [[TMP138]], i32* [[DOTCAPTURE_EXPR__CASTED48]], align 4
-// CHECK19-NEXT:    [[TMP139:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED48]], align 4
-// CHECK19-NEXT:    [[TMP140:%.*]] = mul nuw i32 [[TMP0]], 4
-// CHECK19-NEXT:    [[TMP141:%.*]] = sext i32 [[TMP140]] to i64
-// CHECK19-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS49]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP143:%.*]] = bitcast i8** [[TMP142]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP137]], i32* [[TMP143]], align 4
-// CHECK19-NEXT:    [[TMP144:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS50]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP145:%.*]] = bitcast i8** [[TMP144]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP137]], i32* [[TMP145]], align 4
-// CHECK19-NEXT:    [[TMP146:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES52]], i32 0, i32 0
-// CHECK19-NEXT:    store i64 4, i64* [[TMP146]], align 4
-// CHECK19-NEXT:    [[TMP147:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS51]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP130:%.*]] = load i32, i32* [[M]], align 4
+// CHECK19-NEXT:    store i32 [[TMP130]], i32* [[DOTCAPTURE_EXPR_46]], align 4
+// CHECK19-NEXT:    [[TMP131:%.*]] = load i32, i32* [[N]], align 4
+// CHECK19-NEXT:    store i32 [[TMP131]], i32* [[N_CASTED47]], align 4
+// CHECK19-NEXT:    [[TMP132:%.*]] = load i32, i32* [[N_CASTED47]], align 4
+// CHECK19-NEXT:    [[TMP133:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_46]], align 4
+// CHECK19-NEXT:    store i32 [[TMP133]], i32* [[DOTCAPTURE_EXPR__CASTED48]], align 4
+// CHECK19-NEXT:    [[TMP134:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED48]], align 4
+// CHECK19-NEXT:    [[TMP135:%.*]] = mul nuw i32 [[TMP0]], 4
+// CHECK19-NEXT:    [[TMP136:%.*]] = sext i32 [[TMP135]] to i64
+// CHECK19-NEXT:    [[TMP137:%.*]] = bitcast [4 x i64]* [[DOTOFFLOAD_SIZES52]] to i8*
+// CHECK19-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP137]], i8* align 4 bitcast ([4 x i64]* @.offload_sizes.16 to i8*), i32 32, i1 false)
+// CHECK19-NEXT:    [[TMP138:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS49]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP139:%.*]] = bitcast i8** [[TMP138]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP132]], i32* [[TMP139]], align 4
+// CHECK19-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS50]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP141:%.*]] = bitcast i8** [[TMP140]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP132]], i32* [[TMP141]], align 4
+// CHECK19-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS51]], i32 0, i32 0
+// CHECK19-NEXT:    store i8* null, i8** [[TMP142]], align 4
+// CHECK19-NEXT:    [[TMP143:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS49]], i32 0, i32 1
+// CHECK19-NEXT:    [[TMP144:%.*]] = bitcast i8** [[TMP143]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP0]], i32* [[TMP144]], align 4
+// CHECK19-NEXT:    [[TMP145:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS50]], i32 0, i32 1
+// CHECK19-NEXT:    [[TMP146:%.*]] = bitcast i8** [[TMP145]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP0]], i32* [[TMP146]], align 4
+// CHECK19-NEXT:    [[TMP147:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS51]], i32 0, i32 1
 // CHECK19-NEXT:    store i8* null, i8** [[TMP147]], align 4
-// CHECK19-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS49]], i32 0, i32 1
-// CHECK19-NEXT:    [[TMP149:%.*]] = bitcast i8** [[TMP148]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP0]], i32* [[TMP149]], align 4
-// CHECK19-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS50]], i32 0, i32 1
-// CHECK19-NEXT:    [[TMP151:%.*]] = bitcast i8** [[TMP150]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP0]], i32* [[TMP151]], align 4
-// CHECK19-NEXT:    [[TMP152:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES52]], i32 0, i32 1
-// CHECK19-NEXT:    store i64 4, i64* [[TMP152]], align 4
-// CHECK19-NEXT:    [[TMP153:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS51]], i32 0, i32 1
+// CHECK19-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS49]], i32 0, i32 2
+// CHECK19-NEXT:    [[TMP149:%.*]] = bitcast i8** [[TMP148]] to i32**
+// CHECK19-NEXT:    store i32* [[VLA]], i32** [[TMP149]], align 4
+// CHECK19-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS50]], i32 0, i32 2
+// CHECK19-NEXT:    [[TMP151:%.*]] = bitcast i8** [[TMP150]] to i32**
+// CHECK19-NEXT:    store i32* [[VLA]], i32** [[TMP151]], align 4
+// CHECK19-NEXT:    [[TMP152:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES52]], i32 0, i32 2
+// CHECK19-NEXT:    store i64 [[TMP136]], i64* [[TMP152]], align 4
+// CHECK19-NEXT:    [[TMP153:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS51]], i32 0, i32 2
 // CHECK19-NEXT:    store i8* null, i8** [[TMP153]], align 4
-// CHECK19-NEXT:    [[TMP154:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS49]], i32 0, i32 2
-// CHECK19-NEXT:    [[TMP155:%.*]] = bitcast i8** [[TMP154]] to i32**
-// CHECK19-NEXT:    store i32* [[VLA]], i32** [[TMP155]], align 4
-// CHECK19-NEXT:    [[TMP156:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS50]], i32 0, i32 2
-// CHECK19-NEXT:    [[TMP157:%.*]] = bitcast i8** [[TMP156]] to i32**
-// CHECK19-NEXT:    store i32* [[VLA]], i32** [[TMP157]], align 4
-// CHECK19-NEXT:    [[TMP158:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES52]], i32 0, i32 2
-// CHECK19-NEXT:    store i64 [[TMP141]], i64* [[TMP158]], align 4
-// CHECK19-NEXT:    [[TMP159:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS51]], i32 0, i32 2
-// CHECK19-NEXT:    store i8* null, i8** [[TMP159]], align 4
-// CHECK19-NEXT:    [[TMP160:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS49]], i32 0, i32 3
-// CHECK19-NEXT:    [[TMP161:%.*]] = bitcast i8** [[TMP160]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP139]], i32* [[TMP161]], align 4
-// CHECK19-NEXT:    [[TMP162:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS50]], i32 0, i32 3
-// CHECK19-NEXT:    [[TMP163:%.*]] = bitcast i8** [[TMP162]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP139]], i32* [[TMP163]], align 4
-// CHECK19-NEXT:    [[TMP164:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES52]], i32 0, i32 3
-// CHECK19-NEXT:    store i64 4, i64* [[TMP164]], align 4
-// CHECK19-NEXT:    [[TMP165:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS51]], i32 0, i32 3
-// CHECK19-NEXT:    store i8* null, i8** [[TMP165]], align 4
-// CHECK19-NEXT:    [[TMP166:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS49]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP167:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS50]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP168:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES52]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP169:%.*]] = load i32, i32* [[N]], align 4
-// CHECK19-NEXT:    store i32 [[TMP169]], i32* [[DOTCAPTURE_EXPR_54]], align 4
-// CHECK19-NEXT:    [[TMP170:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_54]], align 4
-// CHECK19-NEXT:    [[SUB56:%.*]] = sub nsw i32 [[TMP170]], 0
+// CHECK19-NEXT:    [[TMP154:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS49]], i32 0, i32 3
+// CHECK19-NEXT:    [[TMP155:%.*]] = bitcast i8** [[TMP154]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP134]], i32* [[TMP155]], align 4
+// CHECK19-NEXT:    [[TMP156:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS50]], i32 0, i32 3
+// CHECK19-NEXT:    [[TMP157:%.*]] = bitcast i8** [[TMP156]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP134]], i32* [[TMP157]], align 4
+// CHECK19-NEXT:    [[TMP158:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS51]], i32 0, i32 3
+// CHECK19-NEXT:    store i8* null, i8** [[TMP158]], align 4
+// CHECK19-NEXT:    [[TMP159:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS49]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP160:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS50]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP161:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES52]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP162:%.*]] = load i32, i32* [[N]], align 4
+// CHECK19-NEXT:    store i32 [[TMP162]], i32* [[DOTCAPTURE_EXPR_54]], align 4
+// CHECK19-NEXT:    [[TMP163:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_54]], align 4
+// CHECK19-NEXT:    [[SUB56:%.*]] = sub nsw i32 [[TMP163]], 0
 // CHECK19-NEXT:    [[DIV57:%.*]] = sdiv i32 [[SUB56]], 1
 // CHECK19-NEXT:    [[SUB58:%.*]] = sub nsw i32 [[DIV57]], 1
 // CHECK19-NEXT:    store i32 [[SUB58]], i32* [[DOTCAPTURE_EXPR_55]], align 4
-// CHECK19-NEXT:    [[TMP171:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_55]], align 4
-// CHECK19-NEXT:    [[ADD59:%.*]] = add nsw i32 [[TMP171]], 1
-// CHECK19-NEXT:    [[TMP172:%.*]] = zext i32 [[ADD59]] to i64
-// CHECK19-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP172]])
-// CHECK19-NEXT:    [[TMP173:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l155.region_id, i32 4, i8** [[TMP166]], i8** [[TMP167]], i64* [[TMP168]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK19-NEXT:    [[TMP174:%.*]] = icmp ne i32 [[TMP173]], 0
-// CHECK19-NEXT:    br i1 [[TMP174]], label [[OMP_OFFLOAD_FAILED60:%.*]], label [[OMP_OFFLOAD_CONT61:%.*]]
+// CHECK19-NEXT:    [[TMP164:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_55]], align 4
+// CHECK19-NEXT:    [[ADD59:%.*]] = add nsw i32 [[TMP164]], 1
+// CHECK19-NEXT:    [[TMP165:%.*]] = zext i32 [[ADD59]] to i64
+// CHECK19-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP165]])
+// CHECK19-NEXT:    [[TMP166:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l155.region_id, i32 4, i8** [[TMP159]], i8** [[TMP160]], i64* [[TMP161]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.17, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK19-NEXT:    [[TMP167:%.*]] = icmp ne i32 [[TMP166]], 0
+// CHECK19-NEXT:    br i1 [[TMP167]], label [[OMP_OFFLOAD_FAILED60:%.*]], label [[OMP_OFFLOAD_CONT61:%.*]]
 // CHECK19:       omp_offload.failed60:
-// CHECK19-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l155(i32 [[TMP137]], i32 [[TMP0]], i32* [[VLA]], i32 [[TMP139]]) #[[ATTR3]]
+// CHECK19-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l155(i32 [[TMP132]], i32 [[TMP0]], i32* [[VLA]], i32 [[TMP134]]) #[[ATTR3]]
 // CHECK19-NEXT:    br label [[OMP_OFFLOAD_CONT61]]
 // CHECK19:       omp_offload.cont61:
-// CHECK19-NEXT:    [[TMP175:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK19-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10EEiT_(i32 noundef [[TMP175]])
+// CHECK19-NEXT:    [[TMP168:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK19-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10EEiT_(i32 noundef [[TMP168]])
 // CHECK19-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK19-NEXT:    [[TMP176:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK19-NEXT:    call void @llvm.stackrestore(i8* [[TMP176]])
-// CHECK19-NEXT:    [[TMP177:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK19-NEXT:    ret i32 [[TMP177]]
+// CHECK19-NEXT:    [[TMP169:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK19-NEXT:    call void @llvm.stackrestore(i8* [[TMP169]])
+// CHECK19-NEXT:    [[TMP170:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK19-NEXT:    ret i32 [[TMP170]]
 //
 //
 // CHECK19-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l139
@@ -25654,11 +25556,11 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
 // CHECK19-NEXT:    store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK19-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32 [[TMP3]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP5]])
+// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32 [[TMP3]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP5]])
 // CHECK19-NEXT:    ret void
 //
 //
-// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..5
+// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..6
 // CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK19-NEXT:  entry:
 // CHECK19-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -25739,7 +25641,7 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !29
 // CHECK19-NEXT:    store i32 [[TMP20]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !29
 // CHECK19-NEXT:    [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !29
-// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*, i32)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32 [[TMP19]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP21]]), !llvm.access.group !29
+// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*, i32)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32 [[TMP19]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP21]]), !llvm.access.group !29
 // CHECK19-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK19:       omp.inner.for.inc:
 // CHECK19-NEXT:    [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29
@@ -25793,7 +25695,7 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    ret void
 //
 //
-// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..6
+// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..7
 // CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK19-NEXT:  entry:
 // CHECK19-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -25923,11 +25825,11 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4
 // CHECK19-NEXT:    store i32 [[TMP2]], i32* [[N_CASTED]], align 4
 // CHECK19-NEXT:    [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4
-// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), i32 [[TMP3]], i32 [[TMP0]], i32* [[TMP1]])
+// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), i32 [[TMP3]], i32 [[TMP0]], i32* [[TMP1]])
 // CHECK19-NEXT:    ret void
 //
 //
-// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..8
+// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..10
 // CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] {
 // CHECK19-NEXT:  entry:
 // CHECK19-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -26000,7 +25902,7 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[TMP17:%.*]] = load i32, i32* [[N_ADDR]], align 4, !llvm.access.group !35
 // CHECK19-NEXT:    store i32 [[TMP17]], i32* [[N_CASTED]], align 4, !llvm.access.group !35
 // CHECK19-NEXT:    [[TMP18:%.*]] = load i32, i32* [[N_CASTED]], align 4, !llvm.access.group !35
-// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], i32* [[TMP1]]), !llvm.access.group !35
+// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], i32* [[TMP1]]), !llvm.access.group !35
 // CHECK19-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK19:       omp.inner.for.inc:
 // CHECK19-NEXT:    [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35
@@ -26031,7 +25933,7 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    ret void
 //
 //
-// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..9
+// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..11
 // CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] {
 // CHECK19-NEXT:  entry:
 // CHECK19-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -26161,11 +26063,11 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
 // CHECK19-NEXT:    store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK19-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32 [[TMP3]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP5]])
+// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i32 [[TMP3]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP5]])
 // CHECK19-NEXT:    ret void
 //
 //
-// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..11
+// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..14
 // CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK19-NEXT:  entry:
 // CHECK19-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -26244,7 +26146,7 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !41
 // CHECK19-NEXT:    store i32 [[TMP19]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !41
 // CHECK19-NEXT:    [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !41
-// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*, i32)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP20]]), !llvm.access.group !41
+// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*, i32)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP20]]), !llvm.access.group !41
 // CHECK19-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK19:       omp.inner.for.inc:
 // CHECK19-NEXT:    [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41
@@ -26275,7 +26177,7 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    ret void
 //
 //
-// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..12
+// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..15
 // CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK19-NEXT:  entry:
 // CHECK19-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -26388,7 +26290,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK19-LABEL: define {{[^@]+}}@_Z5tmainIiLi10EEiT_
-// CHECK19-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK19-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK19-NEXT:  entry:
 // CHECK19-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK19-NEXT:    [[A:%.*]] = alloca [10 x i32], align 4
@@ -26430,7 +26332,7 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK19-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK19-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK19-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l112.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.16, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK19-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l112.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.20, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.21, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK19-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK19-NEXT:    br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK19:       omp_offload.failed:
@@ -26448,7 +26350,7 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK19-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
 // CHECK19-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK19-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l116.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.19, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.20, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK19-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l116.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.24, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.25, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK19-NEXT:    [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
 // CHECK19-NEXT:    br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]]
 // CHECK19:       omp_offload.failed5:
@@ -26479,7 +26381,7 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0
 // CHECK19-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS8]], i32 0, i32 0
 // CHECK19-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK19-NEXT:    [[TMP33:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l120.region_id, i32 2, i8** [[TMP31]], i8** [[TMP32]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.23, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.24, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK19-NEXT:    [[TMP33:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l120.region_id, i32 2, i8** [[TMP31]], i8** [[TMP32]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.28, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.29, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK19-NEXT:    [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0
 // CHECK19-NEXT:    br i1 [[TMP34]], label [[OMP_OFFLOAD_FAILED11:%.*]], label [[OMP_OFFLOAD_CONT12:%.*]]
 // CHECK19:       omp_offload.failed11:
@@ -26497,7 +26399,7 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0
 // CHECK19-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 0
 // CHECK19-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK19-NEXT:    [[TMP42:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l124.region_id, i32 1, i8** [[TMP40]], i8** [[TMP41]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.27, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.28, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK19-NEXT:    [[TMP42:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l124.region_id, i32 1, i8** [[TMP40]], i8** [[TMP41]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.32, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.33, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK19-NEXT:    [[TMP43:%.*]] = icmp ne i32 [[TMP42]], 0
 // CHECK19-NEXT:    br i1 [[TMP43]], label [[OMP_OFFLOAD_FAILED17:%.*]], label [[OMP_OFFLOAD_CONT18:%.*]]
 // CHECK19:       omp_offload.failed17:
@@ -26528,7 +26430,7 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 0
 // CHECK19-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 0
 // CHECK19-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK19-NEXT:    [[TMP59:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l128.region_id, i32 2, i8** [[TMP57]], i8** [[TMP58]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.31, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.32, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK19-NEXT:    [[TMP59:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l128.region_id, i32 2, i8** [[TMP57]], i8** [[TMP58]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.36, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.37, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK19-NEXT:    [[TMP60:%.*]] = icmp ne i32 [[TMP59]], 0
 // CHECK19-NEXT:    br i1 [[TMP60]], label [[OMP_OFFLOAD_FAILED25:%.*]], label [[OMP_OFFLOAD_CONT26:%.*]]
 // CHECK19:       omp_offload.failed25:
@@ -26544,11 +26446,11 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4
 // CHECK19-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4
 // CHECK19-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4
-// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK19-NEXT:    ret void
 //
 //
-// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..14
+// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..18
 // CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK19-NEXT:  entry:
 // CHECK19-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -26594,7 +26496,7 @@ int main (int argc, char **argv) {
 // CHECK19:       omp.inner.for.body:
 // CHECK19-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !47
 // CHECK19-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !47
-// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !47
+// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !47
 // CHECK19-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK19:       omp.inner.for.inc:
 // CHECK19-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47
@@ -26616,7 +26518,7 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    ret void
 //
 //
-// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..15
+// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..19
 // CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK19-NEXT:  entry:
 // CHECK19-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -26703,11 +26605,11 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4
 // CHECK19-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4
 // CHECK19-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4
-// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..17 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..22 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK19-NEXT:    ret void
 //
 //
-// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..17
+// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..22
 // CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK19-NEXT:  entry:
 // CHECK19-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -26753,7 +26655,7 @@ int main (int argc, char **argv) {
 // CHECK19:       omp.inner.for.body:
 // CHECK19-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !53
 // CHECK19-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !53
-// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !53
+// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..23 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !53
 // CHECK19-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK19:       omp.inner.for.inc:
 // CHECK19-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53
@@ -26775,7 +26677,7 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    ret void
 //
 //
-// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..18
+// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..23
 // CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK19-NEXT:  entry:
 // CHECK19-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -26868,11 +26770,11 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
 // CHECK19-NEXT:    store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK19-NEXT:    [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i32)* @.omp_outlined..21 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i32 [[TMP2]])
+// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i32)* @.omp_outlined..26 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i32 [[TMP2]])
 // CHECK19-NEXT:    ret void
 //
 //
-// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..21
+// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..26
 // CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK19-NEXT:  entry:
 // CHECK19-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -26924,7 +26826,7 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !59
 // CHECK19-NEXT:    store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !59
 // CHECK19-NEXT:    [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !59
-// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..22 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]), !llvm.access.group !59
+// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..27 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]), !llvm.access.group !59
 // CHECK19-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK19:       omp.inner.for.inc:
 // CHECK19-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59
@@ -26946,7 +26848,7 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    ret void
 //
 //
-// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..22
+// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..27
 // CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK19-NEXT:  entry:
 // CHECK19-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -27055,11 +26957,11 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4
 // CHECK19-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4
 // CHECK19-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4
-// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..25 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK19-NEXT:    ret void
 //
 //
-// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..25
+// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..30
 // CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK19-NEXT:  entry:
 // CHECK19-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -27105,7 +27007,7 @@ int main (int argc, char **argv) {
 // CHECK19:       omp.inner.for.body:
 // CHECK19-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !65
 // CHECK19-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !65
-// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !65
+// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !65
 // CHECK19-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK19:       omp.inner.for.inc:
 // CHECK19-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !65
@@ -27127,7 +27029,7 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    ret void
 //
 //
-// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..26
+// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..31
 // CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK19-NEXT:  entry:
 // CHECK19-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -27218,11 +27120,11 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
 // CHECK19-NEXT:    store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK19-NEXT:    [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i32)* @.omp_outlined..29 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i32 [[TMP2]])
+// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i32)* @.omp_outlined..34 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i32 [[TMP2]])
 // CHECK19-NEXT:    ret void
 //
 //
-// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..29
+// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..34
 // CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK19-NEXT:  entry:
 // CHECK19-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -27274,7 +27176,7 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !71
 // CHECK19-NEXT:    store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !71
 // CHECK19-NEXT:    [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !71
-// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]), !llvm.access.group !71
+// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]), !llvm.access.group !71
 // CHECK19-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK19:       omp.inner.for.inc:
 // CHECK19-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !71
@@ -27296,7 +27198,7 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    ret void
 //
 //
-// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..30
+// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..35
 // CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK19-NEXT:  entry:
 // CHECK19-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -27379,7 +27281,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK19-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK19-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK19-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK19-NEXT:  entry:
 // CHECK19-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK19-NEXT:    ret void
@@ -27454,14 +27356,14 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4
 // CHECK20-NEXT:    [[TMP4:%.*]] = mul nuw i32 [[TMP0]], 4
 // CHECK20-NEXT:    [[TMP5:%.*]] = sext i32 [[TMP4]] to i64
-// CHECK20-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP3]], i32* [[TMP7]], align 4
-// CHECK20-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP3]], i32* [[TMP9]], align 4
-// CHECK20-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK20-NEXT:    store i64 4, i64* [[TMP10]], align 4
+// CHECK20-NEXT:    [[TMP6:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK20-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP6]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes to i8*), i32 24, i1 false)
+// CHECK20-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP3]], i32* [[TMP8]], align 4
+// CHECK20-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP3]], i32* [[TMP10]], align 4
 // CHECK20-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
 // CHECK20-NEXT:    store i8* null, i8** [[TMP11]], align 4
 // CHECK20-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -27470,302 +27372,288 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK20-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i32*
 // CHECK20-NEXT:    store i32 [[TMP0]], i32* [[TMP15]], align 4
-// CHECK20-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK20-NEXT:    store i64 4, i64* [[TMP16]], align 4
-// CHECK20-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK20-NEXT:    store i8* null, i8** [[TMP17]], align 4
-// CHECK20-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK20-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
-// CHECK20-NEXT:    store i32* [[VLA]], i32** [[TMP19]], align 4
-// CHECK20-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK20-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32**
-// CHECK20-NEXT:    store i32* [[VLA]], i32** [[TMP21]], align 4
-// CHECK20-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK20-NEXT:    store i64 [[TMP5]], i64* [[TMP22]], align 4
-// CHECK20-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK20-NEXT:    store i8* null, i8** [[TMP23]], align 4
-// CHECK20-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP27:%.*]] = load i32, i32* [[N]], align 4
-// CHECK20-NEXT:    store i32 [[TMP27]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK20-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK20-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP28]], 0
+// CHECK20-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK20-NEXT:    store i8* null, i8** [[TMP16]], align 4
+// CHECK20-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK20-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32**
+// CHECK20-NEXT:    store i32* [[VLA]], i32** [[TMP18]], align 4
+// CHECK20-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK20-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32**
+// CHECK20-NEXT:    store i32* [[VLA]], i32** [[TMP20]], align 4
+// CHECK20-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
+// CHECK20-NEXT:    store i64 [[TMP5]], i64* [[TMP21]], align 4
+// CHECK20-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK20-NEXT:    store i8* null, i8** [[TMP22]], align 4
+// CHECK20-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP26:%.*]] = load i32, i32* [[N]], align 4
+// CHECK20-NEXT:    store i32 [[TMP26]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK20-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK20-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP27]], 0
 // CHECK20-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK20-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK20-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK20-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK20-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP29]], 1
-// CHECK20-NEXT:    [[TMP30:%.*]] = zext i32 [[ADD]] to i64
-// CHECK20-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP30]])
-// CHECK20-NEXT:    [[TMP31:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l139.region_id, i32 3, i8** [[TMP24]], i8** [[TMP25]], i64* [[TMP26]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK20-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
-// CHECK20-NEXT:    br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK20-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK20-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP28]], 1
+// CHECK20-NEXT:    [[TMP29:%.*]] = zext i32 [[ADD]] to i64
+// CHECK20-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP29]])
+// CHECK20-NEXT:    [[TMP30:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l139.region_id, i32 3, i8** [[TMP23]], i8** [[TMP24]], i64* [[TMP25]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK20-NEXT:    [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
+// CHECK20-NEXT:    br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK20:       omp_offload.failed:
 // CHECK20-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l139(i32 [[TMP3]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK20-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK20:       omp_offload.cont:
-// CHECK20-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N]], align 4
-// CHECK20-NEXT:    store i32 [[TMP33]], i32* [[N_CASTED3]], align 4
-// CHECK20-NEXT:    [[TMP34:%.*]] = load i32, i32* [[N_CASTED3]], align 4
-// CHECK20-NEXT:    [[TMP35:%.*]] = mul nuw i32 [[TMP0]], 4
-// CHECK20-NEXT:    [[TMP36:%.*]] = sext i32 [[TMP35]] to i64
+// CHECK20-NEXT:    [[TMP32:%.*]] = load i32, i32* [[N]], align 4
+// CHECK20-NEXT:    store i32 [[TMP32]], i32* [[N_CASTED3]], align 4
+// CHECK20-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N_CASTED3]], align 4
+// CHECK20-NEXT:    [[TMP34:%.*]] = mul nuw i32 [[TMP0]], 4
+// CHECK20-NEXT:    [[TMP35:%.*]] = sext i32 [[TMP34]] to i64
+// CHECK20-NEXT:    [[TMP36:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES7]] to i8*
+// CHECK20-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP36]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes.4 to i8*), i32 24, i1 false)
 // CHECK20-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
 // CHECK20-NEXT:    [[TMP38:%.*]] = bitcast i8** [[TMP37]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP34]], i32* [[TMP38]], align 4
+// CHECK20-NEXT:    store i32 [[TMP33]], i32* [[TMP38]], align 4
 // CHECK20-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
 // CHECK20-NEXT:    [[TMP40:%.*]] = bitcast i8** [[TMP39]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP34]], i32* [[TMP40]], align 4
-// CHECK20-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 0
-// CHECK20-NEXT:    store i64 4, i64* [[TMP41]], align 4
-// CHECK20-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 0
-// CHECK20-NEXT:    store i8* null, i8** [[TMP42]], align 4
-// CHECK20-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 1
-// CHECK20-NEXT:    [[TMP44:%.*]] = bitcast i8** [[TMP43]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP0]], i32* [[TMP44]], align 4
-// CHECK20-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 1
-// CHECK20-NEXT:    [[TMP46:%.*]] = bitcast i8** [[TMP45]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP0]], i32* [[TMP46]], align 4
-// CHECK20-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 1
-// CHECK20-NEXT:    store i64 4, i64* [[TMP47]], align 4
-// CHECK20-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 1
-// CHECK20-NEXT:    store i8* null, i8** [[TMP48]], align 4
-// CHECK20-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 2
+// CHECK20-NEXT:    store i32 [[TMP33]], i32* [[TMP40]], align 4
+// CHECK20-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 0
+// CHECK20-NEXT:    store i8* null, i8** [[TMP41]], align 4
+// CHECK20-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 1
+// CHECK20-NEXT:    [[TMP43:%.*]] = bitcast i8** [[TMP42]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP0]], i32* [[TMP43]], align 4
+// CHECK20-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 1
+// CHECK20-NEXT:    [[TMP45:%.*]] = bitcast i8** [[TMP44]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP0]], i32* [[TMP45]], align 4
+// CHECK20-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 1
+// CHECK20-NEXT:    store i8* null, i8** [[TMP46]], align 4
+// CHECK20-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 2
+// CHECK20-NEXT:    [[TMP48:%.*]] = bitcast i8** [[TMP47]] to i32**
+// CHECK20-NEXT:    store i32* [[VLA]], i32** [[TMP48]], align 4
+// CHECK20-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 2
 // CHECK20-NEXT:    [[TMP50:%.*]] = bitcast i8** [[TMP49]] to i32**
 // CHECK20-NEXT:    store i32* [[VLA]], i32** [[TMP50]], align 4
-// CHECK20-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 2
-// CHECK20-NEXT:    [[TMP52:%.*]] = bitcast i8** [[TMP51]] to i32**
-// CHECK20-NEXT:    store i32* [[VLA]], i32** [[TMP52]], align 4
-// CHECK20-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 2
-// CHECK20-NEXT:    store i64 [[TMP36]], i64* [[TMP53]], align 4
-// CHECK20-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 2
-// CHECK20-NEXT:    store i8* null, i8** [[TMP54]], align 4
-// CHECK20-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP58:%.*]] = load i32, i32* [[N]], align 4
-// CHECK20-NEXT:    store i32 [[TMP58]], i32* [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK20-NEXT:    [[TMP59:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK20-NEXT:    [[SUB11:%.*]] = sub nsw i32 [[TMP59]], 0
+// CHECK20-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 2
+// CHECK20-NEXT:    store i64 [[TMP35]], i64* [[TMP51]], align 4
+// CHECK20-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 2
+// CHECK20-NEXT:    store i8* null, i8** [[TMP52]], align 4
+// CHECK20-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP56:%.*]] = load i32, i32* [[N]], align 4
+// CHECK20-NEXT:    store i32 [[TMP56]], i32* [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK20-NEXT:    [[TMP57:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK20-NEXT:    [[SUB11:%.*]] = sub nsw i32 [[TMP57]], 0
 // CHECK20-NEXT:    [[DIV12:%.*]] = sdiv i32 [[SUB11]], 1
 // CHECK20-NEXT:    [[SUB13:%.*]] = sub nsw i32 [[DIV12]], 1
 // CHECK20-NEXT:    store i32 [[SUB13]], i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK20-NEXT:    [[TMP60:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK20-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP60]], 1
-// CHECK20-NEXT:    [[TMP61:%.*]] = zext i32 [[ADD14]] to i64
-// CHECK20-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP61]])
-// CHECK20-NEXT:    [[TMP62:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l143.region_id, i32 3, i8** [[TMP55]], i8** [[TMP56]], i64* [[TMP57]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK20-NEXT:    [[TMP63:%.*]] = icmp ne i32 [[TMP62]], 0
-// CHECK20-NEXT:    br i1 [[TMP63]], label [[OMP_OFFLOAD_FAILED15:%.*]], label [[OMP_OFFLOAD_CONT16:%.*]]
+// CHECK20-NEXT:    [[TMP58:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
+// CHECK20-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP58]], 1
+// CHECK20-NEXT:    [[TMP59:%.*]] = zext i32 [[ADD14]] to i64
+// CHECK20-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP59]])
+// CHECK20-NEXT:    [[TMP60:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l143.region_id, i32 3, i8** [[TMP53]], i8** [[TMP54]], i64* [[TMP55]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.5, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK20-NEXT:    [[TMP61:%.*]] = icmp ne i32 [[TMP60]], 0
+// CHECK20-NEXT:    br i1 [[TMP61]], label [[OMP_OFFLOAD_FAILED15:%.*]], label [[OMP_OFFLOAD_CONT16:%.*]]
 // CHECK20:       omp_offload.failed15:
-// CHECK20-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l143(i32 [[TMP34]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK20-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l143(i32 [[TMP33]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK20-NEXT:    br label [[OMP_OFFLOAD_CONT16]]
 // CHECK20:       omp_offload.cont16:
-// CHECK20-NEXT:    [[TMP64:%.*]] = load i32, i32* [[M]], align 4
-// CHECK20-NEXT:    store i32 [[TMP64]], i32* [[DOTCAPTURE_EXPR_17]], align 4
-// CHECK20-NEXT:    [[TMP65:%.*]] = load i32, i32* [[N]], align 4
-// CHECK20-NEXT:    store i32 [[TMP65]], i32* [[N_CASTED18]], align 4
-// CHECK20-NEXT:    [[TMP66:%.*]] = load i32, i32* [[N_CASTED18]], align 4
-// CHECK20-NEXT:    [[TMP67:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_17]], align 4
-// CHECK20-NEXT:    store i32 [[TMP67]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK20-NEXT:    [[TMP68:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK20-NEXT:    [[TMP69:%.*]] = mul nuw i32 [[TMP0]], 4
-// CHECK20-NEXT:    [[TMP70:%.*]] = sext i32 [[TMP69]] to i64
-// CHECK20-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP72:%.*]] = bitcast i8** [[TMP71]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP66]], i32* [[TMP72]], align 4
-// CHECK20-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP74:%.*]] = bitcast i8** [[TMP73]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP66]], i32* [[TMP74]], align 4
-// CHECK20-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 0
-// CHECK20-NEXT:    store i64 4, i64* [[TMP75]], align 4
-// CHECK20-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 0
-// CHECK20-NEXT:    store i8* null, i8** [[TMP76]], align 4
-// CHECK20-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 1
+// CHECK20-NEXT:    [[TMP62:%.*]] = load i32, i32* [[M]], align 4
+// CHECK20-NEXT:    store i32 [[TMP62]], i32* [[DOTCAPTURE_EXPR_17]], align 4
+// CHECK20-NEXT:    [[TMP63:%.*]] = load i32, i32* [[N]], align 4
+// CHECK20-NEXT:    store i32 [[TMP63]], i32* [[N_CASTED18]], align 4
+// CHECK20-NEXT:    [[TMP64:%.*]] = load i32, i32* [[N_CASTED18]], align 4
+// CHECK20-NEXT:    [[TMP65:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_17]], align 4
+// CHECK20-NEXT:    store i32 [[TMP65]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
+// CHECK20-NEXT:    [[TMP66:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
+// CHECK20-NEXT:    [[TMP67:%.*]] = mul nuw i32 [[TMP0]], 4
+// CHECK20-NEXT:    [[TMP68:%.*]] = sext i32 [[TMP67]] to i64
+// CHECK20-NEXT:    [[TMP69:%.*]] = bitcast [4 x i64]* [[DOTOFFLOAD_SIZES22]] to i8*
+// CHECK20-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP69]], i8* align 4 bitcast ([4 x i64]* @.offload_sizes.8 to i8*), i32 32, i1 false)
+// CHECK20-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP71:%.*]] = bitcast i8** [[TMP70]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP64]], i32* [[TMP71]], align 4
+// CHECK20-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP73:%.*]] = bitcast i8** [[TMP72]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP64]], i32* [[TMP73]], align 4
+// CHECK20-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 0
+// CHECK20-NEXT:    store i8* null, i8** [[TMP74]], align 4
+// CHECK20-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 1
+// CHECK20-NEXT:    [[TMP76:%.*]] = bitcast i8** [[TMP75]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP0]], i32* [[TMP76]], align 4
+// CHECK20-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 1
 // CHECK20-NEXT:    [[TMP78:%.*]] = bitcast i8** [[TMP77]] to i32*
 // CHECK20-NEXT:    store i32 [[TMP0]], i32* [[TMP78]], align 4
-// CHECK20-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 1
-// CHECK20-NEXT:    [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP0]], i32* [[TMP80]], align 4
-// CHECK20-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 1
-// CHECK20-NEXT:    store i64 4, i64* [[TMP81]], align 4
-// CHECK20-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 1
-// CHECK20-NEXT:    store i8* null, i8** [[TMP82]], align 4
-// CHECK20-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 2
-// CHECK20-NEXT:    [[TMP84:%.*]] = bitcast i8** [[TMP83]] to i32**
-// CHECK20-NEXT:    store i32* [[VLA]], i32** [[TMP84]], align 4
-// CHECK20-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 2
-// CHECK20-NEXT:    [[TMP86:%.*]] = bitcast i8** [[TMP85]] to i32**
-// CHECK20-NEXT:    store i32* [[VLA]], i32** [[TMP86]], align 4
-// CHECK20-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 2
-// CHECK20-NEXT:    store i64 [[TMP70]], i64* [[TMP87]], align 4
-// CHECK20-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 2
-// CHECK20-NEXT:    store i8* null, i8** [[TMP88]], align 4
-// CHECK20-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 3
-// CHECK20-NEXT:    [[TMP90:%.*]] = bitcast i8** [[TMP89]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP68]], i32* [[TMP90]], align 4
-// CHECK20-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 3
-// CHECK20-NEXT:    [[TMP92:%.*]] = bitcast i8** [[TMP91]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP68]], i32* [[TMP92]], align 4
-// CHECK20-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 3
-// CHECK20-NEXT:    store i64 4, i64* [[TMP93]], align 4
-// CHECK20-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 3
-// CHECK20-NEXT:    store i8* null, i8** [[TMP94]], align 4
-// CHECK20-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP98:%.*]] = load i32, i32* [[N]], align 4
-// CHECK20-NEXT:    store i32 [[TMP98]], i32* [[DOTCAPTURE_EXPR_24]], align 4
-// CHECK20-NEXT:    [[TMP99:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_24]], align 4
-// CHECK20-NEXT:    [[SUB26:%.*]] = sub nsw i32 [[TMP99]], 0
+// CHECK20-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 1
+// CHECK20-NEXT:    store i8* null, i8** [[TMP79]], align 4
+// CHECK20-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 2
+// CHECK20-NEXT:    [[TMP81:%.*]] = bitcast i8** [[TMP80]] to i32**
+// CHECK20-NEXT:    store i32* [[VLA]], i32** [[TMP81]], align 4
+// CHECK20-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 2
+// CHECK20-NEXT:    [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i32**
+// CHECK20-NEXT:    store i32* [[VLA]], i32** [[TMP83]], align 4
+// CHECK20-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 2
+// CHECK20-NEXT:    store i64 [[TMP68]], i64* [[TMP84]], align 4
+// CHECK20-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 2
+// CHECK20-NEXT:    store i8* null, i8** [[TMP85]], align 4
+// CHECK20-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 3
+// CHECK20-NEXT:    [[TMP87:%.*]] = bitcast i8** [[TMP86]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP66]], i32* [[TMP87]], align 4
+// CHECK20-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 3
+// CHECK20-NEXT:    [[TMP89:%.*]] = bitcast i8** [[TMP88]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP66]], i32* [[TMP89]], align 4
+// CHECK20-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 3
+// CHECK20-NEXT:    store i8* null, i8** [[TMP90]], align 4
+// CHECK20-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP94:%.*]] = load i32, i32* [[N]], align 4
+// CHECK20-NEXT:    store i32 [[TMP94]], i32* [[DOTCAPTURE_EXPR_24]], align 4
+// CHECK20-NEXT:    [[TMP95:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_24]], align 4
+// CHECK20-NEXT:    [[SUB26:%.*]] = sub nsw i32 [[TMP95]], 0
 // CHECK20-NEXT:    [[DIV27:%.*]] = sdiv i32 [[SUB26]], 1
 // CHECK20-NEXT:    [[SUB28:%.*]] = sub nsw i32 [[DIV27]], 1
 // CHECK20-NEXT:    store i32 [[SUB28]], i32* [[DOTCAPTURE_EXPR_25]], align 4
-// CHECK20-NEXT:    [[TMP100:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_25]], align 4
-// CHECK20-NEXT:    [[ADD29:%.*]] = add nsw i32 [[TMP100]], 1
-// CHECK20-NEXT:    [[TMP101:%.*]] = zext i32 [[ADD29]] to i64
-// CHECK20-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP101]])
-// CHECK20-NEXT:    [[TMP102:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l147.region_id, i32 4, i8** [[TMP95]], i8** [[TMP96]], i64* [[TMP97]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.7, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK20-NEXT:    [[TMP103:%.*]] = icmp ne i32 [[TMP102]], 0
-// CHECK20-NEXT:    br i1 [[TMP103]], label [[OMP_OFFLOAD_FAILED30:%.*]], label [[OMP_OFFLOAD_CONT31:%.*]]
+// CHECK20-NEXT:    [[TMP96:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_25]], align 4
+// CHECK20-NEXT:    [[ADD29:%.*]] = add nsw i32 [[TMP96]], 1
+// CHECK20-NEXT:    [[TMP97:%.*]] = zext i32 [[ADD29]] to i64
+// CHECK20-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP97]])
+// CHECK20-NEXT:    [[TMP98:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l147.region_id, i32 4, i8** [[TMP91]], i8** [[TMP92]], i64* [[TMP93]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK20-NEXT:    [[TMP99:%.*]] = icmp ne i32 [[TMP98]], 0
+// CHECK20-NEXT:    br i1 [[TMP99]], label [[OMP_OFFLOAD_FAILED30:%.*]], label [[OMP_OFFLOAD_CONT31:%.*]]
 // CHECK20:       omp_offload.failed30:
-// CHECK20-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l147(i32 [[TMP66]], i32 [[TMP0]], i32* [[VLA]], i32 [[TMP68]]) #[[ATTR3]]
+// CHECK20-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l147(i32 [[TMP64]], i32 [[TMP0]], i32* [[VLA]], i32 [[TMP66]]) #[[ATTR3]]
 // CHECK20-NEXT:    br label [[OMP_OFFLOAD_CONT31]]
 // CHECK20:       omp_offload.cont31:
-// CHECK20-NEXT:    [[TMP104:%.*]] = load i32, i32* [[N]], align 4
-// CHECK20-NEXT:    store i32 [[TMP104]], i32* [[N_CASTED32]], align 4
-// CHECK20-NEXT:    [[TMP105:%.*]] = load i32, i32* [[N_CASTED32]], align 4
-// CHECK20-NEXT:    [[TMP106:%.*]] = mul nuw i32 [[TMP0]], 4
-// CHECK20-NEXT:    [[TMP107:%.*]] = sext i32 [[TMP106]] to i64
-// CHECK20-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS33]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP109:%.*]] = bitcast i8** [[TMP108]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP105]], i32* [[TMP109]], align 4
-// CHECK20-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS34]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP100:%.*]] = load i32, i32* [[N]], align 4
+// CHECK20-NEXT:    store i32 [[TMP100]], i32* [[N_CASTED32]], align 4
+// CHECK20-NEXT:    [[TMP101:%.*]] = load i32, i32* [[N_CASTED32]], align 4
+// CHECK20-NEXT:    [[TMP102:%.*]] = mul nuw i32 [[TMP0]], 4
+// CHECK20-NEXT:    [[TMP103:%.*]] = sext i32 [[TMP102]] to i64
+// CHECK20-NEXT:    [[TMP104:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES36]] to i8*
+// CHECK20-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP104]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes.12 to i8*), i32 24, i1 false)
+// CHECK20-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS33]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP106:%.*]] = bitcast i8** [[TMP105]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP101]], i32* [[TMP106]], align 4
+// CHECK20-NEXT:    [[TMP107:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS34]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP108:%.*]] = bitcast i8** [[TMP107]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP101]], i32* [[TMP108]], align 4
+// CHECK20-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS35]], i32 0, i32 0
+// CHECK20-NEXT:    store i8* null, i8** [[TMP109]], align 4
+// CHECK20-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS33]], i32 0, i32 1
 // CHECK20-NEXT:    [[TMP111:%.*]] = bitcast i8** [[TMP110]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP105]], i32* [[TMP111]], align 4
-// CHECK20-NEXT:    [[TMP112:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES36]], i32 0, i32 0
-// CHECK20-NEXT:    store i64 4, i64* [[TMP112]], align 4
-// CHECK20-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS35]], i32 0, i32 0
-// CHECK20-NEXT:    store i8* null, i8** [[TMP113]], align 4
-// CHECK20-NEXT:    [[TMP114:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS33]], i32 0, i32 1
-// CHECK20-NEXT:    [[TMP115:%.*]] = bitcast i8** [[TMP114]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP0]], i32* [[TMP115]], align 4
-// CHECK20-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS34]], i32 0, i32 1
-// CHECK20-NEXT:    [[TMP117:%.*]] = bitcast i8** [[TMP116]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP0]], i32* [[TMP117]], align 4
-// CHECK20-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES36]], i32 0, i32 1
-// CHECK20-NEXT:    store i64 4, i64* [[TMP118]], align 4
-// CHECK20-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS35]], i32 0, i32 1
-// CHECK20-NEXT:    store i8* null, i8** [[TMP119]], align 4
-// CHECK20-NEXT:    [[TMP120:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS33]], i32 0, i32 2
-// CHECK20-NEXT:    [[TMP121:%.*]] = bitcast i8** [[TMP120]] to i32**
-// CHECK20-NEXT:    store i32* [[VLA]], i32** [[TMP121]], align 4
-// CHECK20-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS34]], i32 0, i32 2
-// CHECK20-NEXT:    [[TMP123:%.*]] = bitcast i8** [[TMP122]] to i32**
-// CHECK20-NEXT:    store i32* [[VLA]], i32** [[TMP123]], align 4
-// CHECK20-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES36]], i32 0, i32 2
-// CHECK20-NEXT:    store i64 [[TMP107]], i64* [[TMP124]], align 4
-// CHECK20-NEXT:    [[TMP125:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS35]], i32 0, i32 2
-// CHECK20-NEXT:    store i8* null, i8** [[TMP125]], align 4
-// CHECK20-NEXT:    [[TMP126:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS33]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS34]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP128:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES36]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP129:%.*]] = load i32, i32* [[N]], align 4
-// CHECK20-NEXT:    store i32 [[TMP129]], i32* [[DOTCAPTURE_EXPR_38]], align 4
-// CHECK20-NEXT:    [[TMP130:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_38]], align 4
-// CHECK20-NEXT:    [[SUB40:%.*]] = sub nsw i32 [[TMP130]], 0
+// CHECK20-NEXT:    store i32 [[TMP0]], i32* [[TMP111]], align 4
+// CHECK20-NEXT:    [[TMP112:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS34]], i32 0, i32 1
+// CHECK20-NEXT:    [[TMP113:%.*]] = bitcast i8** [[TMP112]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP0]], i32* [[TMP113]], align 4
+// CHECK20-NEXT:    [[TMP114:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS35]], i32 0, i32 1
+// CHECK20-NEXT:    store i8* null, i8** [[TMP114]], align 4
+// CHECK20-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS33]], i32 0, i32 2
+// CHECK20-NEXT:    [[TMP116:%.*]] = bitcast i8** [[TMP115]] to i32**
+// CHECK20-NEXT:    store i32* [[VLA]], i32** [[TMP116]], align 4
+// CHECK20-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS34]], i32 0, i32 2
+// CHECK20-NEXT:    [[TMP118:%.*]] = bitcast i8** [[TMP117]] to i32**
+// CHECK20-NEXT:    store i32* [[VLA]], i32** [[TMP118]], align 4
+// CHECK20-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES36]], i32 0, i32 2
+// CHECK20-NEXT:    store i64 [[TMP103]], i64* [[TMP119]], align 4
+// CHECK20-NEXT:    [[TMP120:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS35]], i32 0, i32 2
+// CHECK20-NEXT:    store i8* null, i8** [[TMP120]], align 4
+// CHECK20-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS33]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS34]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP123:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES36]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP124:%.*]] = load i32, i32* [[N]], align 4
+// CHECK20-NEXT:    store i32 [[TMP124]], i32* [[DOTCAPTURE_EXPR_38]], align 4
+// CHECK20-NEXT:    [[TMP125:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_38]], align 4
+// CHECK20-NEXT:    [[SUB40:%.*]] = sub nsw i32 [[TMP125]], 0
 // CHECK20-NEXT:    [[DIV41:%.*]] = sdiv i32 [[SUB40]], 1
 // CHECK20-NEXT:    [[SUB42:%.*]] = sub nsw i32 [[DIV41]], 1
 // CHECK20-NEXT:    store i32 [[SUB42]], i32* [[DOTCAPTURE_EXPR_39]], align 4
-// CHECK20-NEXT:    [[TMP131:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_39]], align 4
-// CHECK20-NEXT:    [[ADD43:%.*]] = add nsw i32 [[TMP131]], 1
-// CHECK20-NEXT:    [[TMP132:%.*]] = zext i32 [[ADD43]] to i64
-// CHECK20-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP132]])
-// CHECK20-NEXT:    [[TMP133:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l151.region_id, i32 3, i8** [[TMP126]], i8** [[TMP127]], i64* [[TMP128]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.10, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK20-NEXT:    [[TMP134:%.*]] = icmp ne i32 [[TMP133]], 0
-// CHECK20-NEXT:    br i1 [[TMP134]], label [[OMP_OFFLOAD_FAILED44:%.*]], label [[OMP_OFFLOAD_CONT45:%.*]]
+// CHECK20-NEXT:    [[TMP126:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_39]], align 4
+// CHECK20-NEXT:    [[ADD43:%.*]] = add nsw i32 [[TMP126]], 1
+// CHECK20-NEXT:    [[TMP127:%.*]] = zext i32 [[ADD43]] to i64
+// CHECK20-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP127]])
+// CHECK20-NEXT:    [[TMP128:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l151.region_id, i32 3, i8** [[TMP121]], i8** [[TMP122]], i64* [[TMP123]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK20-NEXT:    [[TMP129:%.*]] = icmp ne i32 [[TMP128]], 0
+// CHECK20-NEXT:    br i1 [[TMP129]], label [[OMP_OFFLOAD_FAILED44:%.*]], label [[OMP_OFFLOAD_CONT45:%.*]]
 // CHECK20:       omp_offload.failed44:
-// CHECK20-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l151(i32 [[TMP105]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK20-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l151(i32 [[TMP101]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK20-NEXT:    br label [[OMP_OFFLOAD_CONT45]]
 // CHECK20:       omp_offload.cont45:
-// CHECK20-NEXT:    [[TMP135:%.*]] = load i32, i32* [[M]], align 4
-// CHECK20-NEXT:    store i32 [[TMP135]], i32* [[DOTCAPTURE_EXPR_46]], align 4
-// CHECK20-NEXT:    [[TMP136:%.*]] = load i32, i32* [[N]], align 4
-// CHECK20-NEXT:    store i32 [[TMP136]], i32* [[N_CASTED47]], align 4
-// CHECK20-NEXT:    [[TMP137:%.*]] = load i32, i32* [[N_CASTED47]], align 4
-// CHECK20-NEXT:    [[TMP138:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_46]], align 4
-// CHECK20-NEXT:    store i32 [[TMP138]], i32* [[DOTCAPTURE_EXPR__CASTED48]], align 4
-// CHECK20-NEXT:    [[TMP139:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED48]], align 4
-// CHECK20-NEXT:    [[TMP140:%.*]] = mul nuw i32 [[TMP0]], 4
-// CHECK20-NEXT:    [[TMP141:%.*]] = sext i32 [[TMP140]] to i64
-// CHECK20-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS49]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP143:%.*]] = bitcast i8** [[TMP142]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP137]], i32* [[TMP143]], align 4
-// CHECK20-NEXT:    [[TMP144:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS50]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP145:%.*]] = bitcast i8** [[TMP144]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP137]], i32* [[TMP145]], align 4
-// CHECK20-NEXT:    [[TMP146:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES52]], i32 0, i32 0
-// CHECK20-NEXT:    store i64 4, i64* [[TMP146]], align 4
-// CHECK20-NEXT:    [[TMP147:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS51]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP130:%.*]] = load i32, i32* [[M]], align 4
+// CHECK20-NEXT:    store i32 [[TMP130]], i32* [[DOTCAPTURE_EXPR_46]], align 4
+// CHECK20-NEXT:    [[TMP131:%.*]] = load i32, i32* [[N]], align 4
+// CHECK20-NEXT:    store i32 [[TMP131]], i32* [[N_CASTED47]], align 4
+// CHECK20-NEXT:    [[TMP132:%.*]] = load i32, i32* [[N_CASTED47]], align 4
+// CHECK20-NEXT:    [[TMP133:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_46]], align 4
+// CHECK20-NEXT:    store i32 [[TMP133]], i32* [[DOTCAPTURE_EXPR__CASTED48]], align 4
+// CHECK20-NEXT:    [[TMP134:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED48]], align 4
+// CHECK20-NEXT:    [[TMP135:%.*]] = mul nuw i32 [[TMP0]], 4
+// CHECK20-NEXT:    [[TMP136:%.*]] = sext i32 [[TMP135]] to i64
+// CHECK20-NEXT:    [[TMP137:%.*]] = bitcast [4 x i64]* [[DOTOFFLOAD_SIZES52]] to i8*
+// CHECK20-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP137]], i8* align 4 bitcast ([4 x i64]* @.offload_sizes.16 to i8*), i32 32, i1 false)
+// CHECK20-NEXT:    [[TMP138:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS49]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP139:%.*]] = bitcast i8** [[TMP138]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP132]], i32* [[TMP139]], align 4
+// CHECK20-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS50]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP141:%.*]] = bitcast i8** [[TMP140]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP132]], i32* [[TMP141]], align 4
+// CHECK20-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS51]], i32 0, i32 0
+// CHECK20-NEXT:    store i8* null, i8** [[TMP142]], align 4
+// CHECK20-NEXT:    [[TMP143:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS49]], i32 0, i32 1
+// CHECK20-NEXT:    [[TMP144:%.*]] = bitcast i8** [[TMP143]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP0]], i32* [[TMP144]], align 4
+// CHECK20-NEXT:    [[TMP145:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS50]], i32 0, i32 1
+// CHECK20-NEXT:    [[TMP146:%.*]] = bitcast i8** [[TMP145]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP0]], i32* [[TMP146]], align 4
+// CHECK20-NEXT:    [[TMP147:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS51]], i32 0, i32 1
 // CHECK20-NEXT:    store i8* null, i8** [[TMP147]], align 4
-// CHECK20-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS49]], i32 0, i32 1
-// CHECK20-NEXT:    [[TMP149:%.*]] = bitcast i8** [[TMP148]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP0]], i32* [[TMP149]], align 4
-// CHECK20-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS50]], i32 0, i32 1
-// CHECK20-NEXT:    [[TMP151:%.*]] = bitcast i8** [[TMP150]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP0]], i32* [[TMP151]], align 4
-// CHECK20-NEXT:    [[TMP152:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES52]], i32 0, i32 1
-// CHECK20-NEXT:    store i64 4, i64* [[TMP152]], align 4
-// CHECK20-NEXT:    [[TMP153:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS51]], i32 0, i32 1
+// CHECK20-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS49]], i32 0, i32 2
+// CHECK20-NEXT:    [[TMP149:%.*]] = bitcast i8** [[TMP148]] to i32**
+// CHECK20-NEXT:    store i32* [[VLA]], i32** [[TMP149]], align 4
+// CHECK20-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS50]], i32 0, i32 2
+// CHECK20-NEXT:    [[TMP151:%.*]] = bitcast i8** [[TMP150]] to i32**
+// CHECK20-NEXT:    store i32* [[VLA]], i32** [[TMP151]], align 4
+// CHECK20-NEXT:    [[TMP152:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES52]], i32 0, i32 2
+// CHECK20-NEXT:    store i64 [[TMP136]], i64* [[TMP152]], align 4
+// CHECK20-NEXT:    [[TMP153:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS51]], i32 0, i32 2
 // CHECK20-NEXT:    store i8* null, i8** [[TMP153]], align 4
-// CHECK20-NEXT:    [[TMP154:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS49]], i32 0, i32 2
-// CHECK20-NEXT:    [[TMP155:%.*]] = bitcast i8** [[TMP154]] to i32**
-// CHECK20-NEXT:    store i32* [[VLA]], i32** [[TMP155]], align 4
-// CHECK20-NEXT:    [[TMP156:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS50]], i32 0, i32 2
-// CHECK20-NEXT:    [[TMP157:%.*]] = bitcast i8** [[TMP156]] to i32**
-// CHECK20-NEXT:    store i32* [[VLA]], i32** [[TMP157]], align 4
-// CHECK20-NEXT:    [[TMP158:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES52]], i32 0, i32 2
-// CHECK20-NEXT:    store i64 [[TMP141]], i64* [[TMP158]], align 4
-// CHECK20-NEXT:    [[TMP159:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS51]], i32 0, i32 2
-// CHECK20-NEXT:    store i8* null, i8** [[TMP159]], align 4
-// CHECK20-NEXT:    [[TMP160:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS49]], i32 0, i32 3
-// CHECK20-NEXT:    [[TMP161:%.*]] = bitcast i8** [[TMP160]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP139]], i32* [[TMP161]], align 4
-// CHECK20-NEXT:    [[TMP162:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS50]], i32 0, i32 3
-// CHECK20-NEXT:    [[TMP163:%.*]] = bitcast i8** [[TMP162]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP139]], i32* [[TMP163]], align 4
-// CHECK20-NEXT:    [[TMP164:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES52]], i32 0, i32 3
-// CHECK20-NEXT:    store i64 4, i64* [[TMP164]], align 4
-// CHECK20-NEXT:    [[TMP165:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS51]], i32 0, i32 3
-// CHECK20-NEXT:    store i8* null, i8** [[TMP165]], align 4
-// CHECK20-NEXT:    [[TMP166:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS49]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP167:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS50]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP168:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES52]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP169:%.*]] = load i32, i32* [[N]], align 4
-// CHECK20-NEXT:    store i32 [[TMP169]], i32* [[DOTCAPTURE_EXPR_54]], align 4
-// CHECK20-NEXT:    [[TMP170:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_54]], align 4
-// CHECK20-NEXT:    [[SUB56:%.*]] = sub nsw i32 [[TMP170]], 0
+// CHECK20-NEXT:    [[TMP154:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS49]], i32 0, i32 3
+// CHECK20-NEXT:    [[TMP155:%.*]] = bitcast i8** [[TMP154]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP134]], i32* [[TMP155]], align 4
+// CHECK20-NEXT:    [[TMP156:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS50]], i32 0, i32 3
+// CHECK20-NEXT:    [[TMP157:%.*]] = bitcast i8** [[TMP156]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP134]], i32* [[TMP157]], align 4
+// CHECK20-NEXT:    [[TMP158:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS51]], i32 0, i32 3
+// CHECK20-NEXT:    store i8* null, i8** [[TMP158]], align 4
+// CHECK20-NEXT:    [[TMP159:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS49]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP160:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS50]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP161:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES52]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP162:%.*]] = load i32, i32* [[N]], align 4
+// CHECK20-NEXT:    store i32 [[TMP162]], i32* [[DOTCAPTURE_EXPR_54]], align 4
+// CHECK20-NEXT:    [[TMP163:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_54]], align 4
+// CHECK20-NEXT:    [[SUB56:%.*]] = sub nsw i32 [[TMP163]], 0
 // CHECK20-NEXT:    [[DIV57:%.*]] = sdiv i32 [[SUB56]], 1
 // CHECK20-NEXT:    [[SUB58:%.*]] = sub nsw i32 [[DIV57]], 1
 // CHECK20-NEXT:    store i32 [[SUB58]], i32* [[DOTCAPTURE_EXPR_55]], align 4
-// CHECK20-NEXT:    [[TMP171:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_55]], align 4
-// CHECK20-NEXT:    [[ADD59:%.*]] = add nsw i32 [[TMP171]], 1
-// CHECK20-NEXT:    [[TMP172:%.*]] = zext i32 [[ADD59]] to i64
-// CHECK20-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP172]])
-// CHECK20-NEXT:    [[TMP173:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l155.region_id, i32 4, i8** [[TMP166]], i8** [[TMP167]], i64* [[TMP168]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK20-NEXT:    [[TMP174:%.*]] = icmp ne i32 [[TMP173]], 0
-// CHECK20-NEXT:    br i1 [[TMP174]], label [[OMP_OFFLOAD_FAILED60:%.*]], label [[OMP_OFFLOAD_CONT61:%.*]]
+// CHECK20-NEXT:    [[TMP164:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_55]], align 4
+// CHECK20-NEXT:    [[ADD59:%.*]] = add nsw i32 [[TMP164]], 1
+// CHECK20-NEXT:    [[TMP165:%.*]] = zext i32 [[ADD59]] to i64
+// CHECK20-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP165]])
+// CHECK20-NEXT:    [[TMP166:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l155.region_id, i32 4, i8** [[TMP159]], i8** [[TMP160]], i64* [[TMP161]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.17, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK20-NEXT:    [[TMP167:%.*]] = icmp ne i32 [[TMP166]], 0
+// CHECK20-NEXT:    br i1 [[TMP167]], label [[OMP_OFFLOAD_FAILED60:%.*]], label [[OMP_OFFLOAD_CONT61:%.*]]
 // CHECK20:       omp_offload.failed60:
-// CHECK20-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l155(i32 [[TMP137]], i32 [[TMP0]], i32* [[VLA]], i32 [[TMP139]]) #[[ATTR3]]
+// CHECK20-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l155(i32 [[TMP132]], i32 [[TMP0]], i32* [[VLA]], i32 [[TMP134]]) #[[ATTR3]]
 // CHECK20-NEXT:    br label [[OMP_OFFLOAD_CONT61]]
 // CHECK20:       omp_offload.cont61:
-// CHECK20-NEXT:    [[TMP175:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK20-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10EEiT_(i32 noundef [[TMP175]])
+// CHECK20-NEXT:    [[TMP168:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK20-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10EEiT_(i32 noundef [[TMP168]])
 // CHECK20-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK20-NEXT:    [[TMP176:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK20-NEXT:    call void @llvm.stackrestore(i8* [[TMP176]])
-// CHECK20-NEXT:    [[TMP177:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK20-NEXT:    ret i32 [[TMP177]]
+// CHECK20-NEXT:    [[TMP169:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK20-NEXT:    call void @llvm.stackrestore(i8* [[TMP169]])
+// CHECK20-NEXT:    [[TMP170:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK20-NEXT:    ret i32 [[TMP170]]
 //
 //
 // CHECK20-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l139
@@ -28261,11 +28149,11 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
 // CHECK20-NEXT:    store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK20-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32 [[TMP3]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP5]])
+// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32 [[TMP3]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP5]])
 // CHECK20-NEXT:    ret void
 //
 //
-// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..5
+// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..6
 // CHECK20-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK20-NEXT:  entry:
 // CHECK20-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -28346,7 +28234,7 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !29
 // CHECK20-NEXT:    store i32 [[TMP20]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !29
 // CHECK20-NEXT:    [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !29
-// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*, i32)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32 [[TMP19]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP21]]), !llvm.access.group !29
+// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*, i32)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32 [[TMP19]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP21]]), !llvm.access.group !29
 // CHECK20-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK20:       omp.inner.for.inc:
 // CHECK20-NEXT:    [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29
@@ -28400,7 +28288,7 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    ret void
 //
 //
-// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..6
+// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..7
 // CHECK20-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK20-NEXT:  entry:
 // CHECK20-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -28530,11 +28418,11 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4
 // CHECK20-NEXT:    store i32 [[TMP2]], i32* [[N_CASTED]], align 4
 // CHECK20-NEXT:    [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4
-// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), i32 [[TMP3]], i32 [[TMP0]], i32* [[TMP1]])
+// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), i32 [[TMP3]], i32 [[TMP0]], i32* [[TMP1]])
 // CHECK20-NEXT:    ret void
 //
 //
-// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..8
+// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..10
 // CHECK20-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] {
 // CHECK20-NEXT:  entry:
 // CHECK20-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -28607,7 +28495,7 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    [[TMP17:%.*]] = load i32, i32* [[N_ADDR]], align 4, !llvm.access.group !35
 // CHECK20-NEXT:    store i32 [[TMP17]], i32* [[N_CASTED]], align 4, !llvm.access.group !35
 // CHECK20-NEXT:    [[TMP18:%.*]] = load i32, i32* [[N_CASTED]], align 4, !llvm.access.group !35
-// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], i32* [[TMP1]]), !llvm.access.group !35
+// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], i32* [[TMP1]]), !llvm.access.group !35
 // CHECK20-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK20:       omp.inner.for.inc:
 // CHECK20-NEXT:    [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35
@@ -28638,7 +28526,7 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    ret void
 //
 //
-// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..9
+// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..11
 // CHECK20-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] {
 // CHECK20-NEXT:  entry:
 // CHECK20-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -28768,11 +28656,11 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
 // CHECK20-NEXT:    store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK20-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32 [[TMP3]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP5]])
+// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i32 [[TMP3]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP5]])
 // CHECK20-NEXT:    ret void
 //
 //
-// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..11
+// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..14
 // CHECK20-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK20-NEXT:  entry:
 // CHECK20-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -28851,7 +28739,7 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !41
 // CHECK20-NEXT:    store i32 [[TMP19]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !41
 // CHECK20-NEXT:    [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !41
-// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*, i32)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP20]]), !llvm.access.group !41
+// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*, i32)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP20]]), !llvm.access.group !41
 // CHECK20-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK20:       omp.inner.for.inc:
 // CHECK20-NEXT:    [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41
@@ -28882,7 +28770,7 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    ret void
 //
 //
-// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..12
+// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..15
 // CHECK20-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK20-NEXT:  entry:
 // CHECK20-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -28995,7 +28883,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK20-LABEL: define {{[^@]+}}@_Z5tmainIiLi10EEiT_
-// CHECK20-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK20-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK20-NEXT:  entry:
 // CHECK20-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK20-NEXT:    [[A:%.*]] = alloca [10 x i32], align 4
@@ -29037,7 +28925,7 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK20-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK20-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK20-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l112.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.16, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK20-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l112.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.20, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.21, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK20-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK20-NEXT:    br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK20:       omp_offload.failed:
@@ -29055,7 +28943,7 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK20-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
 // CHECK20-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK20-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l116.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.19, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.20, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK20-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l116.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.24, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.25, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK20-NEXT:    [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
 // CHECK20-NEXT:    br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]]
 // CHECK20:       omp_offload.failed5:
@@ -29086,7 +28974,7 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0
 // CHECK20-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS8]], i32 0, i32 0
 // CHECK20-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK20-NEXT:    [[TMP33:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l120.region_id, i32 2, i8** [[TMP31]], i8** [[TMP32]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.23, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.24, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK20-NEXT:    [[TMP33:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l120.region_id, i32 2, i8** [[TMP31]], i8** [[TMP32]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.28, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.29, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK20-NEXT:    [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0
 // CHECK20-NEXT:    br i1 [[TMP34]], label [[OMP_OFFLOAD_FAILED11:%.*]], label [[OMP_OFFLOAD_CONT12:%.*]]
 // CHECK20:       omp_offload.failed11:
@@ -29104,7 +28992,7 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0
 // CHECK20-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 0
 // CHECK20-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK20-NEXT:    [[TMP42:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l124.region_id, i32 1, i8** [[TMP40]], i8** [[TMP41]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.27, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.28, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK20-NEXT:    [[TMP42:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l124.region_id, i32 1, i8** [[TMP40]], i8** [[TMP41]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.32, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.33, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK20-NEXT:    [[TMP43:%.*]] = icmp ne i32 [[TMP42]], 0
 // CHECK20-NEXT:    br i1 [[TMP43]], label [[OMP_OFFLOAD_FAILED17:%.*]], label [[OMP_OFFLOAD_CONT18:%.*]]
 // CHECK20:       omp_offload.failed17:
@@ -29135,7 +29023,7 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 0
 // CHECK20-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 0
 // CHECK20-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK20-NEXT:    [[TMP59:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l128.region_id, i32 2, i8** [[TMP57]], i8** [[TMP58]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.31, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.32, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK20-NEXT:    [[TMP59:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l128.region_id, i32 2, i8** [[TMP57]], i8** [[TMP58]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.36, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.37, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK20-NEXT:    [[TMP60:%.*]] = icmp ne i32 [[TMP59]], 0
 // CHECK20-NEXT:    br i1 [[TMP60]], label [[OMP_OFFLOAD_FAILED25:%.*]], label [[OMP_OFFLOAD_CONT26:%.*]]
 // CHECK20:       omp_offload.failed25:
@@ -29151,11 +29039,11 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4
 // CHECK20-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4
 // CHECK20-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4
-// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK20-NEXT:    ret void
 //
 //
-// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..14
+// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..18
 // CHECK20-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK20-NEXT:  entry:
 // CHECK20-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -29201,7 +29089,7 @@ int main (int argc, char **argv) {
 // CHECK20:       omp.inner.for.body:
 // CHECK20-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !47
 // CHECK20-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !47
-// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !47
+// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !47
 // CHECK20-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK20:       omp.inner.for.inc:
 // CHECK20-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47
@@ -29223,7 +29111,7 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    ret void
 //
 //
-// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..15
+// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..19
 // CHECK20-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK20-NEXT:  entry:
 // CHECK20-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -29310,11 +29198,11 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4
 // CHECK20-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4
 // CHECK20-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4
-// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..17 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..22 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK20-NEXT:    ret void
 //
 //
-// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..17
+// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..22
 // CHECK20-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK20-NEXT:  entry:
 // CHECK20-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -29360,7 +29248,7 @@ int main (int argc, char **argv) {
 // CHECK20:       omp.inner.for.body:
 // CHECK20-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !53
 // CHECK20-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !53
-// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !53
+// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..23 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !53
 // CHECK20-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK20:       omp.inner.for.inc:
 // CHECK20-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53
@@ -29382,7 +29270,7 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    ret void
 //
 //
-// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..18
+// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..23
 // CHECK20-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK20-NEXT:  entry:
 // CHECK20-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -29475,11 +29363,11 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
 // CHECK20-NEXT:    store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK20-NEXT:    [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i32)* @.omp_outlined..21 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i32 [[TMP2]])
+// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i32)* @.omp_outlined..26 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i32 [[TMP2]])
 // CHECK20-NEXT:    ret void
 //
 //
-// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..21
+// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..26
 // CHECK20-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK20-NEXT:  entry:
 // CHECK20-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -29531,7 +29419,7 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !59
 // CHECK20-NEXT:    store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !59
 // CHECK20-NEXT:    [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !59
-// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..22 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]), !llvm.access.group !59
+// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..27 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]), !llvm.access.group !59
 // CHECK20-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK20:       omp.inner.for.inc:
 // CHECK20-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59
@@ -29553,7 +29441,7 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    ret void
 //
 //
-// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..22
+// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..27
 // CHECK20-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK20-NEXT:  entry:
 // CHECK20-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -29662,11 +29550,11 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4
 // CHECK20-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4
 // CHECK20-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4
-// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..25 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK20-NEXT:    ret void
 //
 //
-// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..25
+// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..30
 // CHECK20-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK20-NEXT:  entry:
 // CHECK20-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -29712,7 +29600,7 @@ int main (int argc, char **argv) {
 // CHECK20:       omp.inner.for.body:
 // CHECK20-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !65
 // CHECK20-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !65
-// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !65
+// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !65
 // CHECK20-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK20:       omp.inner.for.inc:
 // CHECK20-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !65
@@ -29734,7 +29622,7 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    ret void
 //
 //
-// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..26
+// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..31
 // CHECK20-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK20-NEXT:  entry:
 // CHECK20-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -29825,11 +29713,11 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
 // CHECK20-NEXT:    store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK20-NEXT:    [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i32)* @.omp_outlined..29 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i32 [[TMP2]])
+// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i32)* @.omp_outlined..34 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i32 [[TMP2]])
 // CHECK20-NEXT:    ret void
 //
 //
-// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..29
+// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..34
 // CHECK20-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK20-NEXT:  entry:
 // CHECK20-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -29881,7 +29769,7 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !71
 // CHECK20-NEXT:    store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !71
 // CHECK20-NEXT:    [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !71
-// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]), !llvm.access.group !71
+// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]), !llvm.access.group !71
 // CHECK20-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK20:       omp.inner.for.inc:
 // CHECK20-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !71
@@ -29903,7 +29791,7 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    ret void
 //
 //
-// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..30
+// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..35
 // CHECK20-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK20-NEXT:  entry:
 // CHECK20-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -29986,7 +29874,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK20-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK20-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK20-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK20-NEXT:  entry:
 // CHECK20-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK20-NEXT:    ret void

diff  --git a/clang/test/OpenMP/target_teams_distribute_simd_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_simd_codegen.cpp
index 8856d432d3625..8343aa652e098 100644
--- a/clang/test/OpenMP/target_teams_distribute_simd_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_simd_codegen.cpp
@@ -503,14 +503,14 @@ int bar(int n){
 // CHECK1-NEXT:    [[TMP92:%.*]] = mul nuw i64 [[TMP2]], 4
 // CHECK1-NEXT:    [[TMP93:%.*]] = mul nuw i64 5, [[TMP5]]
 // CHECK1-NEXT:    [[TMP94:%.*]] = mul nuw i64 [[TMP93]], 8
-// CHECK1-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 0
-// CHECK1-NEXT:    [[TMP96:%.*]] = bitcast i8** [[TMP95]] to i64*
-// CHECK1-NEXT:    store i64 [[TMP90]], i64* [[TMP96]], align 8
-// CHECK1-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS27]], i32 0, i32 0
-// CHECK1-NEXT:    [[TMP98:%.*]] = bitcast i8** [[TMP97]] to i64*
-// CHECK1-NEXT:    store i64 [[TMP90]], i64* [[TMP98]], align 8
-// CHECK1-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK1-NEXT:    store i64 4, i64* [[TMP99]], align 8
+// CHECK1-NEXT:    [[TMP95:%.*]] = bitcast [9 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK1-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP95]], i8* align 8 bitcast ([9 x i64]* @.offload_sizes.10 to i8*), i64 72, i1 false)
+// CHECK1-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP97:%.*]] = bitcast i8** [[TMP96]] to i64*
+// CHECK1-NEXT:    store i64 [[TMP90]], i64* [[TMP97]], align 8
+// CHECK1-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS27]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP99:%.*]] = bitcast i8** [[TMP98]] to i64*
+// CHECK1-NEXT:    store i64 [[TMP90]], i64* [[TMP99]], align 8
 // CHECK1-NEXT:    [[TMP100:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS28]], i64 0, i64 0
 // CHECK1-NEXT:    store i8* null, i8** [[TMP100]], align 8
 // CHECK1-NEXT:    [[TMP101:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 1
@@ -519,87 +519,75 @@ int bar(int n){
 // CHECK1-NEXT:    [[TMP103:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS27]], i32 0, i32 1
 // CHECK1-NEXT:    [[TMP104:%.*]] = bitcast i8** [[TMP103]] to [10 x float]**
 // CHECK1-NEXT:    store [10 x float]* [[B]], [10 x float]** [[TMP104]], align 8
-// CHECK1-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK1-NEXT:    store i64 40, i64* [[TMP105]], align 8
-// CHECK1-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS28]], i64 0, i64 1
-// CHECK1-NEXT:    store i8* null, i8** [[TMP106]], align 8
-// CHECK1-NEXT:    [[TMP107:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 2
-// CHECK1-NEXT:    [[TMP108:%.*]] = bitcast i8** [[TMP107]] to i64*
-// CHECK1-NEXT:    store i64 [[TMP2]], i64* [[TMP108]], align 8
-// CHECK1-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS27]], i32 0, i32 2
-// CHECK1-NEXT:    [[TMP110:%.*]] = bitcast i8** [[TMP109]] to i64*
-// CHECK1-NEXT:    store i64 [[TMP2]], i64* [[TMP110]], align 8
-// CHECK1-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK1-NEXT:    store i64 8, i64* [[TMP111]], align 8
-// CHECK1-NEXT:    [[TMP112:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS28]], i64 0, i64 2
-// CHECK1-NEXT:    store i8* null, i8** [[TMP112]], align 8
-// CHECK1-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 3
+// CHECK1-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS28]], i64 0, i64 1
+// CHECK1-NEXT:    store i8* null, i8** [[TMP105]], align 8
+// CHECK1-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 2
+// CHECK1-NEXT:    [[TMP107:%.*]] = bitcast i8** [[TMP106]] to i64*
+// CHECK1-NEXT:    store i64 [[TMP2]], i64* [[TMP107]], align 8
+// CHECK1-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS27]], i32 0, i32 2
+// CHECK1-NEXT:    [[TMP109:%.*]] = bitcast i8** [[TMP108]] to i64*
+// CHECK1-NEXT:    store i64 [[TMP2]], i64* [[TMP109]], align 8
+// CHECK1-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS28]], i64 0, i64 2
+// CHECK1-NEXT:    store i8* null, i8** [[TMP110]], align 8
+// CHECK1-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 3
+// CHECK1-NEXT:    [[TMP112:%.*]] = bitcast i8** [[TMP111]] to float**
+// CHECK1-NEXT:    store float* [[VLA]], float** [[TMP112]], align 8
+// CHECK1-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS27]], i32 0, i32 3
 // CHECK1-NEXT:    [[TMP114:%.*]] = bitcast i8** [[TMP113]] to float**
 // CHECK1-NEXT:    store float* [[VLA]], float** [[TMP114]], align 8
-// CHECK1-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS27]], i32 0, i32 3
-// CHECK1-NEXT:    [[TMP116:%.*]] = bitcast i8** [[TMP115]] to float**
-// CHECK1-NEXT:    store float* [[VLA]], float** [[TMP116]], align 8
-// CHECK1-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK1-NEXT:    store i64 [[TMP92]], i64* [[TMP117]], align 8
-// CHECK1-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS28]], i64 0, i64 3
-// CHECK1-NEXT:    store i8* null, i8** [[TMP118]], align 8
-// CHECK1-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 4
+// CHECK1-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
+// CHECK1-NEXT:    store i64 [[TMP92]], i64* [[TMP115]], align 8
+// CHECK1-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS28]], i64 0, i64 3
+// CHECK1-NEXT:    store i8* null, i8** [[TMP116]], align 8
+// CHECK1-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 4
+// CHECK1-NEXT:    [[TMP118:%.*]] = bitcast i8** [[TMP117]] to [5 x [10 x double]]**
+// CHECK1-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP118]], align 8
+// CHECK1-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS27]], i32 0, i32 4
 // CHECK1-NEXT:    [[TMP120:%.*]] = bitcast i8** [[TMP119]] to [5 x [10 x double]]**
 // CHECK1-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP120]], align 8
-// CHECK1-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS27]], i32 0, i32 4
-// CHECK1-NEXT:    [[TMP122:%.*]] = bitcast i8** [[TMP121]] to [5 x [10 x double]]**
-// CHECK1-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP122]], align 8
-// CHECK1-NEXT:    [[TMP123:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK1-NEXT:    store i64 400, i64* [[TMP123]], align 8
-// CHECK1-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS28]], i64 0, i64 4
-// CHECK1-NEXT:    store i8* null, i8** [[TMP124]], align 8
-// CHECK1-NEXT:    [[TMP125:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 5
-// CHECK1-NEXT:    [[TMP126:%.*]] = bitcast i8** [[TMP125]] to i64*
-// CHECK1-NEXT:    store i64 5, i64* [[TMP126]], align 8
-// CHECK1-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS27]], i32 0, i32 5
+// CHECK1-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS28]], i64 0, i64 4
+// CHECK1-NEXT:    store i8* null, i8** [[TMP121]], align 8
+// CHECK1-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 5
+// CHECK1-NEXT:    [[TMP123:%.*]] = bitcast i8** [[TMP122]] to i64*
+// CHECK1-NEXT:    store i64 5, i64* [[TMP123]], align 8
+// CHECK1-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS27]], i32 0, i32 5
+// CHECK1-NEXT:    [[TMP125:%.*]] = bitcast i8** [[TMP124]] to i64*
+// CHECK1-NEXT:    store i64 5, i64* [[TMP125]], align 8
+// CHECK1-NEXT:    [[TMP126:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS28]], i64 0, i64 5
+// CHECK1-NEXT:    store i8* null, i8** [[TMP126]], align 8
+// CHECK1-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 6
 // CHECK1-NEXT:    [[TMP128:%.*]] = bitcast i8** [[TMP127]] to i64*
-// CHECK1-NEXT:    store i64 5, i64* [[TMP128]], align 8
-// CHECK1-NEXT:    [[TMP129:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 5
-// CHECK1-NEXT:    store i64 8, i64* [[TMP129]], align 8
-// CHECK1-NEXT:    [[TMP130:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS28]], i64 0, i64 5
-// CHECK1-NEXT:    store i8* null, i8** [[TMP130]], align 8
-// CHECK1-NEXT:    [[TMP131:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 6
-// CHECK1-NEXT:    [[TMP132:%.*]] = bitcast i8** [[TMP131]] to i64*
-// CHECK1-NEXT:    store i64 [[TMP5]], i64* [[TMP132]], align 8
-// CHECK1-NEXT:    [[TMP133:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS27]], i32 0, i32 6
-// CHECK1-NEXT:    [[TMP134:%.*]] = bitcast i8** [[TMP133]] to i64*
-// CHECK1-NEXT:    store i64 [[TMP5]], i64* [[TMP134]], align 8
-// CHECK1-NEXT:    [[TMP135:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 6
-// CHECK1-NEXT:    store i64 8, i64* [[TMP135]], align 8
-// CHECK1-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS28]], i64 0, i64 6
-// CHECK1-NEXT:    store i8* null, i8** [[TMP136]], align 8
-// CHECK1-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 7
-// CHECK1-NEXT:    [[TMP138:%.*]] = bitcast i8** [[TMP137]] to double**
-// CHECK1-NEXT:    store double* [[VLA1]], double** [[TMP138]], align 8
-// CHECK1-NEXT:    [[TMP139:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS27]], i32 0, i32 7
-// CHECK1-NEXT:    [[TMP140:%.*]] = bitcast i8** [[TMP139]] to double**
-// CHECK1-NEXT:    store double* [[VLA1]], double** [[TMP140]], align 8
-// CHECK1-NEXT:    [[TMP141:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
-// CHECK1-NEXT:    store i64 [[TMP94]], i64* [[TMP141]], align 8
-// CHECK1-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS28]], i64 0, i64 7
+// CHECK1-NEXT:    store i64 [[TMP5]], i64* [[TMP128]], align 8
+// CHECK1-NEXT:    [[TMP129:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS27]], i32 0, i32 6
+// CHECK1-NEXT:    [[TMP130:%.*]] = bitcast i8** [[TMP129]] to i64*
+// CHECK1-NEXT:    store i64 [[TMP5]], i64* [[TMP130]], align 8
+// CHECK1-NEXT:    [[TMP131:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS28]], i64 0, i64 6
+// CHECK1-NEXT:    store i8* null, i8** [[TMP131]], align 8
+// CHECK1-NEXT:    [[TMP132:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 7
+// CHECK1-NEXT:    [[TMP133:%.*]] = bitcast i8** [[TMP132]] to double**
+// CHECK1-NEXT:    store double* [[VLA1]], double** [[TMP133]], align 8
+// CHECK1-NEXT:    [[TMP134:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS27]], i32 0, i32 7
+// CHECK1-NEXT:    [[TMP135:%.*]] = bitcast i8** [[TMP134]] to double**
+// CHECK1-NEXT:    store double* [[VLA1]], double** [[TMP135]], align 8
+// CHECK1-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
+// CHECK1-NEXT:    store i64 [[TMP94]], i64* [[TMP136]], align 8
+// CHECK1-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS28]], i64 0, i64 7
+// CHECK1-NEXT:    store i8* null, i8** [[TMP137]], align 8
+// CHECK1-NEXT:    [[TMP138:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 8
+// CHECK1-NEXT:    [[TMP139:%.*]] = bitcast i8** [[TMP138]] to %struct.TT**
+// CHECK1-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP139]], align 8
+// CHECK1-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS27]], i32 0, i32 8
+// CHECK1-NEXT:    [[TMP141:%.*]] = bitcast i8** [[TMP140]] to %struct.TT**
+// CHECK1-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP141]], align 8
+// CHECK1-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS28]], i64 0, i64 8
 // CHECK1-NEXT:    store i8* null, i8** [[TMP142]], align 8
-// CHECK1-NEXT:    [[TMP143:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 8
-// CHECK1-NEXT:    [[TMP144:%.*]] = bitcast i8** [[TMP143]] to %struct.TT**
-// CHECK1-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP144]], align 8
-// CHECK1-NEXT:    [[TMP145:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS27]], i32 0, i32 8
-// CHECK1-NEXT:    [[TMP146:%.*]] = bitcast i8** [[TMP145]] to %struct.TT**
-// CHECK1-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP146]], align 8
-// CHECK1-NEXT:    [[TMP147:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 8
-// CHECK1-NEXT:    store i64 16, i64* [[TMP147]], align 8
-// CHECK1-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS28]], i64 0, i64 8
-// CHECK1-NEXT:    store i8* null, i8** [[TMP148]], align 8
-// CHECK1-NEXT:    [[TMP149:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 0
-// CHECK1-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS27]], i32 0, i32 0
-// CHECK1-NEXT:    [[TMP151:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP143:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP144:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS27]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP145:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
 // CHECK1-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK1-NEXT:    [[TMP152:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l142.region_id, i32 9, i8** [[TMP149]], i8** [[TMP150]], i64* [[TMP151]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.10, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
-// CHECK1-NEXT:    [[TMP153:%.*]] = icmp ne i32 [[TMP152]], 0
-// CHECK1-NEXT:    br i1 [[TMP153]], label [[OMP_OFFLOAD_FAILED30:%.*]], label [[OMP_OFFLOAD_CONT31:%.*]]
+// CHECK1-NEXT:    [[TMP146:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l142.region_id, i32 9, i8** [[TMP143]], i8** [[TMP144]], i64* [[TMP145]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.11, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK1-NEXT:    [[TMP147:%.*]] = icmp ne i32 [[TMP146]], 0
+// CHECK1-NEXT:    br i1 [[TMP147]], label [[OMP_OFFLOAD_FAILED30:%.*]], label [[OMP_OFFLOAD_CONT31:%.*]]
 // CHECK1:       omp_offload.failed30:
 // CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l142(i64 [[TMP90]], [10 x float]* [[B]], i64 [[TMP2]], float* [[VLA]], [5 x [10 x double]]* [[C]], i64 5, i64 [[TMP5]], double* [[VLA1]], %struct.TT* [[D]]) #[[ATTR4]]
 // CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT31]]
@@ -609,10 +597,10 @@ int bar(int n){
 // CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l142(i64 [[TMP90]], [10 x float]* [[B]], i64 [[TMP2]], float* [[VLA]], [5 x [10 x double]]* [[C]], i64 5, i64 [[TMP5]], double* [[VLA1]], %struct.TT* [[D]]) #[[ATTR4]]
 // CHECK1-NEXT:    br label [[OMP_IF_END33]]
 // CHECK1:       omp_if.end33:
-// CHECK1-NEXT:    [[TMP154:%.*]] = load i32, i32* [[A]], align 4
-// CHECK1-NEXT:    [[TMP155:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK1-NEXT:    call void @llvm.stackrestore(i8* [[TMP155]])
-// CHECK1-NEXT:    ret i32 [[TMP154]]
+// CHECK1-NEXT:    [[TMP148:%.*]] = load i32, i32* [[A]], align 4
+// CHECK1-NEXT:    [[TMP149:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK1-NEXT:    call void @llvm.stackrestore(i8* [[TMP149]])
+// CHECK1-NEXT:    ret i32 [[TMP148]]
 //
 //
 // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l97
@@ -1340,14 +1328,14 @@ int bar(int n){
 // CHECK1-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP8:%.*]] = mul nuw i64 2, [[TMP2]]
 // CHECK1-NEXT:    [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 2
-// CHECK1-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[TMP11:%.*]] = bitcast i8** [[TMP10]] to %struct.S1**
-// CHECK1-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP11]], align 8
-// CHECK1-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[TMP13:%.*]] = bitcast i8** [[TMP12]] to double**
-// CHECK1-NEXT:    store double* [[A]], double** [[TMP13]], align 8
-// CHECK1-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK1-NEXT:    store i64 8, i64* [[TMP14]], align 8
+// CHECK1-NEXT:    [[TMP10:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK1-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP10]], i8* align 8 bitcast ([5 x i64]* @.offload_sizes.13 to i8*), i64 40, i1 false)
+// CHECK1-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP12:%.*]] = bitcast i8** [[TMP11]] to %struct.S1**
+// CHECK1-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP12]], align 8
+// CHECK1-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to double**
+// CHECK1-NEXT:    store double* [[A]], double** [[TMP14]], align 8
 // CHECK1-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
 // CHECK1-NEXT:    store i8* null, i8** [[TMP15]], align 8
 // CHECK1-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -1356,47 +1344,41 @@ int bar(int n){
 // CHECK1-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK1-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i64*
 // CHECK1-NEXT:    store i64 [[TMP6]], i64* [[TMP19]], align 8
-// CHECK1-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK1-NEXT:    store i64 4, i64* [[TMP20]], align 8
-// CHECK1-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK1-NEXT:    store i8* null, i8** [[TMP21]], align 8
-// CHECK1-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK1-NEXT:    [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i64*
-// CHECK1-NEXT:    store i64 2, i64* [[TMP23]], align 8
-// CHECK1-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK1-NEXT:    [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i64*
-// CHECK1-NEXT:    store i64 2, i64* [[TMP25]], align 8
-// CHECK1-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK1-NEXT:    store i64 8, i64* [[TMP26]], align 8
-// CHECK1-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK1-NEXT:    store i8* null, i8** [[TMP27]], align 8
-// CHECK1-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK1-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK1-NEXT:    store i8* null, i8** [[TMP20]], align 8
+// CHECK1-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK1-NEXT:    [[TMP22:%.*]] = bitcast i8** [[TMP21]] to i64*
+// CHECK1-NEXT:    store i64 2, i64* [[TMP22]], align 8
+// CHECK1-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK1-NEXT:    [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i64*
+// CHECK1-NEXT:    store i64 2, i64* [[TMP24]], align 8
+// CHECK1-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK1-NEXT:    store i8* null, i8** [[TMP25]], align 8
+// CHECK1-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK1-NEXT:    [[TMP27:%.*]] = bitcast i8** [[TMP26]] to i64*
+// CHECK1-NEXT:    store i64 [[TMP2]], i64* [[TMP27]], align 8
+// CHECK1-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
 // CHECK1-NEXT:    [[TMP29:%.*]] = bitcast i8** [[TMP28]] to i64*
 // CHECK1-NEXT:    store i64 [[TMP2]], i64* [[TMP29]], align 8
-// CHECK1-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK1-NEXT:    [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i64*
-// CHECK1-NEXT:    store i64 [[TMP2]], i64* [[TMP31]], align 8
-// CHECK1-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK1-NEXT:    store i64 8, i64* [[TMP32]], align 8
-// CHECK1-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
-// CHECK1-NEXT:    store i8* null, i8** [[TMP33]], align 8
-// CHECK1-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK1-NEXT:    [[TMP35:%.*]] = bitcast i8** [[TMP34]] to i16**
-// CHECK1-NEXT:    store i16* [[VLA]], i16** [[TMP35]], align 8
-// CHECK1-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
-// CHECK1-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i16**
-// CHECK1-NEXT:    store i16* [[VLA]], i16** [[TMP37]], align 8
-// CHECK1-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK1-NEXT:    store i64 [[TMP9]], i64* [[TMP38]], align 8
-// CHECK1-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
-// CHECK1-NEXT:    store i8* null, i8** [[TMP39]], align 8
-// CHECK1-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
+// CHECK1-NEXT:    store i8* null, i8** [[TMP30]], align 8
+// CHECK1-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK1-NEXT:    [[TMP32:%.*]] = bitcast i8** [[TMP31]] to i16**
+// CHECK1-NEXT:    store i16* [[VLA]], i16** [[TMP32]], align 8
+// CHECK1-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK1-NEXT:    [[TMP34:%.*]] = bitcast i8** [[TMP33]] to i16**
+// CHECK1-NEXT:    store i16* [[VLA]], i16** [[TMP34]], align 8
+// CHECK1-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
+// CHECK1-NEXT:    store i64 [[TMP9]], i64* [[TMP35]], align 8
+// CHECK1-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
+// CHECK1-NEXT:    store i8* null, i8** [[TMP36]], align 8
+// CHECK1-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
 // CHECK1-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK1-NEXT:    [[TMP43:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l215.region_id, i32 5, i8** [[TMP40]], i8** [[TMP41]], i64* [[TMP42]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.12, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
-// CHECK1-NEXT:    [[TMP44:%.*]] = icmp ne i32 [[TMP43]], 0
-// CHECK1-NEXT:    br i1 [[TMP44]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK1-NEXT:    [[TMP40:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l215.region_id, i32 5, i8** [[TMP37]], i8** [[TMP38]], i64* [[TMP39]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.14, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK1-NEXT:    [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0
+// CHECK1-NEXT:    br i1 [[TMP41]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK1:       omp_offload.failed:
 // CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l215(%struct.S1* [[THIS1]], i64 [[TMP6]], i64 2, i64 [[TMP2]], i16* [[VLA]]) #[[ATTR4]]
 // CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT]]
@@ -1406,15 +1388,15 @@ int bar(int n){
 // CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l215(%struct.S1* [[THIS1]], i64 [[TMP6]], i64 2, i64 [[TMP2]], i16* [[VLA]]) #[[ATTR4]]
 // CHECK1-NEXT:    br label [[OMP_IF_END]]
 // CHECK1:       omp_if.end:
-// CHECK1-NEXT:    [[TMP45:%.*]] = mul nsw i64 1, [[TMP2]]
-// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i64 [[TMP45]]
+// CHECK1-NEXT:    [[TMP42:%.*]] = mul nsw i64 1, [[TMP2]]
+// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i64 [[TMP42]]
 // CHECK1-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1
-// CHECK1-NEXT:    [[TMP46:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
-// CHECK1-NEXT:    [[CONV3:%.*]] = sext i16 [[TMP46]] to i32
-// CHECK1-NEXT:    [[TMP47:%.*]] = load i32, i32* [[B]], align 4
-// CHECK1-NEXT:    [[ADD4:%.*]] = add nsw i32 [[CONV3]], [[TMP47]]
-// CHECK1-NEXT:    [[TMP48:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK1-NEXT:    call void @llvm.stackrestore(i8* [[TMP48]])
+// CHECK1-NEXT:    [[TMP43:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
+// CHECK1-NEXT:    [[CONV3:%.*]] = sext i16 [[TMP43]] to i32
+// CHECK1-NEXT:    [[TMP44:%.*]] = load i32, i32* [[B]], align 4
+// CHECK1-NEXT:    [[ADD4:%.*]] = add nsw i32 [[CONV3]], [[TMP44]]
+// CHECK1-NEXT:    [[TMP45:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK1-NEXT:    call void @llvm.stackrestore(i8* [[TMP45]])
 // CHECK1-NEXT:    ret i32 [[ADD4]]
 //
 //
@@ -1519,7 +1501,7 @@ int bar(int n){
 // CHECK1-NEXT:    [[ADD8:%.*]] = add i32 [[TMP40]], 1
 // CHECK1-NEXT:    [[TMP41:%.*]] = zext i32 [[ADD8]] to i64
 // CHECK1-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 [[TMP41]])
-// CHECK1-NEXT:    [[TMP42:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l197.region_id, i32 5, i8** [[TMP34]], i8** [[TMP35]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_sizes.14, i32 0, i32 0), i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.15, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK1-NEXT:    [[TMP42:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l197.region_id, i32 5, i8** [[TMP34]], i8** [[TMP35]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_sizes.16, i32 0, i32 0), i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.17, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
 // CHECK1-NEXT:    [[TMP43:%.*]] = icmp ne i32 [[TMP42]], 0
 // CHECK1-NEXT:    br i1 [[TMP43]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK1:       omp_offload.failed:
@@ -1590,7 +1572,7 @@ int bar(int n){
 // CHECK1-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK1-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK1-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l180.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.17, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.18, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK1-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l180.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.19, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.20, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
 // CHECK1-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK1-NEXT:    br i1 [[TMP23]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK1:       omp_offload.failed:
@@ -1629,11 +1611,11 @@ int bar(int n){
 // CHECK1-NEXT:    [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32*
 // CHECK1-NEXT:    store i32 [[TMP4]], i32* [[CONV3]], align 4
 // CHECK1-NEXT:    [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8
-// CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i64, i64, i64, i16*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i64 [[TMP5]], i64 [[TMP1]], i64 [[TMP2]], i16* [[TMP3]])
+// CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i64, i64, i64, i16*)* @.omp_outlined..12 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i64 [[TMP5]], i64 [[TMP1]], i64 [[TMP2]], i16* [[TMP3]])
 // CHECK1-NEXT:    ret void
 //
 //
-// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..11
+// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..12
 // CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR3]] {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -1767,11 +1749,11 @@ int bar(int n){
 // CHECK1-NEXT:    [[CONV7:%.*]] = bitcast i64* [[AAA_CASTED]] to i8*
 // CHECK1-NEXT:    store i8 [[TMP7]], i8* [[CONV7]], align 1
 // CHECK1-NEXT:    [[TMP8:%.*]] = load i64, i64* [[AAA_CASTED]], align 8
-// CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, [10 x i32]*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP8]], [10 x i32]* [[TMP0]])
+// CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP8]], [10 x i32]* [[TMP0]])
 // CHECK1-NEXT:    ret void
 //
 //
-// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..13
+// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..15
 // CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[N:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -1933,11 +1915,11 @@ int bar(int n){
 // CHECK1-NEXT:    [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16*
 // CHECK1-NEXT:    store i16 [[TMP3]], i16* [[CONV3]], align 2
 // CHECK1-NEXT:    [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8
-// CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], [10 x i32]* [[TMP0]])
+// CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], [10 x i32]* [[TMP0]])
 // CHECK1-NEXT:    ret void
 //
 //
-// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..16
+// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..18
 // CHECK1-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -2242,14 +2224,14 @@ int bar(int n){
 // CHECK2-NEXT:    [[TMP92:%.*]] = mul nuw i64 [[TMP2]], 4
 // CHECK2-NEXT:    [[TMP93:%.*]] = mul nuw i64 5, [[TMP5]]
 // CHECK2-NEXT:    [[TMP94:%.*]] = mul nuw i64 [[TMP93]], 8
-// CHECK2-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 0
-// CHECK2-NEXT:    [[TMP96:%.*]] = bitcast i8** [[TMP95]] to i64*
-// CHECK2-NEXT:    store i64 [[TMP90]], i64* [[TMP96]], align 8
-// CHECK2-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS27]], i32 0, i32 0
-// CHECK2-NEXT:    [[TMP98:%.*]] = bitcast i8** [[TMP97]] to i64*
-// CHECK2-NEXT:    store i64 [[TMP90]], i64* [[TMP98]], align 8
-// CHECK2-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK2-NEXT:    store i64 4, i64* [[TMP99]], align 8
+// CHECK2-NEXT:    [[TMP95:%.*]] = bitcast [9 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK2-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP95]], i8* align 8 bitcast ([9 x i64]* @.offload_sizes.10 to i8*), i64 72, i1 false)
+// CHECK2-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP97:%.*]] = bitcast i8** [[TMP96]] to i64*
+// CHECK2-NEXT:    store i64 [[TMP90]], i64* [[TMP97]], align 8
+// CHECK2-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS27]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP99:%.*]] = bitcast i8** [[TMP98]] to i64*
+// CHECK2-NEXT:    store i64 [[TMP90]], i64* [[TMP99]], align 8
 // CHECK2-NEXT:    [[TMP100:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS28]], i64 0, i64 0
 // CHECK2-NEXT:    store i8* null, i8** [[TMP100]], align 8
 // CHECK2-NEXT:    [[TMP101:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 1
@@ -2258,87 +2240,75 @@ int bar(int n){
 // CHECK2-NEXT:    [[TMP103:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS27]], i32 0, i32 1
 // CHECK2-NEXT:    [[TMP104:%.*]] = bitcast i8** [[TMP103]] to [10 x float]**
 // CHECK2-NEXT:    store [10 x float]* [[B]], [10 x float]** [[TMP104]], align 8
-// CHECK2-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK2-NEXT:    store i64 40, i64* [[TMP105]], align 8
-// CHECK2-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS28]], i64 0, i64 1
-// CHECK2-NEXT:    store i8* null, i8** [[TMP106]], align 8
-// CHECK2-NEXT:    [[TMP107:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 2
-// CHECK2-NEXT:    [[TMP108:%.*]] = bitcast i8** [[TMP107]] to i64*
-// CHECK2-NEXT:    store i64 [[TMP2]], i64* [[TMP108]], align 8
-// CHECK2-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS27]], i32 0, i32 2
-// CHECK2-NEXT:    [[TMP110:%.*]] = bitcast i8** [[TMP109]] to i64*
-// CHECK2-NEXT:    store i64 [[TMP2]], i64* [[TMP110]], align 8
-// CHECK2-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK2-NEXT:    store i64 8, i64* [[TMP111]], align 8
-// CHECK2-NEXT:    [[TMP112:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS28]], i64 0, i64 2
-// CHECK2-NEXT:    store i8* null, i8** [[TMP112]], align 8
-// CHECK2-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 3
+// CHECK2-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS28]], i64 0, i64 1
+// CHECK2-NEXT:    store i8* null, i8** [[TMP105]], align 8
+// CHECK2-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 2
+// CHECK2-NEXT:    [[TMP107:%.*]] = bitcast i8** [[TMP106]] to i64*
+// CHECK2-NEXT:    store i64 [[TMP2]], i64* [[TMP107]], align 8
+// CHECK2-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS27]], i32 0, i32 2
+// CHECK2-NEXT:    [[TMP109:%.*]] = bitcast i8** [[TMP108]] to i64*
+// CHECK2-NEXT:    store i64 [[TMP2]], i64* [[TMP109]], align 8
+// CHECK2-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS28]], i64 0, i64 2
+// CHECK2-NEXT:    store i8* null, i8** [[TMP110]], align 8
+// CHECK2-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 3
+// CHECK2-NEXT:    [[TMP112:%.*]] = bitcast i8** [[TMP111]] to float**
+// CHECK2-NEXT:    store float* [[VLA]], float** [[TMP112]], align 8
+// CHECK2-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS27]], i32 0, i32 3
 // CHECK2-NEXT:    [[TMP114:%.*]] = bitcast i8** [[TMP113]] to float**
 // CHECK2-NEXT:    store float* [[VLA]], float** [[TMP114]], align 8
-// CHECK2-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS27]], i32 0, i32 3
-// CHECK2-NEXT:    [[TMP116:%.*]] = bitcast i8** [[TMP115]] to float**
-// CHECK2-NEXT:    store float* [[VLA]], float** [[TMP116]], align 8
-// CHECK2-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK2-NEXT:    store i64 [[TMP92]], i64* [[TMP117]], align 8
-// CHECK2-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS28]], i64 0, i64 3
-// CHECK2-NEXT:    store i8* null, i8** [[TMP118]], align 8
-// CHECK2-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 4
+// CHECK2-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
+// CHECK2-NEXT:    store i64 [[TMP92]], i64* [[TMP115]], align 8
+// CHECK2-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS28]], i64 0, i64 3
+// CHECK2-NEXT:    store i8* null, i8** [[TMP116]], align 8
+// CHECK2-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 4
+// CHECK2-NEXT:    [[TMP118:%.*]] = bitcast i8** [[TMP117]] to [5 x [10 x double]]**
+// CHECK2-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP118]], align 8
+// CHECK2-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS27]], i32 0, i32 4
 // CHECK2-NEXT:    [[TMP120:%.*]] = bitcast i8** [[TMP119]] to [5 x [10 x double]]**
 // CHECK2-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP120]], align 8
-// CHECK2-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS27]], i32 0, i32 4
-// CHECK2-NEXT:    [[TMP122:%.*]] = bitcast i8** [[TMP121]] to [5 x [10 x double]]**
-// CHECK2-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP122]], align 8
-// CHECK2-NEXT:    [[TMP123:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK2-NEXT:    store i64 400, i64* [[TMP123]], align 8
-// CHECK2-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS28]], i64 0, i64 4
-// CHECK2-NEXT:    store i8* null, i8** [[TMP124]], align 8
-// CHECK2-NEXT:    [[TMP125:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 5
-// CHECK2-NEXT:    [[TMP126:%.*]] = bitcast i8** [[TMP125]] to i64*
-// CHECK2-NEXT:    store i64 5, i64* [[TMP126]], align 8
-// CHECK2-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS27]], i32 0, i32 5
+// CHECK2-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS28]], i64 0, i64 4
+// CHECK2-NEXT:    store i8* null, i8** [[TMP121]], align 8
+// CHECK2-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 5
+// CHECK2-NEXT:    [[TMP123:%.*]] = bitcast i8** [[TMP122]] to i64*
+// CHECK2-NEXT:    store i64 5, i64* [[TMP123]], align 8
+// CHECK2-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS27]], i32 0, i32 5
+// CHECK2-NEXT:    [[TMP125:%.*]] = bitcast i8** [[TMP124]] to i64*
+// CHECK2-NEXT:    store i64 5, i64* [[TMP125]], align 8
+// CHECK2-NEXT:    [[TMP126:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS28]], i64 0, i64 5
+// CHECK2-NEXT:    store i8* null, i8** [[TMP126]], align 8
+// CHECK2-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 6
 // CHECK2-NEXT:    [[TMP128:%.*]] = bitcast i8** [[TMP127]] to i64*
-// CHECK2-NEXT:    store i64 5, i64* [[TMP128]], align 8
-// CHECK2-NEXT:    [[TMP129:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 5
-// CHECK2-NEXT:    store i64 8, i64* [[TMP129]], align 8
-// CHECK2-NEXT:    [[TMP130:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS28]], i64 0, i64 5
-// CHECK2-NEXT:    store i8* null, i8** [[TMP130]], align 8
-// CHECK2-NEXT:    [[TMP131:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 6
-// CHECK2-NEXT:    [[TMP132:%.*]] = bitcast i8** [[TMP131]] to i64*
-// CHECK2-NEXT:    store i64 [[TMP5]], i64* [[TMP132]], align 8
-// CHECK2-NEXT:    [[TMP133:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS27]], i32 0, i32 6
-// CHECK2-NEXT:    [[TMP134:%.*]] = bitcast i8** [[TMP133]] to i64*
-// CHECK2-NEXT:    store i64 [[TMP5]], i64* [[TMP134]], align 8
-// CHECK2-NEXT:    [[TMP135:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 6
-// CHECK2-NEXT:    store i64 8, i64* [[TMP135]], align 8
-// CHECK2-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS28]], i64 0, i64 6
-// CHECK2-NEXT:    store i8* null, i8** [[TMP136]], align 8
-// CHECK2-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 7
-// CHECK2-NEXT:    [[TMP138:%.*]] = bitcast i8** [[TMP137]] to double**
-// CHECK2-NEXT:    store double* [[VLA1]], double** [[TMP138]], align 8
-// CHECK2-NEXT:    [[TMP139:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS27]], i32 0, i32 7
-// CHECK2-NEXT:    [[TMP140:%.*]] = bitcast i8** [[TMP139]] to double**
-// CHECK2-NEXT:    store double* [[VLA1]], double** [[TMP140]], align 8
-// CHECK2-NEXT:    [[TMP141:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
-// CHECK2-NEXT:    store i64 [[TMP94]], i64* [[TMP141]], align 8
-// CHECK2-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS28]], i64 0, i64 7
+// CHECK2-NEXT:    store i64 [[TMP5]], i64* [[TMP128]], align 8
+// CHECK2-NEXT:    [[TMP129:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS27]], i32 0, i32 6
+// CHECK2-NEXT:    [[TMP130:%.*]] = bitcast i8** [[TMP129]] to i64*
+// CHECK2-NEXT:    store i64 [[TMP5]], i64* [[TMP130]], align 8
+// CHECK2-NEXT:    [[TMP131:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS28]], i64 0, i64 6
+// CHECK2-NEXT:    store i8* null, i8** [[TMP131]], align 8
+// CHECK2-NEXT:    [[TMP132:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 7
+// CHECK2-NEXT:    [[TMP133:%.*]] = bitcast i8** [[TMP132]] to double**
+// CHECK2-NEXT:    store double* [[VLA1]], double** [[TMP133]], align 8
+// CHECK2-NEXT:    [[TMP134:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS27]], i32 0, i32 7
+// CHECK2-NEXT:    [[TMP135:%.*]] = bitcast i8** [[TMP134]] to double**
+// CHECK2-NEXT:    store double* [[VLA1]], double** [[TMP135]], align 8
+// CHECK2-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
+// CHECK2-NEXT:    store i64 [[TMP94]], i64* [[TMP136]], align 8
+// CHECK2-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS28]], i64 0, i64 7
+// CHECK2-NEXT:    store i8* null, i8** [[TMP137]], align 8
+// CHECK2-NEXT:    [[TMP138:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 8
+// CHECK2-NEXT:    [[TMP139:%.*]] = bitcast i8** [[TMP138]] to %struct.TT**
+// CHECK2-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP139]], align 8
+// CHECK2-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS27]], i32 0, i32 8
+// CHECK2-NEXT:    [[TMP141:%.*]] = bitcast i8** [[TMP140]] to %struct.TT**
+// CHECK2-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP141]], align 8
+// CHECK2-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS28]], i64 0, i64 8
 // CHECK2-NEXT:    store i8* null, i8** [[TMP142]], align 8
-// CHECK2-NEXT:    [[TMP143:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 8
-// CHECK2-NEXT:    [[TMP144:%.*]] = bitcast i8** [[TMP143]] to %struct.TT**
-// CHECK2-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP144]], align 8
-// CHECK2-NEXT:    [[TMP145:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS27]], i32 0, i32 8
-// CHECK2-NEXT:    [[TMP146:%.*]] = bitcast i8** [[TMP145]] to %struct.TT**
-// CHECK2-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP146]], align 8
-// CHECK2-NEXT:    [[TMP147:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 8
-// CHECK2-NEXT:    store i64 16, i64* [[TMP147]], align 8
-// CHECK2-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS28]], i64 0, i64 8
-// CHECK2-NEXT:    store i8* null, i8** [[TMP148]], align 8
-// CHECK2-NEXT:    [[TMP149:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 0
-// CHECK2-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS27]], i32 0, i32 0
-// CHECK2-NEXT:    [[TMP151:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP143:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP144:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS27]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP145:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
 // CHECK2-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK2-NEXT:    [[TMP152:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l142.region_id, i32 9, i8** [[TMP149]], i8** [[TMP150]], i64* [[TMP151]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.10, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
-// CHECK2-NEXT:    [[TMP153:%.*]] = icmp ne i32 [[TMP152]], 0
-// CHECK2-NEXT:    br i1 [[TMP153]], label [[OMP_OFFLOAD_FAILED30:%.*]], label [[OMP_OFFLOAD_CONT31:%.*]]
+// CHECK2-NEXT:    [[TMP146:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l142.region_id, i32 9, i8** [[TMP143]], i8** [[TMP144]], i64* [[TMP145]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.11, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK2-NEXT:    [[TMP147:%.*]] = icmp ne i32 [[TMP146]], 0
+// CHECK2-NEXT:    br i1 [[TMP147]], label [[OMP_OFFLOAD_FAILED30:%.*]], label [[OMP_OFFLOAD_CONT31:%.*]]
 // CHECK2:       omp_offload.failed30:
 // CHECK2-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l142(i64 [[TMP90]], [10 x float]* [[B]], i64 [[TMP2]], float* [[VLA]], [5 x [10 x double]]* [[C]], i64 5, i64 [[TMP5]], double* [[VLA1]], %struct.TT* [[D]]) #[[ATTR4]]
 // CHECK2-NEXT:    br label [[OMP_OFFLOAD_CONT31]]
@@ -2348,10 +2318,10 @@ int bar(int n){
 // CHECK2-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l142(i64 [[TMP90]], [10 x float]* [[B]], i64 [[TMP2]], float* [[VLA]], [5 x [10 x double]]* [[C]], i64 5, i64 [[TMP5]], double* [[VLA1]], %struct.TT* [[D]]) #[[ATTR4]]
 // CHECK2-NEXT:    br label [[OMP_IF_END33]]
 // CHECK2:       omp_if.end33:
-// CHECK2-NEXT:    [[TMP154:%.*]] = load i32, i32* [[A]], align 4
-// CHECK2-NEXT:    [[TMP155:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK2-NEXT:    call void @llvm.stackrestore(i8* [[TMP155]])
-// CHECK2-NEXT:    ret i32 [[TMP154]]
+// CHECK2-NEXT:    [[TMP148:%.*]] = load i32, i32* [[A]], align 4
+// CHECK2-NEXT:    [[TMP149:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK2-NEXT:    call void @llvm.stackrestore(i8* [[TMP149]])
+// CHECK2-NEXT:    ret i32 [[TMP148]]
 //
 //
 // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l97
@@ -3079,14 +3049,14 @@ int bar(int n){
 // CHECK2-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0
 // CHECK2-NEXT:    [[TMP8:%.*]] = mul nuw i64 2, [[TMP2]]
 // CHECK2-NEXT:    [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 2
-// CHECK2-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK2-NEXT:    [[TMP11:%.*]] = bitcast i8** [[TMP10]] to %struct.S1**
-// CHECK2-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP11]], align 8
-// CHECK2-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK2-NEXT:    [[TMP13:%.*]] = bitcast i8** [[TMP12]] to double**
-// CHECK2-NEXT:    store double* [[A]], double** [[TMP13]], align 8
-// CHECK2-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK2-NEXT:    store i64 8, i64* [[TMP14]], align 8
+// CHECK2-NEXT:    [[TMP10:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK2-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP10]], i8* align 8 bitcast ([5 x i64]* @.offload_sizes.13 to i8*), i64 40, i1 false)
+// CHECK2-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP12:%.*]] = bitcast i8** [[TMP11]] to %struct.S1**
+// CHECK2-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP12]], align 8
+// CHECK2-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to double**
+// CHECK2-NEXT:    store double* [[A]], double** [[TMP14]], align 8
 // CHECK2-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
 // CHECK2-NEXT:    store i8* null, i8** [[TMP15]], align 8
 // CHECK2-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -3095,47 +3065,41 @@ int bar(int n){
 // CHECK2-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK2-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i64*
 // CHECK2-NEXT:    store i64 [[TMP6]], i64* [[TMP19]], align 8
-// CHECK2-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK2-NEXT:    store i64 4, i64* [[TMP20]], align 8
-// CHECK2-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK2-NEXT:    store i8* null, i8** [[TMP21]], align 8
-// CHECK2-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK2-NEXT:    [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i64*
-// CHECK2-NEXT:    store i64 2, i64* [[TMP23]], align 8
-// CHECK2-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK2-NEXT:    [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i64*
-// CHECK2-NEXT:    store i64 2, i64* [[TMP25]], align 8
-// CHECK2-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK2-NEXT:    store i64 8, i64* [[TMP26]], align 8
-// CHECK2-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK2-NEXT:    store i8* null, i8** [[TMP27]], align 8
-// CHECK2-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK2-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK2-NEXT:    store i8* null, i8** [[TMP20]], align 8
+// CHECK2-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK2-NEXT:    [[TMP22:%.*]] = bitcast i8** [[TMP21]] to i64*
+// CHECK2-NEXT:    store i64 2, i64* [[TMP22]], align 8
+// CHECK2-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK2-NEXT:    [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i64*
+// CHECK2-NEXT:    store i64 2, i64* [[TMP24]], align 8
+// CHECK2-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK2-NEXT:    store i8* null, i8** [[TMP25]], align 8
+// CHECK2-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK2-NEXT:    [[TMP27:%.*]] = bitcast i8** [[TMP26]] to i64*
+// CHECK2-NEXT:    store i64 [[TMP2]], i64* [[TMP27]], align 8
+// CHECK2-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
 // CHECK2-NEXT:    [[TMP29:%.*]] = bitcast i8** [[TMP28]] to i64*
 // CHECK2-NEXT:    store i64 [[TMP2]], i64* [[TMP29]], align 8
-// CHECK2-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK2-NEXT:    [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i64*
-// CHECK2-NEXT:    store i64 [[TMP2]], i64* [[TMP31]], align 8
-// CHECK2-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK2-NEXT:    store i64 8, i64* [[TMP32]], align 8
-// CHECK2-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
-// CHECK2-NEXT:    store i8* null, i8** [[TMP33]], align 8
-// CHECK2-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK2-NEXT:    [[TMP35:%.*]] = bitcast i8** [[TMP34]] to i16**
-// CHECK2-NEXT:    store i16* [[VLA]], i16** [[TMP35]], align 8
-// CHECK2-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
-// CHECK2-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i16**
-// CHECK2-NEXT:    store i16* [[VLA]], i16** [[TMP37]], align 8
-// CHECK2-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK2-NEXT:    store i64 [[TMP9]], i64* [[TMP38]], align 8
-// CHECK2-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
-// CHECK2-NEXT:    store i8* null, i8** [[TMP39]], align 8
-// CHECK2-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK2-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK2-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
+// CHECK2-NEXT:    store i8* null, i8** [[TMP30]], align 8
+// CHECK2-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK2-NEXT:    [[TMP32:%.*]] = bitcast i8** [[TMP31]] to i16**
+// CHECK2-NEXT:    store i16* [[VLA]], i16** [[TMP32]], align 8
+// CHECK2-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK2-NEXT:    [[TMP34:%.*]] = bitcast i8** [[TMP33]] to i16**
+// CHECK2-NEXT:    store i16* [[VLA]], i16** [[TMP34]], align 8
+// CHECK2-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
+// CHECK2-NEXT:    store i64 [[TMP9]], i64* [[TMP35]], align 8
+// CHECK2-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
+// CHECK2-NEXT:    store i8* null, i8** [[TMP36]], align 8
+// CHECK2-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK2-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
 // CHECK2-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK2-NEXT:    [[TMP43:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l215.region_id, i32 5, i8** [[TMP40]], i8** [[TMP41]], i64* [[TMP42]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.12, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
-// CHECK2-NEXT:    [[TMP44:%.*]] = icmp ne i32 [[TMP43]], 0
-// CHECK2-NEXT:    br i1 [[TMP44]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK2-NEXT:    [[TMP40:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l215.region_id, i32 5, i8** [[TMP37]], i8** [[TMP38]], i64* [[TMP39]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.14, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK2-NEXT:    [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0
+// CHECK2-NEXT:    br i1 [[TMP41]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK2:       omp_offload.failed:
 // CHECK2-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l215(%struct.S1* [[THIS1]], i64 [[TMP6]], i64 2, i64 [[TMP2]], i16* [[VLA]]) #[[ATTR4]]
 // CHECK2-NEXT:    br label [[OMP_OFFLOAD_CONT]]
@@ -3145,15 +3109,15 @@ int bar(int n){
 // CHECK2-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l215(%struct.S1* [[THIS1]], i64 [[TMP6]], i64 2, i64 [[TMP2]], i16* [[VLA]]) #[[ATTR4]]
 // CHECK2-NEXT:    br label [[OMP_IF_END]]
 // CHECK2:       omp_if.end:
-// CHECK2-NEXT:    [[TMP45:%.*]] = mul nsw i64 1, [[TMP2]]
-// CHECK2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i64 [[TMP45]]
+// CHECK2-NEXT:    [[TMP42:%.*]] = mul nsw i64 1, [[TMP2]]
+// CHECK2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i64 [[TMP42]]
 // CHECK2-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1
-// CHECK2-NEXT:    [[TMP46:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
-// CHECK2-NEXT:    [[CONV3:%.*]] = sext i16 [[TMP46]] to i32
-// CHECK2-NEXT:    [[TMP47:%.*]] = load i32, i32* [[B]], align 4
-// CHECK2-NEXT:    [[ADD4:%.*]] = add nsw i32 [[CONV3]], [[TMP47]]
-// CHECK2-NEXT:    [[TMP48:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK2-NEXT:    call void @llvm.stackrestore(i8* [[TMP48]])
+// CHECK2-NEXT:    [[TMP43:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
+// CHECK2-NEXT:    [[CONV3:%.*]] = sext i16 [[TMP43]] to i32
+// CHECK2-NEXT:    [[TMP44:%.*]] = load i32, i32* [[B]], align 4
+// CHECK2-NEXT:    [[ADD4:%.*]] = add nsw i32 [[CONV3]], [[TMP44]]
+// CHECK2-NEXT:    [[TMP45:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK2-NEXT:    call void @llvm.stackrestore(i8* [[TMP45]])
 // CHECK2-NEXT:    ret i32 [[ADD4]]
 //
 //
@@ -3258,7 +3222,7 @@ int bar(int n){
 // CHECK2-NEXT:    [[ADD8:%.*]] = add i32 [[TMP40]], 1
 // CHECK2-NEXT:    [[TMP41:%.*]] = zext i32 [[ADD8]] to i64
 // CHECK2-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 [[TMP41]])
-// CHECK2-NEXT:    [[TMP42:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l197.region_id, i32 5, i8** [[TMP34]], i8** [[TMP35]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_sizes.14, i32 0, i32 0), i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.15, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK2-NEXT:    [[TMP42:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l197.region_id, i32 5, i8** [[TMP34]], i8** [[TMP35]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_sizes.16, i32 0, i32 0), i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.17, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
 // CHECK2-NEXT:    [[TMP43:%.*]] = icmp ne i32 [[TMP42]], 0
 // CHECK2-NEXT:    br i1 [[TMP43]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK2:       omp_offload.failed:
@@ -3329,7 +3293,7 @@ int bar(int n){
 // CHECK2-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK2-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK2-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK2-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l180.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.17, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.18, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK2-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l180.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.19, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.20, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
 // CHECK2-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK2-NEXT:    br i1 [[TMP23]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK2:       omp_offload.failed:
@@ -3368,11 +3332,11 @@ int bar(int n){
 // CHECK2-NEXT:    [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32*
 // CHECK2-NEXT:    store i32 [[TMP4]], i32* [[CONV3]], align 4
 // CHECK2-NEXT:    [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8
-// CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i64, i64, i64, i16*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i64 [[TMP5]], i64 [[TMP1]], i64 [[TMP2]], i16* [[TMP3]])
+// CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i64, i64, i64, i16*)* @.omp_outlined..12 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i64 [[TMP5]], i64 [[TMP1]], i64 [[TMP2]], i16* [[TMP3]])
 // CHECK2-NEXT:    ret void
 //
 //
-// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..11
+// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..12
 // CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR3]] {
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -3506,11 +3470,11 @@ int bar(int n){
 // CHECK2-NEXT:    [[CONV7:%.*]] = bitcast i64* [[AAA_CASTED]] to i8*
 // CHECK2-NEXT:    store i8 [[TMP7]], i8* [[CONV7]], align 1
 // CHECK2-NEXT:    [[TMP8:%.*]] = load i64, i64* [[AAA_CASTED]], align 8
-// CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, [10 x i32]*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP8]], [10 x i32]* [[TMP0]])
+// CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP8]], [10 x i32]* [[TMP0]])
 // CHECK2-NEXT:    ret void
 //
 //
-// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..13
+// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..15
 // CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[N:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] {
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -3672,11 +3636,11 @@ int bar(int n){
 // CHECK2-NEXT:    [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16*
 // CHECK2-NEXT:    store i16 [[TMP3]], i16* [[CONV3]], align 2
 // CHECK2-NEXT:    [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8
-// CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], [10 x i32]* [[TMP0]])
+// CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], [10 x i32]* [[TMP0]])
 // CHECK2-NEXT:    ret void
 //
 //
-// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..16
+// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..18
 // CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] {
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -3976,14 +3940,14 @@ int bar(int n){
 // CHECK3-NEXT:    [[TMP92:%.*]] = mul nuw i32 5, [[TMP3]]
 // CHECK3-NEXT:    [[TMP93:%.*]] = mul nuw i32 [[TMP92]], 8
 // CHECK3-NEXT:    [[TMP94:%.*]] = sext i32 [[TMP93]] to i64
-// CHECK3-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 0
-// CHECK3-NEXT:    [[TMP96:%.*]] = bitcast i8** [[TMP95]] to i32*
-// CHECK3-NEXT:    store i32 [[TMP88]], i32* [[TMP96]], align 4
-// CHECK3-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 0
-// CHECK3-NEXT:    [[TMP98:%.*]] = bitcast i8** [[TMP97]] to i32*
-// CHECK3-NEXT:    store i32 [[TMP88]], i32* [[TMP98]], align 4
-// CHECK3-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK3-NEXT:    store i64 4, i64* [[TMP99]], align 4
+// CHECK3-NEXT:    [[TMP95:%.*]] = bitcast [9 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK3-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP95]], i8* align 4 bitcast ([9 x i64]* @.offload_sizes.10 to i8*), i32 72, i1 false)
+// CHECK3-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 0
+// CHECK3-NEXT:    [[TMP97:%.*]] = bitcast i8** [[TMP96]] to i32*
+// CHECK3-NEXT:    store i32 [[TMP88]], i32* [[TMP97]], align 4
+// CHECK3-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 0
+// CHECK3-NEXT:    [[TMP99:%.*]] = bitcast i8** [[TMP98]] to i32*
+// CHECK3-NEXT:    store i32 [[TMP88]], i32* [[TMP99]], align 4
 // CHECK3-NEXT:    [[TMP100:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i32 0, i32 0
 // CHECK3-NEXT:    store i8* null, i8** [[TMP100]], align 4
 // CHECK3-NEXT:    [[TMP101:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 1
@@ -3992,87 +3956,75 @@ int bar(int n){
 // CHECK3-NEXT:    [[TMP103:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 1
 // CHECK3-NEXT:    [[TMP104:%.*]] = bitcast i8** [[TMP103]] to [10 x float]**
 // CHECK3-NEXT:    store [10 x float]* [[B]], [10 x float]** [[TMP104]], align 4
-// CHECK3-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK3-NEXT:    store i64 40, i64* [[TMP105]], align 4
-// CHECK3-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i32 0, i32 1
-// CHECK3-NEXT:    store i8* null, i8** [[TMP106]], align 4
-// CHECK3-NEXT:    [[TMP107:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 2
-// CHECK3-NEXT:    [[TMP108:%.*]] = bitcast i8** [[TMP107]] to i32*
-// CHECK3-NEXT:    store i32 [[TMP1]], i32* [[TMP108]], align 4
-// CHECK3-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 2
-// CHECK3-NEXT:    [[TMP110:%.*]] = bitcast i8** [[TMP109]] to i32*
-// CHECK3-NEXT:    store i32 [[TMP1]], i32* [[TMP110]], align 4
-// CHECK3-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK3-NEXT:    store i64 4, i64* [[TMP111]], align 4
-// CHECK3-NEXT:    [[TMP112:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i32 0, i32 2
-// CHECK3-NEXT:    store i8* null, i8** [[TMP112]], align 4
-// CHECK3-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 3
+// CHECK3-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i32 0, i32 1
+// CHECK3-NEXT:    store i8* null, i8** [[TMP105]], align 4
+// CHECK3-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 2
+// CHECK3-NEXT:    [[TMP107:%.*]] = bitcast i8** [[TMP106]] to i32*
+// CHECK3-NEXT:    store i32 [[TMP1]], i32* [[TMP107]], align 4
+// CHECK3-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 2
+// CHECK3-NEXT:    [[TMP109:%.*]] = bitcast i8** [[TMP108]] to i32*
+// CHECK3-NEXT:    store i32 [[TMP1]], i32* [[TMP109]], align 4
+// CHECK3-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i32 0, i32 2
+// CHECK3-NEXT:    store i8* null, i8** [[TMP110]], align 4
+// CHECK3-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 3
+// CHECK3-NEXT:    [[TMP112:%.*]] = bitcast i8** [[TMP111]] to float**
+// CHECK3-NEXT:    store float* [[VLA]], float** [[TMP112]], align 4
+// CHECK3-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 3
 // CHECK3-NEXT:    [[TMP114:%.*]] = bitcast i8** [[TMP113]] to float**
 // CHECK3-NEXT:    store float* [[VLA]], float** [[TMP114]], align 4
-// CHECK3-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 3
-// CHECK3-NEXT:    [[TMP116:%.*]] = bitcast i8** [[TMP115]] to float**
-// CHECK3-NEXT:    store float* [[VLA]], float** [[TMP116]], align 4
-// CHECK3-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK3-NEXT:    store i64 [[TMP91]], i64* [[TMP117]], align 4
-// CHECK3-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i32 0, i32 3
-// CHECK3-NEXT:    store i8* null, i8** [[TMP118]], align 4
-// CHECK3-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 4
+// CHECK3-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
+// CHECK3-NEXT:    store i64 [[TMP91]], i64* [[TMP115]], align 4
+// CHECK3-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i32 0, i32 3
+// CHECK3-NEXT:    store i8* null, i8** [[TMP116]], align 4
+// CHECK3-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 4
+// CHECK3-NEXT:    [[TMP118:%.*]] = bitcast i8** [[TMP117]] to [5 x [10 x double]]**
+// CHECK3-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP118]], align 4
+// CHECK3-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 4
 // CHECK3-NEXT:    [[TMP120:%.*]] = bitcast i8** [[TMP119]] to [5 x [10 x double]]**
 // CHECK3-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP120]], align 4
-// CHECK3-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 4
-// CHECK3-NEXT:    [[TMP122:%.*]] = bitcast i8** [[TMP121]] to [5 x [10 x double]]**
-// CHECK3-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP122]], align 4
-// CHECK3-NEXT:    [[TMP123:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK3-NEXT:    store i64 400, i64* [[TMP123]], align 4
-// CHECK3-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i32 0, i32 4
-// CHECK3-NEXT:    store i8* null, i8** [[TMP124]], align 4
-// CHECK3-NEXT:    [[TMP125:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 5
-// CHECK3-NEXT:    [[TMP126:%.*]] = bitcast i8** [[TMP125]] to i32*
-// CHECK3-NEXT:    store i32 5, i32* [[TMP126]], align 4
-// CHECK3-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 5
+// CHECK3-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i32 0, i32 4
+// CHECK3-NEXT:    store i8* null, i8** [[TMP121]], align 4
+// CHECK3-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 5
+// CHECK3-NEXT:    [[TMP123:%.*]] = bitcast i8** [[TMP122]] to i32*
+// CHECK3-NEXT:    store i32 5, i32* [[TMP123]], align 4
+// CHECK3-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 5
+// CHECK3-NEXT:    [[TMP125:%.*]] = bitcast i8** [[TMP124]] to i32*
+// CHECK3-NEXT:    store i32 5, i32* [[TMP125]], align 4
+// CHECK3-NEXT:    [[TMP126:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i32 0, i32 5
+// CHECK3-NEXT:    store i8* null, i8** [[TMP126]], align 4
+// CHECK3-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 6
 // CHECK3-NEXT:    [[TMP128:%.*]] = bitcast i8** [[TMP127]] to i32*
-// CHECK3-NEXT:    store i32 5, i32* [[TMP128]], align 4
-// CHECK3-NEXT:    [[TMP129:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 5
-// CHECK3-NEXT:    store i64 4, i64* [[TMP129]], align 4
-// CHECK3-NEXT:    [[TMP130:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i32 0, i32 5
-// CHECK3-NEXT:    store i8* null, i8** [[TMP130]], align 4
-// CHECK3-NEXT:    [[TMP131:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 6
-// CHECK3-NEXT:    [[TMP132:%.*]] = bitcast i8** [[TMP131]] to i32*
-// CHECK3-NEXT:    store i32 [[TMP3]], i32* [[TMP132]], align 4
-// CHECK3-NEXT:    [[TMP133:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 6
-// CHECK3-NEXT:    [[TMP134:%.*]] = bitcast i8** [[TMP133]] to i32*
-// CHECK3-NEXT:    store i32 [[TMP3]], i32* [[TMP134]], align 4
-// CHECK3-NEXT:    [[TMP135:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 6
-// CHECK3-NEXT:    store i64 4, i64* [[TMP135]], align 4
-// CHECK3-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i32 0, i32 6
-// CHECK3-NEXT:    store i8* null, i8** [[TMP136]], align 4
-// CHECK3-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 7
-// CHECK3-NEXT:    [[TMP138:%.*]] = bitcast i8** [[TMP137]] to double**
-// CHECK3-NEXT:    store double* [[VLA1]], double** [[TMP138]], align 4
-// CHECK3-NEXT:    [[TMP139:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 7
-// CHECK3-NEXT:    [[TMP140:%.*]] = bitcast i8** [[TMP139]] to double**
-// CHECK3-NEXT:    store double* [[VLA1]], double** [[TMP140]], align 4
-// CHECK3-NEXT:    [[TMP141:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
-// CHECK3-NEXT:    store i64 [[TMP94]], i64* [[TMP141]], align 4
-// CHECK3-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i32 0, i32 7
+// CHECK3-NEXT:    store i32 [[TMP3]], i32* [[TMP128]], align 4
+// CHECK3-NEXT:    [[TMP129:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 6
+// CHECK3-NEXT:    [[TMP130:%.*]] = bitcast i8** [[TMP129]] to i32*
+// CHECK3-NEXT:    store i32 [[TMP3]], i32* [[TMP130]], align 4
+// CHECK3-NEXT:    [[TMP131:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i32 0, i32 6
+// CHECK3-NEXT:    store i8* null, i8** [[TMP131]], align 4
+// CHECK3-NEXT:    [[TMP132:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 7
+// CHECK3-NEXT:    [[TMP133:%.*]] = bitcast i8** [[TMP132]] to double**
+// CHECK3-NEXT:    store double* [[VLA1]], double** [[TMP133]], align 4
+// CHECK3-NEXT:    [[TMP134:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 7
+// CHECK3-NEXT:    [[TMP135:%.*]] = bitcast i8** [[TMP134]] to double**
+// CHECK3-NEXT:    store double* [[VLA1]], double** [[TMP135]], align 4
+// CHECK3-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
+// CHECK3-NEXT:    store i64 [[TMP94]], i64* [[TMP136]], align 4
+// CHECK3-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i32 0, i32 7
+// CHECK3-NEXT:    store i8* null, i8** [[TMP137]], align 4
+// CHECK3-NEXT:    [[TMP138:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 8
+// CHECK3-NEXT:    [[TMP139:%.*]] = bitcast i8** [[TMP138]] to %struct.TT**
+// CHECK3-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP139]], align 4
+// CHECK3-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 8
+// CHECK3-NEXT:    [[TMP141:%.*]] = bitcast i8** [[TMP140]] to %struct.TT**
+// CHECK3-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP141]], align 4
+// CHECK3-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i32 0, i32 8
 // CHECK3-NEXT:    store i8* null, i8** [[TMP142]], align 4
-// CHECK3-NEXT:    [[TMP143:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 8
-// CHECK3-NEXT:    [[TMP144:%.*]] = bitcast i8** [[TMP143]] to %struct.TT**
-// CHECK3-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP144]], align 4
-// CHECK3-NEXT:    [[TMP145:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 8
-// CHECK3-NEXT:    [[TMP146:%.*]] = bitcast i8** [[TMP145]] to %struct.TT**
-// CHECK3-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP146]], align 4
-// CHECK3-NEXT:    [[TMP147:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 8
-// CHECK3-NEXT:    store i64 12, i64* [[TMP147]], align 4
-// CHECK3-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i32 0, i32 8
-// CHECK3-NEXT:    store i8* null, i8** [[TMP148]], align 4
-// CHECK3-NEXT:    [[TMP149:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 0
-// CHECK3-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 0
-// CHECK3-NEXT:    [[TMP151:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK3-NEXT:    [[TMP143:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 0
+// CHECK3-NEXT:    [[TMP144:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 0
+// CHECK3-NEXT:    [[TMP145:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
 // CHECK3-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK3-NEXT:    [[TMP152:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l142.region_id, i32 9, i8** [[TMP149]], i8** [[TMP150]], i64* [[TMP151]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.10, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
-// CHECK3-NEXT:    [[TMP153:%.*]] = icmp ne i32 [[TMP152]], 0
-// CHECK3-NEXT:    br i1 [[TMP153]], label [[OMP_OFFLOAD_FAILED25:%.*]], label [[OMP_OFFLOAD_CONT26:%.*]]
+// CHECK3-NEXT:    [[TMP146:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l142.region_id, i32 9, i8** [[TMP143]], i8** [[TMP144]], i64* [[TMP145]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.11, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK3-NEXT:    [[TMP147:%.*]] = icmp ne i32 [[TMP146]], 0
+// CHECK3-NEXT:    br i1 [[TMP147]], label [[OMP_OFFLOAD_FAILED25:%.*]], label [[OMP_OFFLOAD_CONT26:%.*]]
 // CHECK3:       omp_offload.failed25:
 // CHECK3-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l142(i32 [[TMP88]], [10 x float]* [[B]], i32 [[TMP1]], float* [[VLA]], [5 x [10 x double]]* [[C]], i32 5, i32 [[TMP3]], double* [[VLA1]], %struct.TT* [[D]]) #[[ATTR4]]
 // CHECK3-NEXT:    br label [[OMP_OFFLOAD_CONT26]]
@@ -4082,10 +4034,10 @@ int bar(int n){
 // CHECK3-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l142(i32 [[TMP88]], [10 x float]* [[B]], i32 [[TMP1]], float* [[VLA]], [5 x [10 x double]]* [[C]], i32 5, i32 [[TMP3]], double* [[VLA1]], %struct.TT* [[D]]) #[[ATTR4]]
 // CHECK3-NEXT:    br label [[OMP_IF_END28]]
 // CHECK3:       omp_if.end28:
-// CHECK3-NEXT:    [[TMP154:%.*]] = load i32, i32* [[A]], align 4
-// CHECK3-NEXT:    [[TMP155:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK3-NEXT:    call void @llvm.stackrestore(i8* [[TMP155]])
-// CHECK3-NEXT:    ret i32 [[TMP154]]
+// CHECK3-NEXT:    [[TMP148:%.*]] = load i32, i32* [[A]], align 4
+// CHECK3-NEXT:    [[TMP149:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK3-NEXT:    call void @llvm.stackrestore(i8* [[TMP149]])
+// CHECK3-NEXT:    ret i32 [[TMP148]]
 //
 //
 // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l97
@@ -4799,14 +4751,14 @@ int bar(int n){
 // CHECK3-NEXT:    [[TMP7:%.*]] = mul nuw i32 2, [[TMP1]]
 // CHECK3-NEXT:    [[TMP8:%.*]] = mul nuw i32 [[TMP7]], 2
 // CHECK3-NEXT:    [[TMP9:%.*]] = sext i32 [[TMP8]] to i64
-// CHECK3-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[TMP11:%.*]] = bitcast i8** [[TMP10]] to %struct.S1**
-// CHECK3-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP11]], align 4
-// CHECK3-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[TMP13:%.*]] = bitcast i8** [[TMP12]] to double**
-// CHECK3-NEXT:    store double* [[A]], double** [[TMP13]], align 4
-// CHECK3-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK3-NEXT:    store i64 8, i64* [[TMP14]], align 4
+// CHECK3-NEXT:    [[TMP10:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK3-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP10]], i8* align 4 bitcast ([5 x i64]* @.offload_sizes.13 to i8*), i32 40, i1 false)
+// CHECK3-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK3-NEXT:    [[TMP12:%.*]] = bitcast i8** [[TMP11]] to %struct.S1**
+// CHECK3-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP12]], align 4
+// CHECK3-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK3-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to double**
+// CHECK3-NEXT:    store double* [[A]], double** [[TMP14]], align 4
 // CHECK3-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
 // CHECK3-NEXT:    store i8* null, i8** [[TMP15]], align 4
 // CHECK3-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -4815,47 +4767,41 @@ int bar(int n){
 // CHECK3-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK3-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32*
 // CHECK3-NEXT:    store i32 [[TMP5]], i32* [[TMP19]], align 4
-// CHECK3-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK3-NEXT:    store i64 4, i64* [[TMP20]], align 4
-// CHECK3-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK3-NEXT:    store i8* null, i8** [[TMP21]], align 4
-// CHECK3-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK3-NEXT:    [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i32*
-// CHECK3-NEXT:    store i32 2, i32* [[TMP23]], align 4
-// CHECK3-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK3-NEXT:    [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i32*
-// CHECK3-NEXT:    store i32 2, i32* [[TMP25]], align 4
-// CHECK3-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK3-NEXT:    store i64 4, i64* [[TMP26]], align 4
-// CHECK3-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK3-NEXT:    store i8* null, i8** [[TMP27]], align 4
-// CHECK3-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK3-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK3-NEXT:    store i8* null, i8** [[TMP20]], align 4
+// CHECK3-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK3-NEXT:    [[TMP22:%.*]] = bitcast i8** [[TMP21]] to i32*
+// CHECK3-NEXT:    store i32 2, i32* [[TMP22]], align 4
+// CHECK3-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK3-NEXT:    [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i32*
+// CHECK3-NEXT:    store i32 2, i32* [[TMP24]], align 4
+// CHECK3-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK3-NEXT:    store i8* null, i8** [[TMP25]], align 4
+// CHECK3-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK3-NEXT:    [[TMP27:%.*]] = bitcast i8** [[TMP26]] to i32*
+// CHECK3-NEXT:    store i32 [[TMP1]], i32* [[TMP27]], align 4
+// CHECK3-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
 // CHECK3-NEXT:    [[TMP29:%.*]] = bitcast i8** [[TMP28]] to i32*
 // CHECK3-NEXT:    store i32 [[TMP1]], i32* [[TMP29]], align 4
-// CHECK3-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK3-NEXT:    [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i32*
-// CHECK3-NEXT:    store i32 [[TMP1]], i32* [[TMP31]], align 4
-// CHECK3-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK3-NEXT:    store i64 4, i64* [[TMP32]], align 4
-// CHECK3-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
-// CHECK3-NEXT:    store i8* null, i8** [[TMP33]], align 4
-// CHECK3-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK3-NEXT:    [[TMP35:%.*]] = bitcast i8** [[TMP34]] to i16**
-// CHECK3-NEXT:    store i16* [[VLA]], i16** [[TMP35]], align 4
-// CHECK3-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
-// CHECK3-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i16**
-// CHECK3-NEXT:    store i16* [[VLA]], i16** [[TMP37]], align 4
-// CHECK3-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK3-NEXT:    store i64 [[TMP9]], i64* [[TMP38]], align 4
-// CHECK3-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
-// CHECK3-NEXT:    store i8* null, i8** [[TMP39]], align 4
-// CHECK3-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK3-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
+// CHECK3-NEXT:    store i8* null, i8** [[TMP30]], align 4
+// CHECK3-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK3-NEXT:    [[TMP32:%.*]] = bitcast i8** [[TMP31]] to i16**
+// CHECK3-NEXT:    store i16* [[VLA]], i16** [[TMP32]], align 4
+// CHECK3-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK3-NEXT:    [[TMP34:%.*]] = bitcast i8** [[TMP33]] to i16**
+// CHECK3-NEXT:    store i16* [[VLA]], i16** [[TMP34]], align 4
+// CHECK3-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
+// CHECK3-NEXT:    store i64 [[TMP9]], i64* [[TMP35]], align 4
+// CHECK3-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
+// CHECK3-NEXT:    store i8* null, i8** [[TMP36]], align 4
+// CHECK3-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK3-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK3-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
 // CHECK3-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK3-NEXT:    [[TMP43:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l215.region_id, i32 5, i8** [[TMP40]], i8** [[TMP41]], i64* [[TMP42]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.12, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
-// CHECK3-NEXT:    [[TMP44:%.*]] = icmp ne i32 [[TMP43]], 0
-// CHECK3-NEXT:    br i1 [[TMP44]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK3-NEXT:    [[TMP40:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l215.region_id, i32 5, i8** [[TMP37]], i8** [[TMP38]], i64* [[TMP39]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.14, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK3-NEXT:    [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0
+// CHECK3-NEXT:    br i1 [[TMP41]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK3:       omp_offload.failed:
 // CHECK3-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l215(%struct.S1* [[THIS1]], i32 [[TMP5]], i32 2, i32 [[TMP1]], i16* [[VLA]]) #[[ATTR4]]
 // CHECK3-NEXT:    br label [[OMP_OFFLOAD_CONT]]
@@ -4865,15 +4811,15 @@ int bar(int n){
 // CHECK3-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l215(%struct.S1* [[THIS1]], i32 [[TMP5]], i32 2, i32 [[TMP1]], i16* [[VLA]]) #[[ATTR4]]
 // CHECK3-NEXT:    br label [[OMP_IF_END]]
 // CHECK3:       omp_if.end:
-// CHECK3-NEXT:    [[TMP45:%.*]] = mul nsw i32 1, [[TMP1]]
-// CHECK3-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i32 [[TMP45]]
+// CHECK3-NEXT:    [[TMP42:%.*]] = mul nsw i32 1, [[TMP1]]
+// CHECK3-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i32 [[TMP42]]
 // CHECK3-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1
-// CHECK3-NEXT:    [[TMP46:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
-// CHECK3-NEXT:    [[CONV:%.*]] = sext i16 [[TMP46]] to i32
-// CHECK3-NEXT:    [[TMP47:%.*]] = load i32, i32* [[B]], align 4
-// CHECK3-NEXT:    [[ADD3:%.*]] = add nsw i32 [[CONV]], [[TMP47]]
-// CHECK3-NEXT:    [[TMP48:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK3-NEXT:    call void @llvm.stackrestore(i8* [[TMP48]])
+// CHECK3-NEXT:    [[TMP43:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
+// CHECK3-NEXT:    [[CONV:%.*]] = sext i16 [[TMP43]] to i32
+// CHECK3-NEXT:    [[TMP44:%.*]] = load i32, i32* [[B]], align 4
+// CHECK3-NEXT:    [[ADD3:%.*]] = add nsw i32 [[CONV]], [[TMP44]]
+// CHECK3-NEXT:    [[TMP45:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK3-NEXT:    call void @llvm.stackrestore(i8* [[TMP45]])
 // CHECK3-NEXT:    ret i32 [[ADD3]]
 //
 //
@@ -4976,7 +4922,7 @@ int bar(int n){
 // CHECK3-NEXT:    [[ADD6:%.*]] = add i32 [[TMP40]], 1
 // CHECK3-NEXT:    [[TMP41:%.*]] = zext i32 [[ADD6]] to i64
 // CHECK3-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 [[TMP41]])
-// CHECK3-NEXT:    [[TMP42:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l197.region_id, i32 5, i8** [[TMP34]], i8** [[TMP35]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_sizes.14, i32 0, i32 0), i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.15, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK3-NEXT:    [[TMP42:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l197.region_id, i32 5, i8** [[TMP34]], i8** [[TMP35]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_sizes.16, i32 0, i32 0), i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.17, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
 // CHECK3-NEXT:    [[TMP43:%.*]] = icmp ne i32 [[TMP42]], 0
 // CHECK3-NEXT:    br i1 [[TMP43]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK3:       omp_offload.failed:
@@ -5046,7 +4992,7 @@ int bar(int n){
 // CHECK3-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK3-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK3-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l180.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.17, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.18, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK3-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l180.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.19, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.20, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
 // CHECK3-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK3-NEXT:    br i1 [[TMP23]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK3:       omp_offload.failed:
@@ -5083,11 +5029,11 @@ int bar(int n){
 // CHECK3-NEXT:    [[TMP4:%.*]] = load i32, i32* [[B_ADDR]], align 4
 // CHECK3-NEXT:    store i32 [[TMP4]], i32* [[B_CASTED]], align 4
 // CHECK3-NEXT:    [[TMP5:%.*]] = load i32, i32* [[B_CASTED]], align 4
-// CHECK3-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i32, i32, i32, i16*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i32 [[TMP5]], i32 [[TMP1]], i32 [[TMP2]], i16* [[TMP3]])
+// CHECK3-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i32, i32, i32, i16*)* @.omp_outlined..12 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i32 [[TMP5]], i32 [[TMP1]], i32 [[TMP2]], i16* [[TMP3]])
 // CHECK3-NEXT:    ret void
 //
 //
-// CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..11
+// CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..12
 // CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR3]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -5216,11 +5162,11 @@ int bar(int n){
 // CHECK3-NEXT:    [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8*
 // CHECK3-NEXT:    store i8 [[TMP7]], i8* [[CONV3]], align 1
 // CHECK3-NEXT:    [[TMP8:%.*]] = load i32, i32* [[AAA_CASTED]], align 4
-// CHECK3-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, [10 x i32]*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], i32 [[TMP8]], [10 x i32]* [[TMP0]])
+// CHECK3-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], i32 [[TMP8]], [10 x i32]* [[TMP0]])
 // CHECK3-NEXT:    ret void
 //
 //
-// CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..13
+// CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..15
 // CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[N:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -5378,11 +5324,11 @@ int bar(int n){
 // CHECK3-NEXT:    [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16*
 // CHECK3-NEXT:    store i16 [[TMP3]], i16* [[CONV1]], align 2
 // CHECK3-NEXT:    [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4
-// CHECK3-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], [10 x i32]* [[TMP0]])
+// CHECK3-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], [10 x i32]* [[TMP0]])
 // CHECK3-NEXT:    ret void
 //
 //
-// CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..16
+// CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..18
 // CHECK3-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -5681,14 +5627,14 @@ int bar(int n){
 // CHECK4-NEXT:    [[TMP92:%.*]] = mul nuw i32 5, [[TMP3]]
 // CHECK4-NEXT:    [[TMP93:%.*]] = mul nuw i32 [[TMP92]], 8
 // CHECK4-NEXT:    [[TMP94:%.*]] = sext i32 [[TMP93]] to i64
-// CHECK4-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 0
-// CHECK4-NEXT:    [[TMP96:%.*]] = bitcast i8** [[TMP95]] to i32*
-// CHECK4-NEXT:    store i32 [[TMP88]], i32* [[TMP96]], align 4
-// CHECK4-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 0
-// CHECK4-NEXT:    [[TMP98:%.*]] = bitcast i8** [[TMP97]] to i32*
-// CHECK4-NEXT:    store i32 [[TMP88]], i32* [[TMP98]], align 4
-// CHECK4-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK4-NEXT:    store i64 4, i64* [[TMP99]], align 4
+// CHECK4-NEXT:    [[TMP95:%.*]] = bitcast [9 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK4-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP95]], i8* align 4 bitcast ([9 x i64]* @.offload_sizes.10 to i8*), i32 72, i1 false)
+// CHECK4-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 0
+// CHECK4-NEXT:    [[TMP97:%.*]] = bitcast i8** [[TMP96]] to i32*
+// CHECK4-NEXT:    store i32 [[TMP88]], i32* [[TMP97]], align 4
+// CHECK4-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 0
+// CHECK4-NEXT:    [[TMP99:%.*]] = bitcast i8** [[TMP98]] to i32*
+// CHECK4-NEXT:    store i32 [[TMP88]], i32* [[TMP99]], align 4
 // CHECK4-NEXT:    [[TMP100:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i32 0, i32 0
 // CHECK4-NEXT:    store i8* null, i8** [[TMP100]], align 4
 // CHECK4-NEXT:    [[TMP101:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 1
@@ -5697,87 +5643,75 @@ int bar(int n){
 // CHECK4-NEXT:    [[TMP103:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 1
 // CHECK4-NEXT:    [[TMP104:%.*]] = bitcast i8** [[TMP103]] to [10 x float]**
 // CHECK4-NEXT:    store [10 x float]* [[B]], [10 x float]** [[TMP104]], align 4
-// CHECK4-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK4-NEXT:    store i64 40, i64* [[TMP105]], align 4
-// CHECK4-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i32 0, i32 1
-// CHECK4-NEXT:    store i8* null, i8** [[TMP106]], align 4
-// CHECK4-NEXT:    [[TMP107:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 2
-// CHECK4-NEXT:    [[TMP108:%.*]] = bitcast i8** [[TMP107]] to i32*
-// CHECK4-NEXT:    store i32 [[TMP1]], i32* [[TMP108]], align 4
-// CHECK4-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 2
-// CHECK4-NEXT:    [[TMP110:%.*]] = bitcast i8** [[TMP109]] to i32*
-// CHECK4-NEXT:    store i32 [[TMP1]], i32* [[TMP110]], align 4
-// CHECK4-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK4-NEXT:    store i64 4, i64* [[TMP111]], align 4
-// CHECK4-NEXT:    [[TMP112:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i32 0, i32 2
-// CHECK4-NEXT:    store i8* null, i8** [[TMP112]], align 4
-// CHECK4-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 3
+// CHECK4-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i32 0, i32 1
+// CHECK4-NEXT:    store i8* null, i8** [[TMP105]], align 4
+// CHECK4-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 2
+// CHECK4-NEXT:    [[TMP107:%.*]] = bitcast i8** [[TMP106]] to i32*
+// CHECK4-NEXT:    store i32 [[TMP1]], i32* [[TMP107]], align 4
+// CHECK4-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 2
+// CHECK4-NEXT:    [[TMP109:%.*]] = bitcast i8** [[TMP108]] to i32*
+// CHECK4-NEXT:    store i32 [[TMP1]], i32* [[TMP109]], align 4
+// CHECK4-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i32 0, i32 2
+// CHECK4-NEXT:    store i8* null, i8** [[TMP110]], align 4
+// CHECK4-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 3
+// CHECK4-NEXT:    [[TMP112:%.*]] = bitcast i8** [[TMP111]] to float**
+// CHECK4-NEXT:    store float* [[VLA]], float** [[TMP112]], align 4
+// CHECK4-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 3
 // CHECK4-NEXT:    [[TMP114:%.*]] = bitcast i8** [[TMP113]] to float**
 // CHECK4-NEXT:    store float* [[VLA]], float** [[TMP114]], align 4
-// CHECK4-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 3
-// CHECK4-NEXT:    [[TMP116:%.*]] = bitcast i8** [[TMP115]] to float**
-// CHECK4-NEXT:    store float* [[VLA]], float** [[TMP116]], align 4
-// CHECK4-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK4-NEXT:    store i64 [[TMP91]], i64* [[TMP117]], align 4
-// CHECK4-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i32 0, i32 3
-// CHECK4-NEXT:    store i8* null, i8** [[TMP118]], align 4
-// CHECK4-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 4
+// CHECK4-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
+// CHECK4-NEXT:    store i64 [[TMP91]], i64* [[TMP115]], align 4
+// CHECK4-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i32 0, i32 3
+// CHECK4-NEXT:    store i8* null, i8** [[TMP116]], align 4
+// CHECK4-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 4
+// CHECK4-NEXT:    [[TMP118:%.*]] = bitcast i8** [[TMP117]] to [5 x [10 x double]]**
+// CHECK4-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP118]], align 4
+// CHECK4-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 4
 // CHECK4-NEXT:    [[TMP120:%.*]] = bitcast i8** [[TMP119]] to [5 x [10 x double]]**
 // CHECK4-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP120]], align 4
-// CHECK4-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 4
-// CHECK4-NEXT:    [[TMP122:%.*]] = bitcast i8** [[TMP121]] to [5 x [10 x double]]**
-// CHECK4-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP122]], align 4
-// CHECK4-NEXT:    [[TMP123:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK4-NEXT:    store i64 400, i64* [[TMP123]], align 4
-// CHECK4-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i32 0, i32 4
-// CHECK4-NEXT:    store i8* null, i8** [[TMP124]], align 4
-// CHECK4-NEXT:    [[TMP125:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 5
-// CHECK4-NEXT:    [[TMP126:%.*]] = bitcast i8** [[TMP125]] to i32*
-// CHECK4-NEXT:    store i32 5, i32* [[TMP126]], align 4
-// CHECK4-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 5
+// CHECK4-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i32 0, i32 4
+// CHECK4-NEXT:    store i8* null, i8** [[TMP121]], align 4
+// CHECK4-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 5
+// CHECK4-NEXT:    [[TMP123:%.*]] = bitcast i8** [[TMP122]] to i32*
+// CHECK4-NEXT:    store i32 5, i32* [[TMP123]], align 4
+// CHECK4-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 5
+// CHECK4-NEXT:    [[TMP125:%.*]] = bitcast i8** [[TMP124]] to i32*
+// CHECK4-NEXT:    store i32 5, i32* [[TMP125]], align 4
+// CHECK4-NEXT:    [[TMP126:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i32 0, i32 5
+// CHECK4-NEXT:    store i8* null, i8** [[TMP126]], align 4
+// CHECK4-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 6
 // CHECK4-NEXT:    [[TMP128:%.*]] = bitcast i8** [[TMP127]] to i32*
-// CHECK4-NEXT:    store i32 5, i32* [[TMP128]], align 4
-// CHECK4-NEXT:    [[TMP129:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 5
-// CHECK4-NEXT:    store i64 4, i64* [[TMP129]], align 4
-// CHECK4-NEXT:    [[TMP130:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i32 0, i32 5
-// CHECK4-NEXT:    store i8* null, i8** [[TMP130]], align 4
-// CHECK4-NEXT:    [[TMP131:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 6
-// CHECK4-NEXT:    [[TMP132:%.*]] = bitcast i8** [[TMP131]] to i32*
-// CHECK4-NEXT:    store i32 [[TMP3]], i32* [[TMP132]], align 4
-// CHECK4-NEXT:    [[TMP133:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 6
-// CHECK4-NEXT:    [[TMP134:%.*]] = bitcast i8** [[TMP133]] to i32*
-// CHECK4-NEXT:    store i32 [[TMP3]], i32* [[TMP134]], align 4
-// CHECK4-NEXT:    [[TMP135:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 6
-// CHECK4-NEXT:    store i64 4, i64* [[TMP135]], align 4
-// CHECK4-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i32 0, i32 6
-// CHECK4-NEXT:    store i8* null, i8** [[TMP136]], align 4
-// CHECK4-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 7
-// CHECK4-NEXT:    [[TMP138:%.*]] = bitcast i8** [[TMP137]] to double**
-// CHECK4-NEXT:    store double* [[VLA1]], double** [[TMP138]], align 4
-// CHECK4-NEXT:    [[TMP139:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 7
-// CHECK4-NEXT:    [[TMP140:%.*]] = bitcast i8** [[TMP139]] to double**
-// CHECK4-NEXT:    store double* [[VLA1]], double** [[TMP140]], align 4
-// CHECK4-NEXT:    [[TMP141:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
-// CHECK4-NEXT:    store i64 [[TMP94]], i64* [[TMP141]], align 4
-// CHECK4-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i32 0, i32 7
+// CHECK4-NEXT:    store i32 [[TMP3]], i32* [[TMP128]], align 4
+// CHECK4-NEXT:    [[TMP129:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 6
+// CHECK4-NEXT:    [[TMP130:%.*]] = bitcast i8** [[TMP129]] to i32*
+// CHECK4-NEXT:    store i32 [[TMP3]], i32* [[TMP130]], align 4
+// CHECK4-NEXT:    [[TMP131:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i32 0, i32 6
+// CHECK4-NEXT:    store i8* null, i8** [[TMP131]], align 4
+// CHECK4-NEXT:    [[TMP132:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 7
+// CHECK4-NEXT:    [[TMP133:%.*]] = bitcast i8** [[TMP132]] to double**
+// CHECK4-NEXT:    store double* [[VLA1]], double** [[TMP133]], align 4
+// CHECK4-NEXT:    [[TMP134:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 7
+// CHECK4-NEXT:    [[TMP135:%.*]] = bitcast i8** [[TMP134]] to double**
+// CHECK4-NEXT:    store double* [[VLA1]], double** [[TMP135]], align 4
+// CHECK4-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
+// CHECK4-NEXT:    store i64 [[TMP94]], i64* [[TMP136]], align 4
+// CHECK4-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i32 0, i32 7
+// CHECK4-NEXT:    store i8* null, i8** [[TMP137]], align 4
+// CHECK4-NEXT:    [[TMP138:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 8
+// CHECK4-NEXT:    [[TMP139:%.*]] = bitcast i8** [[TMP138]] to %struct.TT**
+// CHECK4-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP139]], align 4
+// CHECK4-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 8
+// CHECK4-NEXT:    [[TMP141:%.*]] = bitcast i8** [[TMP140]] to %struct.TT**
+// CHECK4-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP141]], align 4
+// CHECK4-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i32 0, i32 8
 // CHECK4-NEXT:    store i8* null, i8** [[TMP142]], align 4
-// CHECK4-NEXT:    [[TMP143:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 8
-// CHECK4-NEXT:    [[TMP144:%.*]] = bitcast i8** [[TMP143]] to %struct.TT**
-// CHECK4-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP144]], align 4
-// CHECK4-NEXT:    [[TMP145:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 8
-// CHECK4-NEXT:    [[TMP146:%.*]] = bitcast i8** [[TMP145]] to %struct.TT**
-// CHECK4-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP146]], align 4
-// CHECK4-NEXT:    [[TMP147:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 8
-// CHECK4-NEXT:    store i64 12, i64* [[TMP147]], align 4
-// CHECK4-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i32 0, i32 8
-// CHECK4-NEXT:    store i8* null, i8** [[TMP148]], align 4
-// CHECK4-NEXT:    [[TMP149:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 0
-// CHECK4-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 0
-// CHECK4-NEXT:    [[TMP151:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK4-NEXT:    [[TMP143:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 0
+// CHECK4-NEXT:    [[TMP144:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 0
+// CHECK4-NEXT:    [[TMP145:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
 // CHECK4-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK4-NEXT:    [[TMP152:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l142.region_id, i32 9, i8** [[TMP149]], i8** [[TMP150]], i64* [[TMP151]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.10, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
-// CHECK4-NEXT:    [[TMP153:%.*]] = icmp ne i32 [[TMP152]], 0
-// CHECK4-NEXT:    br i1 [[TMP153]], label [[OMP_OFFLOAD_FAILED25:%.*]], label [[OMP_OFFLOAD_CONT26:%.*]]
+// CHECK4-NEXT:    [[TMP146:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l142.region_id, i32 9, i8** [[TMP143]], i8** [[TMP144]], i64* [[TMP145]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.11, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK4-NEXT:    [[TMP147:%.*]] = icmp ne i32 [[TMP146]], 0
+// CHECK4-NEXT:    br i1 [[TMP147]], label [[OMP_OFFLOAD_FAILED25:%.*]], label [[OMP_OFFLOAD_CONT26:%.*]]
 // CHECK4:       omp_offload.failed25:
 // CHECK4-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l142(i32 [[TMP88]], [10 x float]* [[B]], i32 [[TMP1]], float* [[VLA]], [5 x [10 x double]]* [[C]], i32 5, i32 [[TMP3]], double* [[VLA1]], %struct.TT* [[D]]) #[[ATTR4]]
 // CHECK4-NEXT:    br label [[OMP_OFFLOAD_CONT26]]
@@ -5787,10 +5721,10 @@ int bar(int n){
 // CHECK4-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l142(i32 [[TMP88]], [10 x float]* [[B]], i32 [[TMP1]], float* [[VLA]], [5 x [10 x double]]* [[C]], i32 5, i32 [[TMP3]], double* [[VLA1]], %struct.TT* [[D]]) #[[ATTR4]]
 // CHECK4-NEXT:    br label [[OMP_IF_END28]]
 // CHECK4:       omp_if.end28:
-// CHECK4-NEXT:    [[TMP154:%.*]] = load i32, i32* [[A]], align 4
-// CHECK4-NEXT:    [[TMP155:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK4-NEXT:    call void @llvm.stackrestore(i8* [[TMP155]])
-// CHECK4-NEXT:    ret i32 [[TMP154]]
+// CHECK4-NEXT:    [[TMP148:%.*]] = load i32, i32* [[A]], align 4
+// CHECK4-NEXT:    [[TMP149:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK4-NEXT:    call void @llvm.stackrestore(i8* [[TMP149]])
+// CHECK4-NEXT:    ret i32 [[TMP148]]
 //
 //
 // CHECK4-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l97
@@ -6504,14 +6438,14 @@ int bar(int n){
 // CHECK4-NEXT:    [[TMP7:%.*]] = mul nuw i32 2, [[TMP1]]
 // CHECK4-NEXT:    [[TMP8:%.*]] = mul nuw i32 [[TMP7]], 2
 // CHECK4-NEXT:    [[TMP9:%.*]] = sext i32 [[TMP8]] to i64
-// CHECK4-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK4-NEXT:    [[TMP11:%.*]] = bitcast i8** [[TMP10]] to %struct.S1**
-// CHECK4-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP11]], align 4
-// CHECK4-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK4-NEXT:    [[TMP13:%.*]] = bitcast i8** [[TMP12]] to double**
-// CHECK4-NEXT:    store double* [[A]], double** [[TMP13]], align 4
-// CHECK4-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK4-NEXT:    store i64 8, i64* [[TMP14]], align 4
+// CHECK4-NEXT:    [[TMP10:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK4-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP10]], i8* align 4 bitcast ([5 x i64]* @.offload_sizes.13 to i8*), i32 40, i1 false)
+// CHECK4-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK4-NEXT:    [[TMP12:%.*]] = bitcast i8** [[TMP11]] to %struct.S1**
+// CHECK4-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP12]], align 4
+// CHECK4-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK4-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to double**
+// CHECK4-NEXT:    store double* [[A]], double** [[TMP14]], align 4
 // CHECK4-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
 // CHECK4-NEXT:    store i8* null, i8** [[TMP15]], align 4
 // CHECK4-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -6520,47 +6454,41 @@ int bar(int n){
 // CHECK4-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK4-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32*
 // CHECK4-NEXT:    store i32 [[TMP5]], i32* [[TMP19]], align 4
-// CHECK4-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK4-NEXT:    store i64 4, i64* [[TMP20]], align 4
-// CHECK4-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK4-NEXT:    store i8* null, i8** [[TMP21]], align 4
-// CHECK4-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK4-NEXT:    [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i32*
-// CHECK4-NEXT:    store i32 2, i32* [[TMP23]], align 4
-// CHECK4-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK4-NEXT:    [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i32*
-// CHECK4-NEXT:    store i32 2, i32* [[TMP25]], align 4
-// CHECK4-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK4-NEXT:    store i64 4, i64* [[TMP26]], align 4
-// CHECK4-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK4-NEXT:    store i8* null, i8** [[TMP27]], align 4
-// CHECK4-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK4-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK4-NEXT:    store i8* null, i8** [[TMP20]], align 4
+// CHECK4-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK4-NEXT:    [[TMP22:%.*]] = bitcast i8** [[TMP21]] to i32*
+// CHECK4-NEXT:    store i32 2, i32* [[TMP22]], align 4
+// CHECK4-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK4-NEXT:    [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i32*
+// CHECK4-NEXT:    store i32 2, i32* [[TMP24]], align 4
+// CHECK4-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK4-NEXT:    store i8* null, i8** [[TMP25]], align 4
+// CHECK4-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK4-NEXT:    [[TMP27:%.*]] = bitcast i8** [[TMP26]] to i32*
+// CHECK4-NEXT:    store i32 [[TMP1]], i32* [[TMP27]], align 4
+// CHECK4-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
 // CHECK4-NEXT:    [[TMP29:%.*]] = bitcast i8** [[TMP28]] to i32*
 // CHECK4-NEXT:    store i32 [[TMP1]], i32* [[TMP29]], align 4
-// CHECK4-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK4-NEXT:    [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i32*
-// CHECK4-NEXT:    store i32 [[TMP1]], i32* [[TMP31]], align 4
-// CHECK4-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK4-NEXT:    store i64 4, i64* [[TMP32]], align 4
-// CHECK4-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
-// CHECK4-NEXT:    store i8* null, i8** [[TMP33]], align 4
-// CHECK4-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK4-NEXT:    [[TMP35:%.*]] = bitcast i8** [[TMP34]] to i16**
-// CHECK4-NEXT:    store i16* [[VLA]], i16** [[TMP35]], align 4
-// CHECK4-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
-// CHECK4-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i16**
-// CHECK4-NEXT:    store i16* [[VLA]], i16** [[TMP37]], align 4
-// CHECK4-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK4-NEXT:    store i64 [[TMP9]], i64* [[TMP38]], align 4
-// CHECK4-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
-// CHECK4-NEXT:    store i8* null, i8** [[TMP39]], align 4
-// CHECK4-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK4-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK4-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK4-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
+// CHECK4-NEXT:    store i8* null, i8** [[TMP30]], align 4
+// CHECK4-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK4-NEXT:    [[TMP32:%.*]] = bitcast i8** [[TMP31]] to i16**
+// CHECK4-NEXT:    store i16* [[VLA]], i16** [[TMP32]], align 4
+// CHECK4-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK4-NEXT:    [[TMP34:%.*]] = bitcast i8** [[TMP33]] to i16**
+// CHECK4-NEXT:    store i16* [[VLA]], i16** [[TMP34]], align 4
+// CHECK4-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
+// CHECK4-NEXT:    store i64 [[TMP9]], i64* [[TMP35]], align 4
+// CHECK4-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
+// CHECK4-NEXT:    store i8* null, i8** [[TMP36]], align 4
+// CHECK4-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK4-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK4-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
 // CHECK4-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK4-NEXT:    [[TMP43:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l215.region_id, i32 5, i8** [[TMP40]], i8** [[TMP41]], i64* [[TMP42]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.12, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
-// CHECK4-NEXT:    [[TMP44:%.*]] = icmp ne i32 [[TMP43]], 0
-// CHECK4-NEXT:    br i1 [[TMP44]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK4-NEXT:    [[TMP40:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l215.region_id, i32 5, i8** [[TMP37]], i8** [[TMP38]], i64* [[TMP39]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.14, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK4-NEXT:    [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0
+// CHECK4-NEXT:    br i1 [[TMP41]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK4:       omp_offload.failed:
 // CHECK4-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l215(%struct.S1* [[THIS1]], i32 [[TMP5]], i32 2, i32 [[TMP1]], i16* [[VLA]]) #[[ATTR4]]
 // CHECK4-NEXT:    br label [[OMP_OFFLOAD_CONT]]
@@ -6570,15 +6498,15 @@ int bar(int n){
 // CHECK4-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l215(%struct.S1* [[THIS1]], i32 [[TMP5]], i32 2, i32 [[TMP1]], i16* [[VLA]]) #[[ATTR4]]
 // CHECK4-NEXT:    br label [[OMP_IF_END]]
 // CHECK4:       omp_if.end:
-// CHECK4-NEXT:    [[TMP45:%.*]] = mul nsw i32 1, [[TMP1]]
-// CHECK4-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i32 [[TMP45]]
+// CHECK4-NEXT:    [[TMP42:%.*]] = mul nsw i32 1, [[TMP1]]
+// CHECK4-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i32 [[TMP42]]
 // CHECK4-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1
-// CHECK4-NEXT:    [[TMP46:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
-// CHECK4-NEXT:    [[CONV:%.*]] = sext i16 [[TMP46]] to i32
-// CHECK4-NEXT:    [[TMP47:%.*]] = load i32, i32* [[B]], align 4
-// CHECK4-NEXT:    [[ADD3:%.*]] = add nsw i32 [[CONV]], [[TMP47]]
-// CHECK4-NEXT:    [[TMP48:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK4-NEXT:    call void @llvm.stackrestore(i8* [[TMP48]])
+// CHECK4-NEXT:    [[TMP43:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
+// CHECK4-NEXT:    [[CONV:%.*]] = sext i16 [[TMP43]] to i32
+// CHECK4-NEXT:    [[TMP44:%.*]] = load i32, i32* [[B]], align 4
+// CHECK4-NEXT:    [[ADD3:%.*]] = add nsw i32 [[CONV]], [[TMP44]]
+// CHECK4-NEXT:    [[TMP45:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK4-NEXT:    call void @llvm.stackrestore(i8* [[TMP45]])
 // CHECK4-NEXT:    ret i32 [[ADD3]]
 //
 //
@@ -6681,7 +6609,7 @@ int bar(int n){
 // CHECK4-NEXT:    [[ADD6:%.*]] = add i32 [[TMP40]], 1
 // CHECK4-NEXT:    [[TMP41:%.*]] = zext i32 [[ADD6]] to i64
 // CHECK4-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 [[TMP41]])
-// CHECK4-NEXT:    [[TMP42:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l197.region_id, i32 5, i8** [[TMP34]], i8** [[TMP35]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_sizes.14, i32 0, i32 0), i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.15, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK4-NEXT:    [[TMP42:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l197.region_id, i32 5, i8** [[TMP34]], i8** [[TMP35]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_sizes.16, i32 0, i32 0), i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.17, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
 // CHECK4-NEXT:    [[TMP43:%.*]] = icmp ne i32 [[TMP42]], 0
 // CHECK4-NEXT:    br i1 [[TMP43]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK4:       omp_offload.failed:
@@ -6751,7 +6679,7 @@ int bar(int n){
 // CHECK4-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK4-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK4-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK4-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l180.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.17, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.18, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK4-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l180.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.19, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.20, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
 // CHECK4-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK4-NEXT:    br i1 [[TMP23]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK4:       omp_offload.failed:
@@ -6788,11 +6716,11 @@ int bar(int n){
 // CHECK4-NEXT:    [[TMP4:%.*]] = load i32, i32* [[B_ADDR]], align 4
 // CHECK4-NEXT:    store i32 [[TMP4]], i32* [[B_CASTED]], align 4
 // CHECK4-NEXT:    [[TMP5:%.*]] = load i32, i32* [[B_CASTED]], align 4
-// CHECK4-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i32, i32, i32, i16*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i32 [[TMP5]], i32 [[TMP1]], i32 [[TMP2]], i16* [[TMP3]])
+// CHECK4-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i32, i32, i32, i16*)* @.omp_outlined..12 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i32 [[TMP5]], i32 [[TMP1]], i32 [[TMP2]], i16* [[TMP3]])
 // CHECK4-NEXT:    ret void
 //
 //
-// CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..11
+// CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..12
 // CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR3]] {
 // CHECK4-NEXT:  entry:
 // CHECK4-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -6921,11 +6849,11 @@ int bar(int n){
 // CHECK4-NEXT:    [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8*
 // CHECK4-NEXT:    store i8 [[TMP7]], i8* [[CONV3]], align 1
 // CHECK4-NEXT:    [[TMP8:%.*]] = load i32, i32* [[AAA_CASTED]], align 4
-// CHECK4-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, [10 x i32]*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], i32 [[TMP8]], [10 x i32]* [[TMP0]])
+// CHECK4-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], i32 [[TMP8]], [10 x i32]* [[TMP0]])
 // CHECK4-NEXT:    ret void
 //
 //
-// CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..13
+// CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..15
 // CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[N:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] {
 // CHECK4-NEXT:  entry:
 // CHECK4-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -7083,11 +7011,11 @@ int bar(int n){
 // CHECK4-NEXT:    [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16*
 // CHECK4-NEXT:    store i16 [[TMP3]], i16* [[CONV1]], align 2
 // CHECK4-NEXT:    [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4
-// CHECK4-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], [10 x i32]* [[TMP0]])
+// CHECK4-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], [10 x i32]* [[TMP0]])
 // CHECK4-NEXT:    ret void
 //
 //
-// CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..16
+// CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..18
 // CHECK4-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] {
 // CHECK4-NEXT:  entry:
 // CHECK4-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -7391,14 +7319,14 @@ int bar(int n){
 // CHECK5-NEXT:    [[TMP92:%.*]] = mul nuw i64 [[TMP2]], 4
 // CHECK5-NEXT:    [[TMP93:%.*]] = mul nuw i64 5, [[TMP5]]
 // CHECK5-NEXT:    [[TMP94:%.*]] = mul nuw i64 [[TMP93]], 8
-// CHECK5-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 0
-// CHECK5-NEXT:    [[TMP96:%.*]] = bitcast i8** [[TMP95]] to i64*
-// CHECK5-NEXT:    store i64 [[TMP90]], i64* [[TMP96]], align 8
-// CHECK5-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS27]], i32 0, i32 0
-// CHECK5-NEXT:    [[TMP98:%.*]] = bitcast i8** [[TMP97]] to i64*
-// CHECK5-NEXT:    store i64 [[TMP90]], i64* [[TMP98]], align 8
-// CHECK5-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK5-NEXT:    store i64 4, i64* [[TMP99]], align 8
+// CHECK5-NEXT:    [[TMP95:%.*]] = bitcast [9 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK5-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP95]], i8* align 8 bitcast ([9 x i64]* @.offload_sizes.10 to i8*), i64 72, i1 false)
+// CHECK5-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 0
+// CHECK5-NEXT:    [[TMP97:%.*]] = bitcast i8** [[TMP96]] to i64*
+// CHECK5-NEXT:    store i64 [[TMP90]], i64* [[TMP97]], align 8
+// CHECK5-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS27]], i32 0, i32 0
+// CHECK5-NEXT:    [[TMP99:%.*]] = bitcast i8** [[TMP98]] to i64*
+// CHECK5-NEXT:    store i64 [[TMP90]], i64* [[TMP99]], align 8
 // CHECK5-NEXT:    [[TMP100:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS28]], i64 0, i64 0
 // CHECK5-NEXT:    store i8* null, i8** [[TMP100]], align 8
 // CHECK5-NEXT:    [[TMP101:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 1
@@ -7407,87 +7335,75 @@ int bar(int n){
 // CHECK5-NEXT:    [[TMP103:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS27]], i32 0, i32 1
 // CHECK5-NEXT:    [[TMP104:%.*]] = bitcast i8** [[TMP103]] to [10 x float]**
 // CHECK5-NEXT:    store [10 x float]* [[B]], [10 x float]** [[TMP104]], align 8
-// CHECK5-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK5-NEXT:    store i64 40, i64* [[TMP105]], align 8
-// CHECK5-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS28]], i64 0, i64 1
-// CHECK5-NEXT:    store i8* null, i8** [[TMP106]], align 8
-// CHECK5-NEXT:    [[TMP107:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 2
-// CHECK5-NEXT:    [[TMP108:%.*]] = bitcast i8** [[TMP107]] to i64*
-// CHECK5-NEXT:    store i64 [[TMP2]], i64* [[TMP108]], align 8
-// CHECK5-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS27]], i32 0, i32 2
-// CHECK5-NEXT:    [[TMP110:%.*]] = bitcast i8** [[TMP109]] to i64*
-// CHECK5-NEXT:    store i64 [[TMP2]], i64* [[TMP110]], align 8
-// CHECK5-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK5-NEXT:    store i64 8, i64* [[TMP111]], align 8
-// CHECK5-NEXT:    [[TMP112:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS28]], i64 0, i64 2
-// CHECK5-NEXT:    store i8* null, i8** [[TMP112]], align 8
-// CHECK5-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 3
+// CHECK5-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS28]], i64 0, i64 1
+// CHECK5-NEXT:    store i8* null, i8** [[TMP105]], align 8
+// CHECK5-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 2
+// CHECK5-NEXT:    [[TMP107:%.*]] = bitcast i8** [[TMP106]] to i64*
+// CHECK5-NEXT:    store i64 [[TMP2]], i64* [[TMP107]], align 8
+// CHECK5-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS27]], i32 0, i32 2
+// CHECK5-NEXT:    [[TMP109:%.*]] = bitcast i8** [[TMP108]] to i64*
+// CHECK5-NEXT:    store i64 [[TMP2]], i64* [[TMP109]], align 8
+// CHECK5-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS28]], i64 0, i64 2
+// CHECK5-NEXT:    store i8* null, i8** [[TMP110]], align 8
+// CHECK5-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 3
+// CHECK5-NEXT:    [[TMP112:%.*]] = bitcast i8** [[TMP111]] to float**
+// CHECK5-NEXT:    store float* [[VLA]], float** [[TMP112]], align 8
+// CHECK5-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS27]], i32 0, i32 3
 // CHECK5-NEXT:    [[TMP114:%.*]] = bitcast i8** [[TMP113]] to float**
 // CHECK5-NEXT:    store float* [[VLA]], float** [[TMP114]], align 8
-// CHECK5-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS27]], i32 0, i32 3
-// CHECK5-NEXT:    [[TMP116:%.*]] = bitcast i8** [[TMP115]] to float**
-// CHECK5-NEXT:    store float* [[VLA]], float** [[TMP116]], align 8
-// CHECK5-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK5-NEXT:    store i64 [[TMP92]], i64* [[TMP117]], align 8
-// CHECK5-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS28]], i64 0, i64 3
-// CHECK5-NEXT:    store i8* null, i8** [[TMP118]], align 8
-// CHECK5-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 4
+// CHECK5-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
+// CHECK5-NEXT:    store i64 [[TMP92]], i64* [[TMP115]], align 8
+// CHECK5-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS28]], i64 0, i64 3
+// CHECK5-NEXT:    store i8* null, i8** [[TMP116]], align 8
+// CHECK5-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 4
+// CHECK5-NEXT:    [[TMP118:%.*]] = bitcast i8** [[TMP117]] to [5 x [10 x double]]**
+// CHECK5-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP118]], align 8
+// CHECK5-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS27]], i32 0, i32 4
 // CHECK5-NEXT:    [[TMP120:%.*]] = bitcast i8** [[TMP119]] to [5 x [10 x double]]**
 // CHECK5-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP120]], align 8
-// CHECK5-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS27]], i32 0, i32 4
-// CHECK5-NEXT:    [[TMP122:%.*]] = bitcast i8** [[TMP121]] to [5 x [10 x double]]**
-// CHECK5-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP122]], align 8
-// CHECK5-NEXT:    [[TMP123:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK5-NEXT:    store i64 400, i64* [[TMP123]], align 8
-// CHECK5-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS28]], i64 0, i64 4
-// CHECK5-NEXT:    store i8* null, i8** [[TMP124]], align 8
-// CHECK5-NEXT:    [[TMP125:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 5
-// CHECK5-NEXT:    [[TMP126:%.*]] = bitcast i8** [[TMP125]] to i64*
-// CHECK5-NEXT:    store i64 5, i64* [[TMP126]], align 8
-// CHECK5-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS27]], i32 0, i32 5
+// CHECK5-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS28]], i64 0, i64 4
+// CHECK5-NEXT:    store i8* null, i8** [[TMP121]], align 8
+// CHECK5-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 5
+// CHECK5-NEXT:    [[TMP123:%.*]] = bitcast i8** [[TMP122]] to i64*
+// CHECK5-NEXT:    store i64 5, i64* [[TMP123]], align 8
+// CHECK5-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS27]], i32 0, i32 5
+// CHECK5-NEXT:    [[TMP125:%.*]] = bitcast i8** [[TMP124]] to i64*
+// CHECK5-NEXT:    store i64 5, i64* [[TMP125]], align 8
+// CHECK5-NEXT:    [[TMP126:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS28]], i64 0, i64 5
+// CHECK5-NEXT:    store i8* null, i8** [[TMP126]], align 8
+// CHECK5-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 6
 // CHECK5-NEXT:    [[TMP128:%.*]] = bitcast i8** [[TMP127]] to i64*
-// CHECK5-NEXT:    store i64 5, i64* [[TMP128]], align 8
-// CHECK5-NEXT:    [[TMP129:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 5
-// CHECK5-NEXT:    store i64 8, i64* [[TMP129]], align 8
-// CHECK5-NEXT:    [[TMP130:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS28]], i64 0, i64 5
-// CHECK5-NEXT:    store i8* null, i8** [[TMP130]], align 8
-// CHECK5-NEXT:    [[TMP131:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 6
-// CHECK5-NEXT:    [[TMP132:%.*]] = bitcast i8** [[TMP131]] to i64*
-// CHECK5-NEXT:    store i64 [[TMP5]], i64* [[TMP132]], align 8
-// CHECK5-NEXT:    [[TMP133:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS27]], i32 0, i32 6
-// CHECK5-NEXT:    [[TMP134:%.*]] = bitcast i8** [[TMP133]] to i64*
-// CHECK5-NEXT:    store i64 [[TMP5]], i64* [[TMP134]], align 8
-// CHECK5-NEXT:    [[TMP135:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 6
-// CHECK5-NEXT:    store i64 8, i64* [[TMP135]], align 8
-// CHECK5-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS28]], i64 0, i64 6
-// CHECK5-NEXT:    store i8* null, i8** [[TMP136]], align 8
-// CHECK5-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 7
-// CHECK5-NEXT:    [[TMP138:%.*]] = bitcast i8** [[TMP137]] to double**
-// CHECK5-NEXT:    store double* [[VLA1]], double** [[TMP138]], align 8
-// CHECK5-NEXT:    [[TMP139:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS27]], i32 0, i32 7
-// CHECK5-NEXT:    [[TMP140:%.*]] = bitcast i8** [[TMP139]] to double**
-// CHECK5-NEXT:    store double* [[VLA1]], double** [[TMP140]], align 8
-// CHECK5-NEXT:    [[TMP141:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
-// CHECK5-NEXT:    store i64 [[TMP94]], i64* [[TMP141]], align 8
-// CHECK5-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS28]], i64 0, i64 7
+// CHECK5-NEXT:    store i64 [[TMP5]], i64* [[TMP128]], align 8
+// CHECK5-NEXT:    [[TMP129:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS27]], i32 0, i32 6
+// CHECK5-NEXT:    [[TMP130:%.*]] = bitcast i8** [[TMP129]] to i64*
+// CHECK5-NEXT:    store i64 [[TMP5]], i64* [[TMP130]], align 8
+// CHECK5-NEXT:    [[TMP131:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS28]], i64 0, i64 6
+// CHECK5-NEXT:    store i8* null, i8** [[TMP131]], align 8
+// CHECK5-NEXT:    [[TMP132:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 7
+// CHECK5-NEXT:    [[TMP133:%.*]] = bitcast i8** [[TMP132]] to double**
+// CHECK5-NEXT:    store double* [[VLA1]], double** [[TMP133]], align 8
+// CHECK5-NEXT:    [[TMP134:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS27]], i32 0, i32 7
+// CHECK5-NEXT:    [[TMP135:%.*]] = bitcast i8** [[TMP134]] to double**
+// CHECK5-NEXT:    store double* [[VLA1]], double** [[TMP135]], align 8
+// CHECK5-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
+// CHECK5-NEXT:    store i64 [[TMP94]], i64* [[TMP136]], align 8
+// CHECK5-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS28]], i64 0, i64 7
+// CHECK5-NEXT:    store i8* null, i8** [[TMP137]], align 8
+// CHECK5-NEXT:    [[TMP138:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 8
+// CHECK5-NEXT:    [[TMP139:%.*]] = bitcast i8** [[TMP138]] to %struct.TT**
+// CHECK5-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP139]], align 8
+// CHECK5-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS27]], i32 0, i32 8
+// CHECK5-NEXT:    [[TMP141:%.*]] = bitcast i8** [[TMP140]] to %struct.TT**
+// CHECK5-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP141]], align 8
+// CHECK5-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS28]], i64 0, i64 8
 // CHECK5-NEXT:    store i8* null, i8** [[TMP142]], align 8
-// CHECK5-NEXT:    [[TMP143:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 8
-// CHECK5-NEXT:    [[TMP144:%.*]] = bitcast i8** [[TMP143]] to %struct.TT**
-// CHECK5-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP144]], align 8
-// CHECK5-NEXT:    [[TMP145:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS27]], i32 0, i32 8
-// CHECK5-NEXT:    [[TMP146:%.*]] = bitcast i8** [[TMP145]] to %struct.TT**
-// CHECK5-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP146]], align 8
-// CHECK5-NEXT:    [[TMP147:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 8
-// CHECK5-NEXT:    store i64 16, i64* [[TMP147]], align 8
-// CHECK5-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS28]], i64 0, i64 8
-// CHECK5-NEXT:    store i8* null, i8** [[TMP148]], align 8
-// CHECK5-NEXT:    [[TMP149:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 0
-// CHECK5-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS27]], i32 0, i32 0
-// CHECK5-NEXT:    [[TMP151:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK5-NEXT:    [[TMP143:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 0
+// CHECK5-NEXT:    [[TMP144:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS27]], i32 0, i32 0
+// CHECK5-NEXT:    [[TMP145:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
 // CHECK5-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK5-NEXT:    [[TMP152:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l142.region_id, i32 9, i8** [[TMP149]], i8** [[TMP150]], i64* [[TMP151]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.10, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
-// CHECK5-NEXT:    [[TMP153:%.*]] = icmp ne i32 [[TMP152]], 0
-// CHECK5-NEXT:    br i1 [[TMP153]], label [[OMP_OFFLOAD_FAILED30:%.*]], label [[OMP_OFFLOAD_CONT31:%.*]]
+// CHECK5-NEXT:    [[TMP146:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l142.region_id, i32 9, i8** [[TMP143]], i8** [[TMP144]], i64* [[TMP145]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.11, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK5-NEXT:    [[TMP147:%.*]] = icmp ne i32 [[TMP146]], 0
+// CHECK5-NEXT:    br i1 [[TMP147]], label [[OMP_OFFLOAD_FAILED30:%.*]], label [[OMP_OFFLOAD_CONT31:%.*]]
 // CHECK5:       omp_offload.failed30:
 // CHECK5-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l142(i64 [[TMP90]], [10 x float]* [[B]], i64 [[TMP2]], float* [[VLA]], [5 x [10 x double]]* [[C]], i64 5, i64 [[TMP5]], double* [[VLA1]], %struct.TT* [[D]]) #[[ATTR4]]
 // CHECK5-NEXT:    br label [[OMP_OFFLOAD_CONT31]]
@@ -7497,10 +7413,10 @@ int bar(int n){
 // CHECK5-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l142(i64 [[TMP90]], [10 x float]* [[B]], i64 [[TMP2]], float* [[VLA]], [5 x [10 x double]]* [[C]], i64 5, i64 [[TMP5]], double* [[VLA1]], %struct.TT* [[D]]) #[[ATTR4]]
 // CHECK5-NEXT:    br label [[OMP_IF_END33]]
 // CHECK5:       omp_if.end33:
-// CHECK5-NEXT:    [[TMP154:%.*]] = load i32, i32* [[A]], align 4
-// CHECK5-NEXT:    [[TMP155:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK5-NEXT:    call void @llvm.stackrestore(i8* [[TMP155]])
-// CHECK5-NEXT:    ret i32 [[TMP154]]
+// CHECK5-NEXT:    [[TMP148:%.*]] = load i32, i32* [[A]], align 4
+// CHECK5-NEXT:    [[TMP149:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK5-NEXT:    call void @llvm.stackrestore(i8* [[TMP149]])
+// CHECK5-NEXT:    ret i32 [[TMP148]]
 //
 //
 // CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l97
@@ -8240,14 +8156,14 @@ int bar(int n){
 // CHECK5-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0
 // CHECK5-NEXT:    [[TMP11:%.*]] = mul nuw i64 2, [[TMP2]]
 // CHECK5-NEXT:    [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 2
-// CHECK5-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK5-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to %struct.S1**
-// CHECK5-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP14]], align 8
-// CHECK5-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK5-NEXT:    [[TMP16:%.*]] = bitcast i8** [[TMP15]] to double**
-// CHECK5-NEXT:    store double* [[A]], double** [[TMP16]], align 8
-// CHECK5-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [6 x i64], [6 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK5-NEXT:    store i64 8, i64* [[TMP17]], align 8
+// CHECK5-NEXT:    [[TMP13:%.*]] = bitcast [6 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK5-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP13]], i8* align 8 bitcast ([6 x i64]* @.offload_sizes.13 to i8*), i64 48, i1 false)
+// CHECK5-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK5-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to %struct.S1**
+// CHECK5-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP15]], align 8
+// CHECK5-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK5-NEXT:    [[TMP17:%.*]] = bitcast i8** [[TMP16]] to double**
+// CHECK5-NEXT:    store double* [[A]], double** [[TMP17]], align 8
 // CHECK5-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
 // CHECK5-NEXT:    store i8* null, i8** [[TMP18]], align 8
 // CHECK5-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -8256,57 +8172,49 @@ int bar(int n){
 // CHECK5-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK5-NEXT:    [[TMP22:%.*]] = bitcast i8** [[TMP21]] to i64*
 // CHECK5-NEXT:    store i64 [[TMP7]], i64* [[TMP22]], align 8
-// CHECK5-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [6 x i64], [6 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK5-NEXT:    store i64 4, i64* [[TMP23]], align 8
-// CHECK5-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK5-NEXT:    store i8* null, i8** [[TMP24]], align 8
-// CHECK5-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK5-NEXT:    [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i64*
-// CHECK5-NEXT:    store i64 2, i64* [[TMP26]], align 8
-// CHECK5-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK5-NEXT:    [[TMP28:%.*]] = bitcast i8** [[TMP27]] to i64*
-// CHECK5-NEXT:    store i64 2, i64* [[TMP28]], align 8
-// CHECK5-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [6 x i64], [6 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK5-NEXT:    store i64 8, i64* [[TMP29]], align 8
-// CHECK5-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK5-NEXT:    store i8* null, i8** [[TMP30]], align 8
-// CHECK5-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK5-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK5-NEXT:    store i8* null, i8** [[TMP23]], align 8
+// CHECK5-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK5-NEXT:    [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i64*
+// CHECK5-NEXT:    store i64 2, i64* [[TMP25]], align 8
+// CHECK5-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK5-NEXT:    [[TMP27:%.*]] = bitcast i8** [[TMP26]] to i64*
+// CHECK5-NEXT:    store i64 2, i64* [[TMP27]], align 8
+// CHECK5-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK5-NEXT:    store i8* null, i8** [[TMP28]], align 8
+// CHECK5-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK5-NEXT:    [[TMP30:%.*]] = bitcast i8** [[TMP29]] to i64*
+// CHECK5-NEXT:    store i64 [[TMP2]], i64* [[TMP30]], align 8
+// CHECK5-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
 // CHECK5-NEXT:    [[TMP32:%.*]] = bitcast i8** [[TMP31]] to i64*
 // CHECK5-NEXT:    store i64 [[TMP2]], i64* [[TMP32]], align 8
-// CHECK5-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK5-NEXT:    [[TMP34:%.*]] = bitcast i8** [[TMP33]] to i64*
-// CHECK5-NEXT:    store i64 [[TMP2]], i64* [[TMP34]], align 8
-// CHECK5-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [6 x i64], [6 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK5-NEXT:    store i64 8, i64* [[TMP35]], align 8
-// CHECK5-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
-// CHECK5-NEXT:    store i8* null, i8** [[TMP36]], align 8
-// CHECK5-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK5-NEXT:    [[TMP38:%.*]] = bitcast i8** [[TMP37]] to i16**
-// CHECK5-NEXT:    store i16* [[VLA]], i16** [[TMP38]], align 8
-// CHECK5-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
-// CHECK5-NEXT:    [[TMP40:%.*]] = bitcast i8** [[TMP39]] to i16**
-// CHECK5-NEXT:    store i16* [[VLA]], i16** [[TMP40]], align 8
-// CHECK5-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [6 x i64], [6 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK5-NEXT:    store i64 [[TMP12]], i64* [[TMP41]], align 8
-// CHECK5-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
-// CHECK5-NEXT:    store i8* null, i8** [[TMP42]], align 8
-// CHECK5-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 5
-// CHECK5-NEXT:    [[TMP44:%.*]] = bitcast i8** [[TMP43]] to i64*
-// CHECK5-NEXT:    store i64 [[TMP9]], i64* [[TMP44]], align 8
-// CHECK5-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 5
-// CHECK5-NEXT:    [[TMP46:%.*]] = bitcast i8** [[TMP45]] to i64*
-// CHECK5-NEXT:    store i64 [[TMP9]], i64* [[TMP46]], align 8
-// CHECK5-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [6 x i64], [6 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 5
-// CHECK5-NEXT:    store i64 1, i64* [[TMP47]], align 8
-// CHECK5-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 5
-// CHECK5-NEXT:    store i8* null, i8** [[TMP48]], align 8
-// CHECK5-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK5-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK5-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [6 x i64], [6 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK5-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
+// CHECK5-NEXT:    store i8* null, i8** [[TMP33]], align 8
+// CHECK5-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK5-NEXT:    [[TMP35:%.*]] = bitcast i8** [[TMP34]] to i16**
+// CHECK5-NEXT:    store i16* [[VLA]], i16** [[TMP35]], align 8
+// CHECK5-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK5-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i16**
+// CHECK5-NEXT:    store i16* [[VLA]], i16** [[TMP37]], align 8
+// CHECK5-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [6 x i64], [6 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
+// CHECK5-NEXT:    store i64 [[TMP12]], i64* [[TMP38]], align 8
+// CHECK5-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
+// CHECK5-NEXT:    store i8* null, i8** [[TMP39]], align 8
+// CHECK5-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 5
+// CHECK5-NEXT:    [[TMP41:%.*]] = bitcast i8** [[TMP40]] to i64*
+// CHECK5-NEXT:    store i64 [[TMP9]], i64* [[TMP41]], align 8
+// CHECK5-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 5
+// CHECK5-NEXT:    [[TMP43:%.*]] = bitcast i8** [[TMP42]] to i64*
+// CHECK5-NEXT:    store i64 [[TMP9]], i64* [[TMP43]], align 8
+// CHECK5-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 5
+// CHECK5-NEXT:    store i8* null, i8** [[TMP44]], align 8
+// CHECK5-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK5-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK5-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [6 x i64], [6 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
 // CHECK5-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK5-NEXT:    [[TMP52:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l215.region_id, i32 6, i8** [[TMP49]], i8** [[TMP50]], i64* [[TMP51]], i64* getelementptr inbounds ([6 x i64], [6 x i64]* @.offload_maptypes.12, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
-// CHECK5-NEXT:    [[TMP53:%.*]] = icmp ne i32 [[TMP52]], 0
-// CHECK5-NEXT:    br i1 [[TMP53]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK5-NEXT:    [[TMP48:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l215.region_id, i32 6, i8** [[TMP45]], i8** [[TMP46]], i64* [[TMP47]], i64* getelementptr inbounds ([6 x i64], [6 x i64]* @.offload_maptypes.14, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK5-NEXT:    [[TMP49:%.*]] = icmp ne i32 [[TMP48]], 0
+// CHECK5-NEXT:    br i1 [[TMP49]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK5:       omp_offload.failed:
 // CHECK5-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l215(%struct.S1* [[THIS1]], i64 [[TMP7]], i64 2, i64 [[TMP2]], i16* [[VLA]], i64 [[TMP9]]) #[[ATTR4]]
 // CHECK5-NEXT:    br label [[OMP_OFFLOAD_CONT]]
@@ -8316,15 +8224,15 @@ int bar(int n){
 // CHECK5-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l215(%struct.S1* [[THIS1]], i64 [[TMP7]], i64 2, i64 [[TMP2]], i16* [[VLA]], i64 [[TMP9]]) #[[ATTR4]]
 // CHECK5-NEXT:    br label [[OMP_IF_END]]
 // CHECK5:       omp_if.end:
-// CHECK5-NEXT:    [[TMP54:%.*]] = mul nsw i64 1, [[TMP2]]
-// CHECK5-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i64 [[TMP54]]
+// CHECK5-NEXT:    [[TMP50:%.*]] = mul nsw i64 1, [[TMP2]]
+// CHECK5-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i64 [[TMP50]]
 // CHECK5-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1
-// CHECK5-NEXT:    [[TMP55:%.*]] = load i16, i16* [[ARRAYIDX5]], align 2
-// CHECK5-NEXT:    [[CONV6:%.*]] = sext i16 [[TMP55]] to i32
-// CHECK5-NEXT:    [[TMP56:%.*]] = load i32, i32* [[B]], align 4
-// CHECK5-NEXT:    [[ADD7:%.*]] = add nsw i32 [[CONV6]], [[TMP56]]
-// CHECK5-NEXT:    [[TMP57:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK5-NEXT:    call void @llvm.stackrestore(i8* [[TMP57]])
+// CHECK5-NEXT:    [[TMP51:%.*]] = load i16, i16* [[ARRAYIDX5]], align 2
+// CHECK5-NEXT:    [[CONV6:%.*]] = sext i16 [[TMP51]] to i32
+// CHECK5-NEXT:    [[TMP52:%.*]] = load i32, i32* [[B]], align 4
+// CHECK5-NEXT:    [[ADD7:%.*]] = add nsw i32 [[CONV6]], [[TMP52]]
+// CHECK5-NEXT:    [[TMP53:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK5-NEXT:    call void @llvm.stackrestore(i8* [[TMP53]])
 // CHECK5-NEXT:    ret i32 [[ADD7]]
 //
 //
@@ -8429,7 +8337,7 @@ int bar(int n){
 // CHECK5-NEXT:    [[ADD8:%.*]] = add i32 [[TMP40]], 1
 // CHECK5-NEXT:    [[TMP41:%.*]] = zext i32 [[ADD8]] to i64
 // CHECK5-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 [[TMP41]])
-// CHECK5-NEXT:    [[TMP42:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l197.region_id, i32 5, i8** [[TMP34]], i8** [[TMP35]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_sizes.14, i32 0, i32 0), i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.15, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK5-NEXT:    [[TMP42:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l197.region_id, i32 5, i8** [[TMP34]], i8** [[TMP35]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_sizes.16, i32 0, i32 0), i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.17, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
 // CHECK5-NEXT:    [[TMP43:%.*]] = icmp ne i32 [[TMP42]], 0
 // CHECK5-NEXT:    br i1 [[TMP43]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK5:       omp_offload.failed:
@@ -8500,7 +8408,7 @@ int bar(int n){
 // CHECK5-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK5-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK5-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK5-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l180.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.17, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.18, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK5-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l180.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.19, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.20, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
 // CHECK5-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK5-NEXT:    br i1 [[TMP23]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK5:       omp_offload.failed:
@@ -8549,11 +8457,11 @@ int bar(int n){
 // CHECK5-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
 // CHECK5-NEXT:    store i8 [[FROMBOOL]], i8* [[CONV5]], align 1
 // CHECK5-NEXT:    [[TMP7:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK5-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i64, i64, i64, i16*, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i64 [[TMP5]], i64 [[TMP1]], i64 [[TMP2]], i16* [[TMP3]], i64 [[TMP7]])
+// CHECK5-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i64, i64, i64, i16*, i64)* @.omp_outlined..12 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i64 [[TMP5]], i64 [[TMP1]], i64 [[TMP2]], i16* [[TMP3]], i64 [[TMP7]])
 // CHECK5-NEXT:    ret void
 //
 //
-// CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..11
+// CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..12
 // CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR3]] {
 // CHECK5-NEXT:  entry:
 // CHECK5-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -8732,11 +8640,11 @@ int bar(int n){
 // CHECK5-NEXT:    [[CONV7:%.*]] = bitcast i64* [[AAA_CASTED]] to i8*
 // CHECK5-NEXT:    store i8 [[TMP7]], i8* [[CONV7]], align 1
 // CHECK5-NEXT:    [[TMP8:%.*]] = load i64, i64* [[AAA_CASTED]], align 8
-// CHECK5-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, [10 x i32]*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP8]], [10 x i32]* [[TMP0]])
+// CHECK5-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP8]], [10 x i32]* [[TMP0]])
 // CHECK5-NEXT:    ret void
 //
 //
-// CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..13
+// CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..15
 // CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[N:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] {
 // CHECK5-NEXT:  entry:
 // CHECK5-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -8898,11 +8806,11 @@ int bar(int n){
 // CHECK5-NEXT:    [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16*
 // CHECK5-NEXT:    store i16 [[TMP3]], i16* [[CONV3]], align 2
 // CHECK5-NEXT:    [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8
-// CHECK5-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], [10 x i32]* [[TMP0]])
+// CHECK5-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], [10 x i32]* [[TMP0]])
 // CHECK5-NEXT:    ret void
 //
 //
-// CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..16
+// CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..18
 // CHECK5-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] {
 // CHECK5-NEXT:  entry:
 // CHECK5-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -9207,14 +9115,14 @@ int bar(int n){
 // CHECK6-NEXT:    [[TMP92:%.*]] = mul nuw i64 [[TMP2]], 4
 // CHECK6-NEXT:    [[TMP93:%.*]] = mul nuw i64 5, [[TMP5]]
 // CHECK6-NEXT:    [[TMP94:%.*]] = mul nuw i64 [[TMP93]], 8
-// CHECK6-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 0
-// CHECK6-NEXT:    [[TMP96:%.*]] = bitcast i8** [[TMP95]] to i64*
-// CHECK6-NEXT:    store i64 [[TMP90]], i64* [[TMP96]], align 8
-// CHECK6-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS27]], i32 0, i32 0
-// CHECK6-NEXT:    [[TMP98:%.*]] = bitcast i8** [[TMP97]] to i64*
-// CHECK6-NEXT:    store i64 [[TMP90]], i64* [[TMP98]], align 8
-// CHECK6-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK6-NEXT:    store i64 4, i64* [[TMP99]], align 8
+// CHECK6-NEXT:    [[TMP95:%.*]] = bitcast [9 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK6-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP95]], i8* align 8 bitcast ([9 x i64]* @.offload_sizes.10 to i8*), i64 72, i1 false)
+// CHECK6-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 0
+// CHECK6-NEXT:    [[TMP97:%.*]] = bitcast i8** [[TMP96]] to i64*
+// CHECK6-NEXT:    store i64 [[TMP90]], i64* [[TMP97]], align 8
+// CHECK6-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS27]], i32 0, i32 0
+// CHECK6-NEXT:    [[TMP99:%.*]] = bitcast i8** [[TMP98]] to i64*
+// CHECK6-NEXT:    store i64 [[TMP90]], i64* [[TMP99]], align 8
 // CHECK6-NEXT:    [[TMP100:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS28]], i64 0, i64 0
 // CHECK6-NEXT:    store i8* null, i8** [[TMP100]], align 8
 // CHECK6-NEXT:    [[TMP101:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 1
@@ -9223,87 +9131,75 @@ int bar(int n){
 // CHECK6-NEXT:    [[TMP103:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS27]], i32 0, i32 1
 // CHECK6-NEXT:    [[TMP104:%.*]] = bitcast i8** [[TMP103]] to [10 x float]**
 // CHECK6-NEXT:    store [10 x float]* [[B]], [10 x float]** [[TMP104]], align 8
-// CHECK6-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK6-NEXT:    store i64 40, i64* [[TMP105]], align 8
-// CHECK6-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS28]], i64 0, i64 1
-// CHECK6-NEXT:    store i8* null, i8** [[TMP106]], align 8
-// CHECK6-NEXT:    [[TMP107:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 2
-// CHECK6-NEXT:    [[TMP108:%.*]] = bitcast i8** [[TMP107]] to i64*
-// CHECK6-NEXT:    store i64 [[TMP2]], i64* [[TMP108]], align 8
-// CHECK6-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS27]], i32 0, i32 2
-// CHECK6-NEXT:    [[TMP110:%.*]] = bitcast i8** [[TMP109]] to i64*
-// CHECK6-NEXT:    store i64 [[TMP2]], i64* [[TMP110]], align 8
-// CHECK6-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK6-NEXT:    store i64 8, i64* [[TMP111]], align 8
-// CHECK6-NEXT:    [[TMP112:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS28]], i64 0, i64 2
-// CHECK6-NEXT:    store i8* null, i8** [[TMP112]], align 8
-// CHECK6-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 3
+// CHECK6-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS28]], i64 0, i64 1
+// CHECK6-NEXT:    store i8* null, i8** [[TMP105]], align 8
+// CHECK6-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 2
+// CHECK6-NEXT:    [[TMP107:%.*]] = bitcast i8** [[TMP106]] to i64*
+// CHECK6-NEXT:    store i64 [[TMP2]], i64* [[TMP107]], align 8
+// CHECK6-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS27]], i32 0, i32 2
+// CHECK6-NEXT:    [[TMP109:%.*]] = bitcast i8** [[TMP108]] to i64*
+// CHECK6-NEXT:    store i64 [[TMP2]], i64* [[TMP109]], align 8
+// CHECK6-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS28]], i64 0, i64 2
+// CHECK6-NEXT:    store i8* null, i8** [[TMP110]], align 8
+// CHECK6-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 3
+// CHECK6-NEXT:    [[TMP112:%.*]] = bitcast i8** [[TMP111]] to float**
+// CHECK6-NEXT:    store float* [[VLA]], float** [[TMP112]], align 8
+// CHECK6-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS27]], i32 0, i32 3
 // CHECK6-NEXT:    [[TMP114:%.*]] = bitcast i8** [[TMP113]] to float**
 // CHECK6-NEXT:    store float* [[VLA]], float** [[TMP114]], align 8
-// CHECK6-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS27]], i32 0, i32 3
-// CHECK6-NEXT:    [[TMP116:%.*]] = bitcast i8** [[TMP115]] to float**
-// CHECK6-NEXT:    store float* [[VLA]], float** [[TMP116]], align 8
-// CHECK6-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK6-NEXT:    store i64 [[TMP92]], i64* [[TMP117]], align 8
-// CHECK6-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS28]], i64 0, i64 3
-// CHECK6-NEXT:    store i8* null, i8** [[TMP118]], align 8
-// CHECK6-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 4
+// CHECK6-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
+// CHECK6-NEXT:    store i64 [[TMP92]], i64* [[TMP115]], align 8
+// CHECK6-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS28]], i64 0, i64 3
+// CHECK6-NEXT:    store i8* null, i8** [[TMP116]], align 8
+// CHECK6-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 4
+// CHECK6-NEXT:    [[TMP118:%.*]] = bitcast i8** [[TMP117]] to [5 x [10 x double]]**
+// CHECK6-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP118]], align 8
+// CHECK6-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS27]], i32 0, i32 4
 // CHECK6-NEXT:    [[TMP120:%.*]] = bitcast i8** [[TMP119]] to [5 x [10 x double]]**
 // CHECK6-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP120]], align 8
-// CHECK6-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS27]], i32 0, i32 4
-// CHECK6-NEXT:    [[TMP122:%.*]] = bitcast i8** [[TMP121]] to [5 x [10 x double]]**
-// CHECK6-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP122]], align 8
-// CHECK6-NEXT:    [[TMP123:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK6-NEXT:    store i64 400, i64* [[TMP123]], align 8
-// CHECK6-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS28]], i64 0, i64 4
-// CHECK6-NEXT:    store i8* null, i8** [[TMP124]], align 8
-// CHECK6-NEXT:    [[TMP125:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 5
-// CHECK6-NEXT:    [[TMP126:%.*]] = bitcast i8** [[TMP125]] to i64*
-// CHECK6-NEXT:    store i64 5, i64* [[TMP126]], align 8
-// CHECK6-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS27]], i32 0, i32 5
+// CHECK6-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS28]], i64 0, i64 4
+// CHECK6-NEXT:    store i8* null, i8** [[TMP121]], align 8
+// CHECK6-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 5
+// CHECK6-NEXT:    [[TMP123:%.*]] = bitcast i8** [[TMP122]] to i64*
+// CHECK6-NEXT:    store i64 5, i64* [[TMP123]], align 8
+// CHECK6-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS27]], i32 0, i32 5
+// CHECK6-NEXT:    [[TMP125:%.*]] = bitcast i8** [[TMP124]] to i64*
+// CHECK6-NEXT:    store i64 5, i64* [[TMP125]], align 8
+// CHECK6-NEXT:    [[TMP126:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS28]], i64 0, i64 5
+// CHECK6-NEXT:    store i8* null, i8** [[TMP126]], align 8
+// CHECK6-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 6
 // CHECK6-NEXT:    [[TMP128:%.*]] = bitcast i8** [[TMP127]] to i64*
-// CHECK6-NEXT:    store i64 5, i64* [[TMP128]], align 8
-// CHECK6-NEXT:    [[TMP129:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 5
-// CHECK6-NEXT:    store i64 8, i64* [[TMP129]], align 8
-// CHECK6-NEXT:    [[TMP130:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS28]], i64 0, i64 5
-// CHECK6-NEXT:    store i8* null, i8** [[TMP130]], align 8
-// CHECK6-NEXT:    [[TMP131:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 6
-// CHECK6-NEXT:    [[TMP132:%.*]] = bitcast i8** [[TMP131]] to i64*
-// CHECK6-NEXT:    store i64 [[TMP5]], i64* [[TMP132]], align 8
-// CHECK6-NEXT:    [[TMP133:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS27]], i32 0, i32 6
-// CHECK6-NEXT:    [[TMP134:%.*]] = bitcast i8** [[TMP133]] to i64*
-// CHECK6-NEXT:    store i64 [[TMP5]], i64* [[TMP134]], align 8
-// CHECK6-NEXT:    [[TMP135:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 6
-// CHECK6-NEXT:    store i64 8, i64* [[TMP135]], align 8
-// CHECK6-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS28]], i64 0, i64 6
-// CHECK6-NEXT:    store i8* null, i8** [[TMP136]], align 8
-// CHECK6-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 7
-// CHECK6-NEXT:    [[TMP138:%.*]] = bitcast i8** [[TMP137]] to double**
-// CHECK6-NEXT:    store double* [[VLA1]], double** [[TMP138]], align 8
-// CHECK6-NEXT:    [[TMP139:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS27]], i32 0, i32 7
-// CHECK6-NEXT:    [[TMP140:%.*]] = bitcast i8** [[TMP139]] to double**
-// CHECK6-NEXT:    store double* [[VLA1]], double** [[TMP140]], align 8
-// CHECK6-NEXT:    [[TMP141:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
-// CHECK6-NEXT:    store i64 [[TMP94]], i64* [[TMP141]], align 8
-// CHECK6-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS28]], i64 0, i64 7
+// CHECK6-NEXT:    store i64 [[TMP5]], i64* [[TMP128]], align 8
+// CHECK6-NEXT:    [[TMP129:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS27]], i32 0, i32 6
+// CHECK6-NEXT:    [[TMP130:%.*]] = bitcast i8** [[TMP129]] to i64*
+// CHECK6-NEXT:    store i64 [[TMP5]], i64* [[TMP130]], align 8
+// CHECK6-NEXT:    [[TMP131:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS28]], i64 0, i64 6
+// CHECK6-NEXT:    store i8* null, i8** [[TMP131]], align 8
+// CHECK6-NEXT:    [[TMP132:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 7
+// CHECK6-NEXT:    [[TMP133:%.*]] = bitcast i8** [[TMP132]] to double**
+// CHECK6-NEXT:    store double* [[VLA1]], double** [[TMP133]], align 8
+// CHECK6-NEXT:    [[TMP134:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS27]], i32 0, i32 7
+// CHECK6-NEXT:    [[TMP135:%.*]] = bitcast i8** [[TMP134]] to double**
+// CHECK6-NEXT:    store double* [[VLA1]], double** [[TMP135]], align 8
+// CHECK6-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
+// CHECK6-NEXT:    store i64 [[TMP94]], i64* [[TMP136]], align 8
+// CHECK6-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS28]], i64 0, i64 7
+// CHECK6-NEXT:    store i8* null, i8** [[TMP137]], align 8
+// CHECK6-NEXT:    [[TMP138:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 8
+// CHECK6-NEXT:    [[TMP139:%.*]] = bitcast i8** [[TMP138]] to %struct.TT**
+// CHECK6-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP139]], align 8
+// CHECK6-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS27]], i32 0, i32 8
+// CHECK6-NEXT:    [[TMP141:%.*]] = bitcast i8** [[TMP140]] to %struct.TT**
+// CHECK6-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP141]], align 8
+// CHECK6-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS28]], i64 0, i64 8
 // CHECK6-NEXT:    store i8* null, i8** [[TMP142]], align 8
-// CHECK6-NEXT:    [[TMP143:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 8
-// CHECK6-NEXT:    [[TMP144:%.*]] = bitcast i8** [[TMP143]] to %struct.TT**
-// CHECK6-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP144]], align 8
-// CHECK6-NEXT:    [[TMP145:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS27]], i32 0, i32 8
-// CHECK6-NEXT:    [[TMP146:%.*]] = bitcast i8** [[TMP145]] to %struct.TT**
-// CHECK6-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP146]], align 8
-// CHECK6-NEXT:    [[TMP147:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 8
-// CHECK6-NEXT:    store i64 16, i64* [[TMP147]], align 8
-// CHECK6-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS28]], i64 0, i64 8
-// CHECK6-NEXT:    store i8* null, i8** [[TMP148]], align 8
-// CHECK6-NEXT:    [[TMP149:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 0
-// CHECK6-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS27]], i32 0, i32 0
-// CHECK6-NEXT:    [[TMP151:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK6-NEXT:    [[TMP143:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 0
+// CHECK6-NEXT:    [[TMP144:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS27]], i32 0, i32 0
+// CHECK6-NEXT:    [[TMP145:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
 // CHECK6-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK6-NEXT:    [[TMP152:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l142.region_id, i32 9, i8** [[TMP149]], i8** [[TMP150]], i64* [[TMP151]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.10, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
-// CHECK6-NEXT:    [[TMP153:%.*]] = icmp ne i32 [[TMP152]], 0
-// CHECK6-NEXT:    br i1 [[TMP153]], label [[OMP_OFFLOAD_FAILED30:%.*]], label [[OMP_OFFLOAD_CONT31:%.*]]
+// CHECK6-NEXT:    [[TMP146:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l142.region_id, i32 9, i8** [[TMP143]], i8** [[TMP144]], i64* [[TMP145]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.11, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK6-NEXT:    [[TMP147:%.*]] = icmp ne i32 [[TMP146]], 0
+// CHECK6-NEXT:    br i1 [[TMP147]], label [[OMP_OFFLOAD_FAILED30:%.*]], label [[OMP_OFFLOAD_CONT31:%.*]]
 // CHECK6:       omp_offload.failed30:
 // CHECK6-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l142(i64 [[TMP90]], [10 x float]* [[B]], i64 [[TMP2]], float* [[VLA]], [5 x [10 x double]]* [[C]], i64 5, i64 [[TMP5]], double* [[VLA1]], %struct.TT* [[D]]) #[[ATTR4]]
 // CHECK6-NEXT:    br label [[OMP_OFFLOAD_CONT31]]
@@ -9313,10 +9209,10 @@ int bar(int n){
 // CHECK6-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l142(i64 [[TMP90]], [10 x float]* [[B]], i64 [[TMP2]], float* [[VLA]], [5 x [10 x double]]* [[C]], i64 5, i64 [[TMP5]], double* [[VLA1]], %struct.TT* [[D]]) #[[ATTR4]]
 // CHECK6-NEXT:    br label [[OMP_IF_END33]]
 // CHECK6:       omp_if.end33:
-// CHECK6-NEXT:    [[TMP154:%.*]] = load i32, i32* [[A]], align 4
-// CHECK6-NEXT:    [[TMP155:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK6-NEXT:    call void @llvm.stackrestore(i8* [[TMP155]])
-// CHECK6-NEXT:    ret i32 [[TMP154]]
+// CHECK6-NEXT:    [[TMP148:%.*]] = load i32, i32* [[A]], align 4
+// CHECK6-NEXT:    [[TMP149:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK6-NEXT:    call void @llvm.stackrestore(i8* [[TMP149]])
+// CHECK6-NEXT:    ret i32 [[TMP148]]
 //
 //
 // CHECK6-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l97
@@ -10056,14 +9952,14 @@ int bar(int n){
 // CHECK6-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0
 // CHECK6-NEXT:    [[TMP11:%.*]] = mul nuw i64 2, [[TMP2]]
 // CHECK6-NEXT:    [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 2
-// CHECK6-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK6-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to %struct.S1**
-// CHECK6-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP14]], align 8
-// CHECK6-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK6-NEXT:    [[TMP16:%.*]] = bitcast i8** [[TMP15]] to double**
-// CHECK6-NEXT:    store double* [[A]], double** [[TMP16]], align 8
-// CHECK6-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [6 x i64], [6 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK6-NEXT:    store i64 8, i64* [[TMP17]], align 8
+// CHECK6-NEXT:    [[TMP13:%.*]] = bitcast [6 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK6-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP13]], i8* align 8 bitcast ([6 x i64]* @.offload_sizes.13 to i8*), i64 48, i1 false)
+// CHECK6-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK6-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to %struct.S1**
+// CHECK6-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP15]], align 8
+// CHECK6-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK6-NEXT:    [[TMP17:%.*]] = bitcast i8** [[TMP16]] to double**
+// CHECK6-NEXT:    store double* [[A]], double** [[TMP17]], align 8
 // CHECK6-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
 // CHECK6-NEXT:    store i8* null, i8** [[TMP18]], align 8
 // CHECK6-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -10072,57 +9968,49 @@ int bar(int n){
 // CHECK6-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK6-NEXT:    [[TMP22:%.*]] = bitcast i8** [[TMP21]] to i64*
 // CHECK6-NEXT:    store i64 [[TMP7]], i64* [[TMP22]], align 8
-// CHECK6-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [6 x i64], [6 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK6-NEXT:    store i64 4, i64* [[TMP23]], align 8
-// CHECK6-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK6-NEXT:    store i8* null, i8** [[TMP24]], align 8
-// CHECK6-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK6-NEXT:    [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i64*
-// CHECK6-NEXT:    store i64 2, i64* [[TMP26]], align 8
-// CHECK6-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK6-NEXT:    [[TMP28:%.*]] = bitcast i8** [[TMP27]] to i64*
-// CHECK6-NEXT:    store i64 2, i64* [[TMP28]], align 8
-// CHECK6-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [6 x i64], [6 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK6-NEXT:    store i64 8, i64* [[TMP29]], align 8
-// CHECK6-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK6-NEXT:    store i8* null, i8** [[TMP30]], align 8
-// CHECK6-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK6-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK6-NEXT:    store i8* null, i8** [[TMP23]], align 8
+// CHECK6-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK6-NEXT:    [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i64*
+// CHECK6-NEXT:    store i64 2, i64* [[TMP25]], align 8
+// CHECK6-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK6-NEXT:    [[TMP27:%.*]] = bitcast i8** [[TMP26]] to i64*
+// CHECK6-NEXT:    store i64 2, i64* [[TMP27]], align 8
+// CHECK6-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK6-NEXT:    store i8* null, i8** [[TMP28]], align 8
+// CHECK6-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK6-NEXT:    [[TMP30:%.*]] = bitcast i8** [[TMP29]] to i64*
+// CHECK6-NEXT:    store i64 [[TMP2]], i64* [[TMP30]], align 8
+// CHECK6-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
 // CHECK6-NEXT:    [[TMP32:%.*]] = bitcast i8** [[TMP31]] to i64*
 // CHECK6-NEXT:    store i64 [[TMP2]], i64* [[TMP32]], align 8
-// CHECK6-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK6-NEXT:    [[TMP34:%.*]] = bitcast i8** [[TMP33]] to i64*
-// CHECK6-NEXT:    store i64 [[TMP2]], i64* [[TMP34]], align 8
-// CHECK6-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [6 x i64], [6 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK6-NEXT:    store i64 8, i64* [[TMP35]], align 8
-// CHECK6-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
-// CHECK6-NEXT:    store i8* null, i8** [[TMP36]], align 8
-// CHECK6-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK6-NEXT:    [[TMP38:%.*]] = bitcast i8** [[TMP37]] to i16**
-// CHECK6-NEXT:    store i16* [[VLA]], i16** [[TMP38]], align 8
-// CHECK6-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
-// CHECK6-NEXT:    [[TMP40:%.*]] = bitcast i8** [[TMP39]] to i16**
-// CHECK6-NEXT:    store i16* [[VLA]], i16** [[TMP40]], align 8
-// CHECK6-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [6 x i64], [6 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK6-NEXT:    store i64 [[TMP12]], i64* [[TMP41]], align 8
-// CHECK6-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
-// CHECK6-NEXT:    store i8* null, i8** [[TMP42]], align 8
-// CHECK6-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 5
-// CHECK6-NEXT:    [[TMP44:%.*]] = bitcast i8** [[TMP43]] to i64*
-// CHECK6-NEXT:    store i64 [[TMP9]], i64* [[TMP44]], align 8
-// CHECK6-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 5
-// CHECK6-NEXT:    [[TMP46:%.*]] = bitcast i8** [[TMP45]] to i64*
-// CHECK6-NEXT:    store i64 [[TMP9]], i64* [[TMP46]], align 8
-// CHECK6-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [6 x i64], [6 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 5
-// CHECK6-NEXT:    store i64 1, i64* [[TMP47]], align 8
-// CHECK6-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 5
-// CHECK6-NEXT:    store i8* null, i8** [[TMP48]], align 8
-// CHECK6-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK6-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK6-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [6 x i64], [6 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK6-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
+// CHECK6-NEXT:    store i8* null, i8** [[TMP33]], align 8
+// CHECK6-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK6-NEXT:    [[TMP35:%.*]] = bitcast i8** [[TMP34]] to i16**
+// CHECK6-NEXT:    store i16* [[VLA]], i16** [[TMP35]], align 8
+// CHECK6-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK6-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i16**
+// CHECK6-NEXT:    store i16* [[VLA]], i16** [[TMP37]], align 8
+// CHECK6-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [6 x i64], [6 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
+// CHECK6-NEXT:    store i64 [[TMP12]], i64* [[TMP38]], align 8
+// CHECK6-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
+// CHECK6-NEXT:    store i8* null, i8** [[TMP39]], align 8
+// CHECK6-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 5
+// CHECK6-NEXT:    [[TMP41:%.*]] = bitcast i8** [[TMP40]] to i64*
+// CHECK6-NEXT:    store i64 [[TMP9]], i64* [[TMP41]], align 8
+// CHECK6-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 5
+// CHECK6-NEXT:    [[TMP43:%.*]] = bitcast i8** [[TMP42]] to i64*
+// CHECK6-NEXT:    store i64 [[TMP9]], i64* [[TMP43]], align 8
+// CHECK6-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 5
+// CHECK6-NEXT:    store i8* null, i8** [[TMP44]], align 8
+// CHECK6-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK6-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK6-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [6 x i64], [6 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
 // CHECK6-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK6-NEXT:    [[TMP52:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l215.region_id, i32 6, i8** [[TMP49]], i8** [[TMP50]], i64* [[TMP51]], i64* getelementptr inbounds ([6 x i64], [6 x i64]* @.offload_maptypes.12, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
-// CHECK6-NEXT:    [[TMP53:%.*]] = icmp ne i32 [[TMP52]], 0
-// CHECK6-NEXT:    br i1 [[TMP53]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK6-NEXT:    [[TMP48:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l215.region_id, i32 6, i8** [[TMP45]], i8** [[TMP46]], i64* [[TMP47]], i64* getelementptr inbounds ([6 x i64], [6 x i64]* @.offload_maptypes.14, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK6-NEXT:    [[TMP49:%.*]] = icmp ne i32 [[TMP48]], 0
+// CHECK6-NEXT:    br i1 [[TMP49]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK6:       omp_offload.failed:
 // CHECK6-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l215(%struct.S1* [[THIS1]], i64 [[TMP7]], i64 2, i64 [[TMP2]], i16* [[VLA]], i64 [[TMP9]]) #[[ATTR4]]
 // CHECK6-NEXT:    br label [[OMP_OFFLOAD_CONT]]
@@ -10132,15 +10020,15 @@ int bar(int n){
 // CHECK6-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l215(%struct.S1* [[THIS1]], i64 [[TMP7]], i64 2, i64 [[TMP2]], i16* [[VLA]], i64 [[TMP9]]) #[[ATTR4]]
 // CHECK6-NEXT:    br label [[OMP_IF_END]]
 // CHECK6:       omp_if.end:
-// CHECK6-NEXT:    [[TMP54:%.*]] = mul nsw i64 1, [[TMP2]]
-// CHECK6-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i64 [[TMP54]]
+// CHECK6-NEXT:    [[TMP50:%.*]] = mul nsw i64 1, [[TMP2]]
+// CHECK6-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i64 [[TMP50]]
 // CHECK6-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1
-// CHECK6-NEXT:    [[TMP55:%.*]] = load i16, i16* [[ARRAYIDX5]], align 2
-// CHECK6-NEXT:    [[CONV6:%.*]] = sext i16 [[TMP55]] to i32
-// CHECK6-NEXT:    [[TMP56:%.*]] = load i32, i32* [[B]], align 4
-// CHECK6-NEXT:    [[ADD7:%.*]] = add nsw i32 [[CONV6]], [[TMP56]]
-// CHECK6-NEXT:    [[TMP57:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK6-NEXT:    call void @llvm.stackrestore(i8* [[TMP57]])
+// CHECK6-NEXT:    [[TMP51:%.*]] = load i16, i16* [[ARRAYIDX5]], align 2
+// CHECK6-NEXT:    [[CONV6:%.*]] = sext i16 [[TMP51]] to i32
+// CHECK6-NEXT:    [[TMP52:%.*]] = load i32, i32* [[B]], align 4
+// CHECK6-NEXT:    [[ADD7:%.*]] = add nsw i32 [[CONV6]], [[TMP52]]
+// CHECK6-NEXT:    [[TMP53:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK6-NEXT:    call void @llvm.stackrestore(i8* [[TMP53]])
 // CHECK6-NEXT:    ret i32 [[ADD7]]
 //
 //
@@ -10245,7 +10133,7 @@ int bar(int n){
 // CHECK6-NEXT:    [[ADD8:%.*]] = add i32 [[TMP40]], 1
 // CHECK6-NEXT:    [[TMP41:%.*]] = zext i32 [[ADD8]] to i64
 // CHECK6-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 [[TMP41]])
-// CHECK6-NEXT:    [[TMP42:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l197.region_id, i32 5, i8** [[TMP34]], i8** [[TMP35]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_sizes.14, i32 0, i32 0), i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.15, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK6-NEXT:    [[TMP42:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l197.region_id, i32 5, i8** [[TMP34]], i8** [[TMP35]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_sizes.16, i32 0, i32 0), i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.17, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
 // CHECK6-NEXT:    [[TMP43:%.*]] = icmp ne i32 [[TMP42]], 0
 // CHECK6-NEXT:    br i1 [[TMP43]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK6:       omp_offload.failed:
@@ -10316,7 +10204,7 @@ int bar(int n){
 // CHECK6-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK6-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK6-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK6-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l180.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.17, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.18, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK6-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l180.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.19, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.20, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
 // CHECK6-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK6-NEXT:    br i1 [[TMP23]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK6:       omp_offload.failed:
@@ -10365,11 +10253,11 @@ int bar(int n){
 // CHECK6-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
 // CHECK6-NEXT:    store i8 [[FROMBOOL]], i8* [[CONV5]], align 1
 // CHECK6-NEXT:    [[TMP7:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK6-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i64, i64, i64, i16*, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i64 [[TMP5]], i64 [[TMP1]], i64 [[TMP2]], i16* [[TMP3]], i64 [[TMP7]])
+// CHECK6-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i64, i64, i64, i16*, i64)* @.omp_outlined..12 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i64 [[TMP5]], i64 [[TMP1]], i64 [[TMP2]], i16* [[TMP3]], i64 [[TMP7]])
 // CHECK6-NEXT:    ret void
 //
 //
-// CHECK6-LABEL: define {{[^@]+}}@.omp_outlined..11
+// CHECK6-LABEL: define {{[^@]+}}@.omp_outlined..12
 // CHECK6-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR3]] {
 // CHECK6-NEXT:  entry:
 // CHECK6-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -10548,11 +10436,11 @@ int bar(int n){
 // CHECK6-NEXT:    [[CONV7:%.*]] = bitcast i64* [[AAA_CASTED]] to i8*
 // CHECK6-NEXT:    store i8 [[TMP7]], i8* [[CONV7]], align 1
 // CHECK6-NEXT:    [[TMP8:%.*]] = load i64, i64* [[AAA_CASTED]], align 8
-// CHECK6-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, [10 x i32]*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP8]], [10 x i32]* [[TMP0]])
+// CHECK6-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP8]], [10 x i32]* [[TMP0]])
 // CHECK6-NEXT:    ret void
 //
 //
-// CHECK6-LABEL: define {{[^@]+}}@.omp_outlined..13
+// CHECK6-LABEL: define {{[^@]+}}@.omp_outlined..15
 // CHECK6-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[N:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] {
 // CHECK6-NEXT:  entry:
 // CHECK6-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -10714,11 +10602,11 @@ int bar(int n){
 // CHECK6-NEXT:    [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16*
 // CHECK6-NEXT:    store i16 [[TMP3]], i16* [[CONV3]], align 2
 // CHECK6-NEXT:    [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8
-// CHECK6-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], [10 x i32]* [[TMP0]])
+// CHECK6-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i64 [[TMP2]], i64 [[TMP4]], [10 x i32]* [[TMP0]])
 // CHECK6-NEXT:    ret void
 //
 //
-// CHECK6-LABEL: define {{[^@]+}}@.omp_outlined..16
+// CHECK6-LABEL: define {{[^@]+}}@.omp_outlined..18
 // CHECK6-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] {
 // CHECK6-NEXT:  entry:
 // CHECK6-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -11018,14 +10906,14 @@ int bar(int n){
 // CHECK7-NEXT:    [[TMP92:%.*]] = mul nuw i32 5, [[TMP3]]
 // CHECK7-NEXT:    [[TMP93:%.*]] = mul nuw i32 [[TMP92]], 8
 // CHECK7-NEXT:    [[TMP94:%.*]] = sext i32 [[TMP93]] to i64
-// CHECK7-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 0
-// CHECK7-NEXT:    [[TMP96:%.*]] = bitcast i8** [[TMP95]] to i32*
-// CHECK7-NEXT:    store i32 [[TMP88]], i32* [[TMP96]], align 4
-// CHECK7-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 0
-// CHECK7-NEXT:    [[TMP98:%.*]] = bitcast i8** [[TMP97]] to i32*
-// CHECK7-NEXT:    store i32 [[TMP88]], i32* [[TMP98]], align 4
-// CHECK7-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK7-NEXT:    store i64 4, i64* [[TMP99]], align 4
+// CHECK7-NEXT:    [[TMP95:%.*]] = bitcast [9 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK7-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP95]], i8* align 4 bitcast ([9 x i64]* @.offload_sizes.10 to i8*), i32 72, i1 false)
+// CHECK7-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 0
+// CHECK7-NEXT:    [[TMP97:%.*]] = bitcast i8** [[TMP96]] to i32*
+// CHECK7-NEXT:    store i32 [[TMP88]], i32* [[TMP97]], align 4
+// CHECK7-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 0
+// CHECK7-NEXT:    [[TMP99:%.*]] = bitcast i8** [[TMP98]] to i32*
+// CHECK7-NEXT:    store i32 [[TMP88]], i32* [[TMP99]], align 4
 // CHECK7-NEXT:    [[TMP100:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i32 0, i32 0
 // CHECK7-NEXT:    store i8* null, i8** [[TMP100]], align 4
 // CHECK7-NEXT:    [[TMP101:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 1
@@ -11034,87 +10922,75 @@ int bar(int n){
 // CHECK7-NEXT:    [[TMP103:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 1
 // CHECK7-NEXT:    [[TMP104:%.*]] = bitcast i8** [[TMP103]] to [10 x float]**
 // CHECK7-NEXT:    store [10 x float]* [[B]], [10 x float]** [[TMP104]], align 4
-// CHECK7-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK7-NEXT:    store i64 40, i64* [[TMP105]], align 4
-// CHECK7-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i32 0, i32 1
-// CHECK7-NEXT:    store i8* null, i8** [[TMP106]], align 4
-// CHECK7-NEXT:    [[TMP107:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 2
-// CHECK7-NEXT:    [[TMP108:%.*]] = bitcast i8** [[TMP107]] to i32*
-// CHECK7-NEXT:    store i32 [[TMP1]], i32* [[TMP108]], align 4
-// CHECK7-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 2
-// CHECK7-NEXT:    [[TMP110:%.*]] = bitcast i8** [[TMP109]] to i32*
-// CHECK7-NEXT:    store i32 [[TMP1]], i32* [[TMP110]], align 4
-// CHECK7-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK7-NEXT:    store i64 4, i64* [[TMP111]], align 4
-// CHECK7-NEXT:    [[TMP112:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i32 0, i32 2
-// CHECK7-NEXT:    store i8* null, i8** [[TMP112]], align 4
-// CHECK7-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 3
+// CHECK7-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i32 0, i32 1
+// CHECK7-NEXT:    store i8* null, i8** [[TMP105]], align 4
+// CHECK7-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 2
+// CHECK7-NEXT:    [[TMP107:%.*]] = bitcast i8** [[TMP106]] to i32*
+// CHECK7-NEXT:    store i32 [[TMP1]], i32* [[TMP107]], align 4
+// CHECK7-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 2
+// CHECK7-NEXT:    [[TMP109:%.*]] = bitcast i8** [[TMP108]] to i32*
+// CHECK7-NEXT:    store i32 [[TMP1]], i32* [[TMP109]], align 4
+// CHECK7-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i32 0, i32 2
+// CHECK7-NEXT:    store i8* null, i8** [[TMP110]], align 4
+// CHECK7-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 3
+// CHECK7-NEXT:    [[TMP112:%.*]] = bitcast i8** [[TMP111]] to float**
+// CHECK7-NEXT:    store float* [[VLA]], float** [[TMP112]], align 4
+// CHECK7-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 3
 // CHECK7-NEXT:    [[TMP114:%.*]] = bitcast i8** [[TMP113]] to float**
 // CHECK7-NEXT:    store float* [[VLA]], float** [[TMP114]], align 4
-// CHECK7-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 3
-// CHECK7-NEXT:    [[TMP116:%.*]] = bitcast i8** [[TMP115]] to float**
-// CHECK7-NEXT:    store float* [[VLA]], float** [[TMP116]], align 4
-// CHECK7-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK7-NEXT:    store i64 [[TMP91]], i64* [[TMP117]], align 4
-// CHECK7-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i32 0, i32 3
-// CHECK7-NEXT:    store i8* null, i8** [[TMP118]], align 4
-// CHECK7-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 4
+// CHECK7-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
+// CHECK7-NEXT:    store i64 [[TMP91]], i64* [[TMP115]], align 4
+// CHECK7-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i32 0, i32 3
+// CHECK7-NEXT:    store i8* null, i8** [[TMP116]], align 4
+// CHECK7-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 4
+// CHECK7-NEXT:    [[TMP118:%.*]] = bitcast i8** [[TMP117]] to [5 x [10 x double]]**
+// CHECK7-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP118]], align 4
+// CHECK7-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 4
 // CHECK7-NEXT:    [[TMP120:%.*]] = bitcast i8** [[TMP119]] to [5 x [10 x double]]**
 // CHECK7-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP120]], align 4
-// CHECK7-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 4
-// CHECK7-NEXT:    [[TMP122:%.*]] = bitcast i8** [[TMP121]] to [5 x [10 x double]]**
-// CHECK7-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP122]], align 4
-// CHECK7-NEXT:    [[TMP123:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK7-NEXT:    store i64 400, i64* [[TMP123]], align 4
-// CHECK7-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i32 0, i32 4
-// CHECK7-NEXT:    store i8* null, i8** [[TMP124]], align 4
-// CHECK7-NEXT:    [[TMP125:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 5
-// CHECK7-NEXT:    [[TMP126:%.*]] = bitcast i8** [[TMP125]] to i32*
-// CHECK7-NEXT:    store i32 5, i32* [[TMP126]], align 4
-// CHECK7-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 5
+// CHECK7-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i32 0, i32 4
+// CHECK7-NEXT:    store i8* null, i8** [[TMP121]], align 4
+// CHECK7-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 5
+// CHECK7-NEXT:    [[TMP123:%.*]] = bitcast i8** [[TMP122]] to i32*
+// CHECK7-NEXT:    store i32 5, i32* [[TMP123]], align 4
+// CHECK7-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 5
+// CHECK7-NEXT:    [[TMP125:%.*]] = bitcast i8** [[TMP124]] to i32*
+// CHECK7-NEXT:    store i32 5, i32* [[TMP125]], align 4
+// CHECK7-NEXT:    [[TMP126:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i32 0, i32 5
+// CHECK7-NEXT:    store i8* null, i8** [[TMP126]], align 4
+// CHECK7-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 6
 // CHECK7-NEXT:    [[TMP128:%.*]] = bitcast i8** [[TMP127]] to i32*
-// CHECK7-NEXT:    store i32 5, i32* [[TMP128]], align 4
-// CHECK7-NEXT:    [[TMP129:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 5
-// CHECK7-NEXT:    store i64 4, i64* [[TMP129]], align 4
-// CHECK7-NEXT:    [[TMP130:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i32 0, i32 5
-// CHECK7-NEXT:    store i8* null, i8** [[TMP130]], align 4
-// CHECK7-NEXT:    [[TMP131:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 6
-// CHECK7-NEXT:    [[TMP132:%.*]] = bitcast i8** [[TMP131]] to i32*
-// CHECK7-NEXT:    store i32 [[TMP3]], i32* [[TMP132]], align 4
-// CHECK7-NEXT:    [[TMP133:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 6
-// CHECK7-NEXT:    [[TMP134:%.*]] = bitcast i8** [[TMP133]] to i32*
-// CHECK7-NEXT:    store i32 [[TMP3]], i32* [[TMP134]], align 4
-// CHECK7-NEXT:    [[TMP135:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 6
-// CHECK7-NEXT:    store i64 4, i64* [[TMP135]], align 4
-// CHECK7-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i32 0, i32 6
-// CHECK7-NEXT:    store i8* null, i8** [[TMP136]], align 4
-// CHECK7-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 7
-// CHECK7-NEXT:    [[TMP138:%.*]] = bitcast i8** [[TMP137]] to double**
-// CHECK7-NEXT:    store double* [[VLA1]], double** [[TMP138]], align 4
-// CHECK7-NEXT:    [[TMP139:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 7
-// CHECK7-NEXT:    [[TMP140:%.*]] = bitcast i8** [[TMP139]] to double**
-// CHECK7-NEXT:    store double* [[VLA1]], double** [[TMP140]], align 4
-// CHECK7-NEXT:    [[TMP141:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
-// CHECK7-NEXT:    store i64 [[TMP94]], i64* [[TMP141]], align 4
-// CHECK7-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i32 0, i32 7
+// CHECK7-NEXT:    store i32 [[TMP3]], i32* [[TMP128]], align 4
+// CHECK7-NEXT:    [[TMP129:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 6
+// CHECK7-NEXT:    [[TMP130:%.*]] = bitcast i8** [[TMP129]] to i32*
+// CHECK7-NEXT:    store i32 [[TMP3]], i32* [[TMP130]], align 4
+// CHECK7-NEXT:    [[TMP131:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i32 0, i32 6
+// CHECK7-NEXT:    store i8* null, i8** [[TMP131]], align 4
+// CHECK7-NEXT:    [[TMP132:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 7
+// CHECK7-NEXT:    [[TMP133:%.*]] = bitcast i8** [[TMP132]] to double**
+// CHECK7-NEXT:    store double* [[VLA1]], double** [[TMP133]], align 4
+// CHECK7-NEXT:    [[TMP134:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 7
+// CHECK7-NEXT:    [[TMP135:%.*]] = bitcast i8** [[TMP134]] to double**
+// CHECK7-NEXT:    store double* [[VLA1]], double** [[TMP135]], align 4
+// CHECK7-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
+// CHECK7-NEXT:    store i64 [[TMP94]], i64* [[TMP136]], align 4
+// CHECK7-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i32 0, i32 7
+// CHECK7-NEXT:    store i8* null, i8** [[TMP137]], align 4
+// CHECK7-NEXT:    [[TMP138:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 8
+// CHECK7-NEXT:    [[TMP139:%.*]] = bitcast i8** [[TMP138]] to %struct.TT**
+// CHECK7-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP139]], align 4
+// CHECK7-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 8
+// CHECK7-NEXT:    [[TMP141:%.*]] = bitcast i8** [[TMP140]] to %struct.TT**
+// CHECK7-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP141]], align 4
+// CHECK7-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i32 0, i32 8
 // CHECK7-NEXT:    store i8* null, i8** [[TMP142]], align 4
-// CHECK7-NEXT:    [[TMP143:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 8
-// CHECK7-NEXT:    [[TMP144:%.*]] = bitcast i8** [[TMP143]] to %struct.TT**
-// CHECK7-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP144]], align 4
-// CHECK7-NEXT:    [[TMP145:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 8
-// CHECK7-NEXT:    [[TMP146:%.*]] = bitcast i8** [[TMP145]] to %struct.TT**
-// CHECK7-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP146]], align 4
-// CHECK7-NEXT:    [[TMP147:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 8
-// CHECK7-NEXT:    store i64 12, i64* [[TMP147]], align 4
-// CHECK7-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i32 0, i32 8
-// CHECK7-NEXT:    store i8* null, i8** [[TMP148]], align 4
-// CHECK7-NEXT:    [[TMP149:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 0
-// CHECK7-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 0
-// CHECK7-NEXT:    [[TMP151:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK7-NEXT:    [[TMP143:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 0
+// CHECK7-NEXT:    [[TMP144:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 0
+// CHECK7-NEXT:    [[TMP145:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
 // CHECK7-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK7-NEXT:    [[TMP152:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l142.region_id, i32 9, i8** [[TMP149]], i8** [[TMP150]], i64* [[TMP151]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.10, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
-// CHECK7-NEXT:    [[TMP153:%.*]] = icmp ne i32 [[TMP152]], 0
-// CHECK7-NEXT:    br i1 [[TMP153]], label [[OMP_OFFLOAD_FAILED25:%.*]], label [[OMP_OFFLOAD_CONT26:%.*]]
+// CHECK7-NEXT:    [[TMP146:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l142.region_id, i32 9, i8** [[TMP143]], i8** [[TMP144]], i64* [[TMP145]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.11, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK7-NEXT:    [[TMP147:%.*]] = icmp ne i32 [[TMP146]], 0
+// CHECK7-NEXT:    br i1 [[TMP147]], label [[OMP_OFFLOAD_FAILED25:%.*]], label [[OMP_OFFLOAD_CONT26:%.*]]
 // CHECK7:       omp_offload.failed25:
 // CHECK7-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l142(i32 [[TMP88]], [10 x float]* [[B]], i32 [[TMP1]], float* [[VLA]], [5 x [10 x double]]* [[C]], i32 5, i32 [[TMP3]], double* [[VLA1]], %struct.TT* [[D]]) #[[ATTR4]]
 // CHECK7-NEXT:    br label [[OMP_OFFLOAD_CONT26]]
@@ -11124,10 +11000,10 @@ int bar(int n){
 // CHECK7-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l142(i32 [[TMP88]], [10 x float]* [[B]], i32 [[TMP1]], float* [[VLA]], [5 x [10 x double]]* [[C]], i32 5, i32 [[TMP3]], double* [[VLA1]], %struct.TT* [[D]]) #[[ATTR4]]
 // CHECK7-NEXT:    br label [[OMP_IF_END28]]
 // CHECK7:       omp_if.end28:
-// CHECK7-NEXT:    [[TMP154:%.*]] = load i32, i32* [[A]], align 4
-// CHECK7-NEXT:    [[TMP155:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK7-NEXT:    call void @llvm.stackrestore(i8* [[TMP155]])
-// CHECK7-NEXT:    ret i32 [[TMP154]]
+// CHECK7-NEXT:    [[TMP148:%.*]] = load i32, i32* [[A]], align 4
+// CHECK7-NEXT:    [[TMP149:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK7-NEXT:    call void @llvm.stackrestore(i8* [[TMP149]])
+// CHECK7-NEXT:    ret i32 [[TMP148]]
 //
 //
 // CHECK7-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l97
@@ -11853,14 +11729,14 @@ int bar(int n){
 // CHECK7-NEXT:    [[TMP10:%.*]] = mul nuw i32 2, [[TMP1]]
 // CHECK7-NEXT:    [[TMP11:%.*]] = mul nuw i32 [[TMP10]], 2
 // CHECK7-NEXT:    [[TMP12:%.*]] = sext i32 [[TMP11]] to i64
-// CHECK7-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK7-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to %struct.S1**
-// CHECK7-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP14]], align 4
-// CHECK7-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK7-NEXT:    [[TMP16:%.*]] = bitcast i8** [[TMP15]] to double**
-// CHECK7-NEXT:    store double* [[A]], double** [[TMP16]], align 4
-// CHECK7-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [6 x i64], [6 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK7-NEXT:    store i64 8, i64* [[TMP17]], align 4
+// CHECK7-NEXT:    [[TMP13:%.*]] = bitcast [6 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK7-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP13]], i8* align 4 bitcast ([6 x i64]* @.offload_sizes.13 to i8*), i32 48, i1 false)
+// CHECK7-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK7-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to %struct.S1**
+// CHECK7-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP15]], align 4
+// CHECK7-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK7-NEXT:    [[TMP17:%.*]] = bitcast i8** [[TMP16]] to double**
+// CHECK7-NEXT:    store double* [[A]], double** [[TMP17]], align 4
 // CHECK7-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
 // CHECK7-NEXT:    store i8* null, i8** [[TMP18]], align 4
 // CHECK7-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -11869,57 +11745,49 @@ int bar(int n){
 // CHECK7-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK7-NEXT:    [[TMP22:%.*]] = bitcast i8** [[TMP21]] to i32*
 // CHECK7-NEXT:    store i32 [[TMP6]], i32* [[TMP22]], align 4
-// CHECK7-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [6 x i64], [6 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK7-NEXT:    store i64 4, i64* [[TMP23]], align 4
-// CHECK7-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK7-NEXT:    store i8* null, i8** [[TMP24]], align 4
-// CHECK7-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK7-NEXT:    [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i32*
-// CHECK7-NEXT:    store i32 2, i32* [[TMP26]], align 4
-// CHECK7-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK7-NEXT:    [[TMP28:%.*]] = bitcast i8** [[TMP27]] to i32*
-// CHECK7-NEXT:    store i32 2, i32* [[TMP28]], align 4
-// CHECK7-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [6 x i64], [6 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK7-NEXT:    store i64 4, i64* [[TMP29]], align 4
-// CHECK7-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK7-NEXT:    store i8* null, i8** [[TMP30]], align 4
-// CHECK7-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK7-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK7-NEXT:    store i8* null, i8** [[TMP23]], align 4
+// CHECK7-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK7-NEXT:    [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i32*
+// CHECK7-NEXT:    store i32 2, i32* [[TMP25]], align 4
+// CHECK7-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK7-NEXT:    [[TMP27:%.*]] = bitcast i8** [[TMP26]] to i32*
+// CHECK7-NEXT:    store i32 2, i32* [[TMP27]], align 4
+// CHECK7-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK7-NEXT:    store i8* null, i8** [[TMP28]], align 4
+// CHECK7-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK7-NEXT:    [[TMP30:%.*]] = bitcast i8** [[TMP29]] to i32*
+// CHECK7-NEXT:    store i32 [[TMP1]], i32* [[TMP30]], align 4
+// CHECK7-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
 // CHECK7-NEXT:    [[TMP32:%.*]] = bitcast i8** [[TMP31]] to i32*
 // CHECK7-NEXT:    store i32 [[TMP1]], i32* [[TMP32]], align 4
-// CHECK7-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK7-NEXT:    [[TMP34:%.*]] = bitcast i8** [[TMP33]] to i32*
-// CHECK7-NEXT:    store i32 [[TMP1]], i32* [[TMP34]], align 4
-// CHECK7-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [6 x i64], [6 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK7-NEXT:    store i64 4, i64* [[TMP35]], align 4
-// CHECK7-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
-// CHECK7-NEXT:    store i8* null, i8** [[TMP36]], align 4
-// CHECK7-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK7-NEXT:    [[TMP38:%.*]] = bitcast i8** [[TMP37]] to i16**
-// CHECK7-NEXT:    store i16* [[VLA]], i16** [[TMP38]], align 4
-// CHECK7-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
-// CHECK7-NEXT:    [[TMP40:%.*]] = bitcast i8** [[TMP39]] to i16**
-// CHECK7-NEXT:    store i16* [[VLA]], i16** [[TMP40]], align 4
-// CHECK7-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [6 x i64], [6 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK7-NEXT:    store i64 [[TMP12]], i64* [[TMP41]], align 4
-// CHECK7-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
-// CHECK7-NEXT:    store i8* null, i8** [[TMP42]], align 4
-// CHECK7-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 5
-// CHECK7-NEXT:    [[TMP44:%.*]] = bitcast i8** [[TMP43]] to i32*
-// CHECK7-NEXT:    store i32 [[TMP8]], i32* [[TMP44]], align 4
-// CHECK7-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 5
-// CHECK7-NEXT:    [[TMP46:%.*]] = bitcast i8** [[TMP45]] to i32*
-// CHECK7-NEXT:    store i32 [[TMP8]], i32* [[TMP46]], align 4
-// CHECK7-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [6 x i64], [6 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 5
-// CHECK7-NEXT:    store i64 1, i64* [[TMP47]], align 4
-// CHECK7-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 5
-// CHECK7-NEXT:    store i8* null, i8** [[TMP48]], align 4
-// CHECK7-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK7-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK7-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [6 x i64], [6 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK7-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
+// CHECK7-NEXT:    store i8* null, i8** [[TMP33]], align 4
+// CHECK7-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK7-NEXT:    [[TMP35:%.*]] = bitcast i8** [[TMP34]] to i16**
+// CHECK7-NEXT:    store i16* [[VLA]], i16** [[TMP35]], align 4
+// CHECK7-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK7-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i16**
+// CHECK7-NEXT:    store i16* [[VLA]], i16** [[TMP37]], align 4
+// CHECK7-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [6 x i64], [6 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
+// CHECK7-NEXT:    store i64 [[TMP12]], i64* [[TMP38]], align 4
+// CHECK7-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
+// CHECK7-NEXT:    store i8* null, i8** [[TMP39]], align 4
+// CHECK7-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 5
+// CHECK7-NEXT:    [[TMP41:%.*]] = bitcast i8** [[TMP40]] to i32*
+// CHECK7-NEXT:    store i32 [[TMP8]], i32* [[TMP41]], align 4
+// CHECK7-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 5
+// CHECK7-NEXT:    [[TMP43:%.*]] = bitcast i8** [[TMP42]] to i32*
+// CHECK7-NEXT:    store i32 [[TMP8]], i32* [[TMP43]], align 4
+// CHECK7-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 5
+// CHECK7-NEXT:    store i8* null, i8** [[TMP44]], align 4
+// CHECK7-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK7-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK7-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [6 x i64], [6 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
 // CHECK7-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK7-NEXT:    [[TMP52:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l215.region_id, i32 6, i8** [[TMP49]], i8** [[TMP50]], i64* [[TMP51]], i64* getelementptr inbounds ([6 x i64], [6 x i64]* @.offload_maptypes.12, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
-// CHECK7-NEXT:    [[TMP53:%.*]] = icmp ne i32 [[TMP52]], 0
-// CHECK7-NEXT:    br i1 [[TMP53]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK7-NEXT:    [[TMP48:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l215.region_id, i32 6, i8** [[TMP45]], i8** [[TMP46]], i64* [[TMP47]], i64* getelementptr inbounds ([6 x i64], [6 x i64]* @.offload_maptypes.14, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK7-NEXT:    [[TMP49:%.*]] = icmp ne i32 [[TMP48]], 0
+// CHECK7-NEXT:    br i1 [[TMP49]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK7:       omp_offload.failed:
 // CHECK7-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l215(%struct.S1* [[THIS1]], i32 [[TMP6]], i32 2, i32 [[TMP1]], i16* [[VLA]], i32 [[TMP8]]) #[[ATTR4]]
 // CHECK7-NEXT:    br label [[OMP_OFFLOAD_CONT]]
@@ -11929,15 +11797,15 @@ int bar(int n){
 // CHECK7-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l215(%struct.S1* [[THIS1]], i32 [[TMP6]], i32 2, i32 [[TMP1]], i16* [[VLA]], i32 [[TMP8]]) #[[ATTR4]]
 // CHECK7-NEXT:    br label [[OMP_IF_END]]
 // CHECK7:       omp_if.end:
-// CHECK7-NEXT:    [[TMP54:%.*]] = mul nsw i32 1, [[TMP1]]
-// CHECK7-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i32 [[TMP54]]
+// CHECK7-NEXT:    [[TMP50:%.*]] = mul nsw i32 1, [[TMP1]]
+// CHECK7-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i32 [[TMP50]]
 // CHECK7-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1
-// CHECK7-NEXT:    [[TMP55:%.*]] = load i16, i16* [[ARRAYIDX4]], align 2
-// CHECK7-NEXT:    [[CONV5:%.*]] = sext i16 [[TMP55]] to i32
-// CHECK7-NEXT:    [[TMP56:%.*]] = load i32, i32* [[B]], align 4
-// CHECK7-NEXT:    [[ADD6:%.*]] = add nsw i32 [[CONV5]], [[TMP56]]
-// CHECK7-NEXT:    [[TMP57:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK7-NEXT:    call void @llvm.stackrestore(i8* [[TMP57]])
+// CHECK7-NEXT:    [[TMP51:%.*]] = load i16, i16* [[ARRAYIDX4]], align 2
+// CHECK7-NEXT:    [[CONV5:%.*]] = sext i16 [[TMP51]] to i32
+// CHECK7-NEXT:    [[TMP52:%.*]] = load i32, i32* [[B]], align 4
+// CHECK7-NEXT:    [[ADD6:%.*]] = add nsw i32 [[CONV5]], [[TMP52]]
+// CHECK7-NEXT:    [[TMP53:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK7-NEXT:    call void @llvm.stackrestore(i8* [[TMP53]])
 // CHECK7-NEXT:    ret i32 [[ADD6]]
 //
 //
@@ -12040,7 +11908,7 @@ int bar(int n){
 // CHECK7-NEXT:    [[ADD6:%.*]] = add i32 [[TMP40]], 1
 // CHECK7-NEXT:    [[TMP41:%.*]] = zext i32 [[ADD6]] to i64
 // CHECK7-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 [[TMP41]])
-// CHECK7-NEXT:    [[TMP42:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l197.region_id, i32 5, i8** [[TMP34]], i8** [[TMP35]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_sizes.14, i32 0, i32 0), i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.15, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK7-NEXT:    [[TMP42:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l197.region_id, i32 5, i8** [[TMP34]], i8** [[TMP35]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_sizes.16, i32 0, i32 0), i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.17, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
 // CHECK7-NEXT:    [[TMP43:%.*]] = icmp ne i32 [[TMP42]], 0
 // CHECK7-NEXT:    br i1 [[TMP43]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK7:       omp_offload.failed:
@@ -12110,7 +11978,7 @@ int bar(int n){
 // CHECK7-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK7-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK7-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK7-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l180.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.17, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.18, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK7-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l180.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.19, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.20, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
 // CHECK7-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK7-NEXT:    br i1 [[TMP23]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK7:       omp_offload.failed:
@@ -12157,11 +12025,11 @@ int bar(int n){
 // CHECK7-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
 // CHECK7-NEXT:    store i8 [[FROMBOOL]], i8* [[CONV3]], align 1
 // CHECK7-NEXT:    [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK7-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i32, i32, i32, i16*, i32)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i32 [[TMP5]], i32 [[TMP1]], i32 [[TMP2]], i16* [[TMP3]], i32 [[TMP7]])
+// CHECK7-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i32, i32, i32, i16*, i32)* @.omp_outlined..12 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i32 [[TMP5]], i32 [[TMP1]], i32 [[TMP2]], i16* [[TMP3]], i32 [[TMP7]])
 // CHECK7-NEXT:    ret void
 //
 //
-// CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..11
+// CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..12
 // CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR3]] {
 // CHECK7-NEXT:  entry:
 // CHECK7-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -12335,11 +12203,11 @@ int bar(int n){
 // CHECK7-NEXT:    [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8*
 // CHECK7-NEXT:    store i8 [[TMP7]], i8* [[CONV3]], align 1
 // CHECK7-NEXT:    [[TMP8:%.*]] = load i32, i32* [[AAA_CASTED]], align 4
-// CHECK7-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, [10 x i32]*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], i32 [[TMP8]], [10 x i32]* [[TMP0]])
+// CHECK7-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], i32 [[TMP8]], [10 x i32]* [[TMP0]])
 // CHECK7-NEXT:    ret void
 //
 //
-// CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..13
+// CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..15
 // CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[N:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] {
 // CHECK7-NEXT:  entry:
 // CHECK7-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -12497,11 +12365,11 @@ int bar(int n){
 // CHECK7-NEXT:    [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16*
 // CHECK7-NEXT:    store i16 [[TMP3]], i16* [[CONV1]], align 2
 // CHECK7-NEXT:    [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4
-// CHECK7-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], [10 x i32]* [[TMP0]])
+// CHECK7-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], [10 x i32]* [[TMP0]])
 // CHECK7-NEXT:    ret void
 //
 //
-// CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..16
+// CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..18
 // CHECK7-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] {
 // CHECK7-NEXT:  entry:
 // CHECK7-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -12800,14 +12668,14 @@ int bar(int n){
 // CHECK8-NEXT:    [[TMP92:%.*]] = mul nuw i32 5, [[TMP3]]
 // CHECK8-NEXT:    [[TMP93:%.*]] = mul nuw i32 [[TMP92]], 8
 // CHECK8-NEXT:    [[TMP94:%.*]] = sext i32 [[TMP93]] to i64
-// CHECK8-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 0
-// CHECK8-NEXT:    [[TMP96:%.*]] = bitcast i8** [[TMP95]] to i32*
-// CHECK8-NEXT:    store i32 [[TMP88]], i32* [[TMP96]], align 4
-// CHECK8-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 0
-// CHECK8-NEXT:    [[TMP98:%.*]] = bitcast i8** [[TMP97]] to i32*
-// CHECK8-NEXT:    store i32 [[TMP88]], i32* [[TMP98]], align 4
-// CHECK8-NEXT:    [[TMP99:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK8-NEXT:    store i64 4, i64* [[TMP99]], align 4
+// CHECK8-NEXT:    [[TMP95:%.*]] = bitcast [9 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK8-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP95]], i8* align 4 bitcast ([9 x i64]* @.offload_sizes.10 to i8*), i32 72, i1 false)
+// CHECK8-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 0
+// CHECK8-NEXT:    [[TMP97:%.*]] = bitcast i8** [[TMP96]] to i32*
+// CHECK8-NEXT:    store i32 [[TMP88]], i32* [[TMP97]], align 4
+// CHECK8-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 0
+// CHECK8-NEXT:    [[TMP99:%.*]] = bitcast i8** [[TMP98]] to i32*
+// CHECK8-NEXT:    store i32 [[TMP88]], i32* [[TMP99]], align 4
 // CHECK8-NEXT:    [[TMP100:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i32 0, i32 0
 // CHECK8-NEXT:    store i8* null, i8** [[TMP100]], align 4
 // CHECK8-NEXT:    [[TMP101:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 1
@@ -12816,87 +12684,75 @@ int bar(int n){
 // CHECK8-NEXT:    [[TMP103:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 1
 // CHECK8-NEXT:    [[TMP104:%.*]] = bitcast i8** [[TMP103]] to [10 x float]**
 // CHECK8-NEXT:    store [10 x float]* [[B]], [10 x float]** [[TMP104]], align 4
-// CHECK8-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK8-NEXT:    store i64 40, i64* [[TMP105]], align 4
-// CHECK8-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i32 0, i32 1
-// CHECK8-NEXT:    store i8* null, i8** [[TMP106]], align 4
-// CHECK8-NEXT:    [[TMP107:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 2
-// CHECK8-NEXT:    [[TMP108:%.*]] = bitcast i8** [[TMP107]] to i32*
-// CHECK8-NEXT:    store i32 [[TMP1]], i32* [[TMP108]], align 4
-// CHECK8-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 2
-// CHECK8-NEXT:    [[TMP110:%.*]] = bitcast i8** [[TMP109]] to i32*
-// CHECK8-NEXT:    store i32 [[TMP1]], i32* [[TMP110]], align 4
-// CHECK8-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK8-NEXT:    store i64 4, i64* [[TMP111]], align 4
-// CHECK8-NEXT:    [[TMP112:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i32 0, i32 2
-// CHECK8-NEXT:    store i8* null, i8** [[TMP112]], align 4
-// CHECK8-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 3
+// CHECK8-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i32 0, i32 1
+// CHECK8-NEXT:    store i8* null, i8** [[TMP105]], align 4
+// CHECK8-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 2
+// CHECK8-NEXT:    [[TMP107:%.*]] = bitcast i8** [[TMP106]] to i32*
+// CHECK8-NEXT:    store i32 [[TMP1]], i32* [[TMP107]], align 4
+// CHECK8-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 2
+// CHECK8-NEXT:    [[TMP109:%.*]] = bitcast i8** [[TMP108]] to i32*
+// CHECK8-NEXT:    store i32 [[TMP1]], i32* [[TMP109]], align 4
+// CHECK8-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i32 0, i32 2
+// CHECK8-NEXT:    store i8* null, i8** [[TMP110]], align 4
+// CHECK8-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 3
+// CHECK8-NEXT:    [[TMP112:%.*]] = bitcast i8** [[TMP111]] to float**
+// CHECK8-NEXT:    store float* [[VLA]], float** [[TMP112]], align 4
+// CHECK8-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 3
 // CHECK8-NEXT:    [[TMP114:%.*]] = bitcast i8** [[TMP113]] to float**
 // CHECK8-NEXT:    store float* [[VLA]], float** [[TMP114]], align 4
-// CHECK8-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 3
-// CHECK8-NEXT:    [[TMP116:%.*]] = bitcast i8** [[TMP115]] to float**
-// CHECK8-NEXT:    store float* [[VLA]], float** [[TMP116]], align 4
-// CHECK8-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK8-NEXT:    store i64 [[TMP91]], i64* [[TMP117]], align 4
-// CHECK8-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i32 0, i32 3
-// CHECK8-NEXT:    store i8* null, i8** [[TMP118]], align 4
-// CHECK8-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 4
+// CHECK8-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
+// CHECK8-NEXT:    store i64 [[TMP91]], i64* [[TMP115]], align 4
+// CHECK8-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i32 0, i32 3
+// CHECK8-NEXT:    store i8* null, i8** [[TMP116]], align 4
+// CHECK8-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 4
+// CHECK8-NEXT:    [[TMP118:%.*]] = bitcast i8** [[TMP117]] to [5 x [10 x double]]**
+// CHECK8-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP118]], align 4
+// CHECK8-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 4
 // CHECK8-NEXT:    [[TMP120:%.*]] = bitcast i8** [[TMP119]] to [5 x [10 x double]]**
 // CHECK8-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP120]], align 4
-// CHECK8-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 4
-// CHECK8-NEXT:    [[TMP122:%.*]] = bitcast i8** [[TMP121]] to [5 x [10 x double]]**
-// CHECK8-NEXT:    store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP122]], align 4
-// CHECK8-NEXT:    [[TMP123:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK8-NEXT:    store i64 400, i64* [[TMP123]], align 4
-// CHECK8-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i32 0, i32 4
-// CHECK8-NEXT:    store i8* null, i8** [[TMP124]], align 4
-// CHECK8-NEXT:    [[TMP125:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 5
-// CHECK8-NEXT:    [[TMP126:%.*]] = bitcast i8** [[TMP125]] to i32*
-// CHECK8-NEXT:    store i32 5, i32* [[TMP126]], align 4
-// CHECK8-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 5
+// CHECK8-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i32 0, i32 4
+// CHECK8-NEXT:    store i8* null, i8** [[TMP121]], align 4
+// CHECK8-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 5
+// CHECK8-NEXT:    [[TMP123:%.*]] = bitcast i8** [[TMP122]] to i32*
+// CHECK8-NEXT:    store i32 5, i32* [[TMP123]], align 4
+// CHECK8-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 5
+// CHECK8-NEXT:    [[TMP125:%.*]] = bitcast i8** [[TMP124]] to i32*
+// CHECK8-NEXT:    store i32 5, i32* [[TMP125]], align 4
+// CHECK8-NEXT:    [[TMP126:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i32 0, i32 5
+// CHECK8-NEXT:    store i8* null, i8** [[TMP126]], align 4
+// CHECK8-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 6
 // CHECK8-NEXT:    [[TMP128:%.*]] = bitcast i8** [[TMP127]] to i32*
-// CHECK8-NEXT:    store i32 5, i32* [[TMP128]], align 4
-// CHECK8-NEXT:    [[TMP129:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 5
-// CHECK8-NEXT:    store i64 4, i64* [[TMP129]], align 4
-// CHECK8-NEXT:    [[TMP130:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i32 0, i32 5
-// CHECK8-NEXT:    store i8* null, i8** [[TMP130]], align 4
-// CHECK8-NEXT:    [[TMP131:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 6
-// CHECK8-NEXT:    [[TMP132:%.*]] = bitcast i8** [[TMP131]] to i32*
-// CHECK8-NEXT:    store i32 [[TMP3]], i32* [[TMP132]], align 4
-// CHECK8-NEXT:    [[TMP133:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 6
-// CHECK8-NEXT:    [[TMP134:%.*]] = bitcast i8** [[TMP133]] to i32*
-// CHECK8-NEXT:    store i32 [[TMP3]], i32* [[TMP134]], align 4
-// CHECK8-NEXT:    [[TMP135:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 6
-// CHECK8-NEXT:    store i64 4, i64* [[TMP135]], align 4
-// CHECK8-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i32 0, i32 6
-// CHECK8-NEXT:    store i8* null, i8** [[TMP136]], align 4
-// CHECK8-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 7
-// CHECK8-NEXT:    [[TMP138:%.*]] = bitcast i8** [[TMP137]] to double**
-// CHECK8-NEXT:    store double* [[VLA1]], double** [[TMP138]], align 4
-// CHECK8-NEXT:    [[TMP139:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 7
-// CHECK8-NEXT:    [[TMP140:%.*]] = bitcast i8** [[TMP139]] to double**
-// CHECK8-NEXT:    store double* [[VLA1]], double** [[TMP140]], align 4
-// CHECK8-NEXT:    [[TMP141:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
-// CHECK8-NEXT:    store i64 [[TMP94]], i64* [[TMP141]], align 4
-// CHECK8-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i32 0, i32 7
+// CHECK8-NEXT:    store i32 [[TMP3]], i32* [[TMP128]], align 4
+// CHECK8-NEXT:    [[TMP129:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 6
+// CHECK8-NEXT:    [[TMP130:%.*]] = bitcast i8** [[TMP129]] to i32*
+// CHECK8-NEXT:    store i32 [[TMP3]], i32* [[TMP130]], align 4
+// CHECK8-NEXT:    [[TMP131:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i32 0, i32 6
+// CHECK8-NEXT:    store i8* null, i8** [[TMP131]], align 4
+// CHECK8-NEXT:    [[TMP132:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 7
+// CHECK8-NEXT:    [[TMP133:%.*]] = bitcast i8** [[TMP132]] to double**
+// CHECK8-NEXT:    store double* [[VLA1]], double** [[TMP133]], align 4
+// CHECK8-NEXT:    [[TMP134:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 7
+// CHECK8-NEXT:    [[TMP135:%.*]] = bitcast i8** [[TMP134]] to double**
+// CHECK8-NEXT:    store double* [[VLA1]], double** [[TMP135]], align 4
+// CHECK8-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
+// CHECK8-NEXT:    store i64 [[TMP94]], i64* [[TMP136]], align 4
+// CHECK8-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i32 0, i32 7
+// CHECK8-NEXT:    store i8* null, i8** [[TMP137]], align 4
+// CHECK8-NEXT:    [[TMP138:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 8
+// CHECK8-NEXT:    [[TMP139:%.*]] = bitcast i8** [[TMP138]] to %struct.TT**
+// CHECK8-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP139]], align 4
+// CHECK8-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 8
+// CHECK8-NEXT:    [[TMP141:%.*]] = bitcast i8** [[TMP140]] to %struct.TT**
+// CHECK8-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP141]], align 4
+// CHECK8-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i32 0, i32 8
 // CHECK8-NEXT:    store i8* null, i8** [[TMP142]], align 4
-// CHECK8-NEXT:    [[TMP143:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 8
-// CHECK8-NEXT:    [[TMP144:%.*]] = bitcast i8** [[TMP143]] to %struct.TT**
-// CHECK8-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP144]], align 4
-// CHECK8-NEXT:    [[TMP145:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 8
-// CHECK8-NEXT:    [[TMP146:%.*]] = bitcast i8** [[TMP145]] to %struct.TT**
-// CHECK8-NEXT:    store %struct.TT* [[D]], %struct.TT** [[TMP146]], align 4
-// CHECK8-NEXT:    [[TMP147:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 8
-// CHECK8-NEXT:    store i64 12, i64* [[TMP147]], align 4
-// CHECK8-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i32 0, i32 8
-// CHECK8-NEXT:    store i8* null, i8** [[TMP148]], align 4
-// CHECK8-NEXT:    [[TMP149:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 0
-// CHECK8-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 0
-// CHECK8-NEXT:    [[TMP151:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK8-NEXT:    [[TMP143:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 0
+// CHECK8-NEXT:    [[TMP144:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 0
+// CHECK8-NEXT:    [[TMP145:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
 // CHECK8-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK8-NEXT:    [[TMP152:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l142.region_id, i32 9, i8** [[TMP149]], i8** [[TMP150]], i64* [[TMP151]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.10, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
-// CHECK8-NEXT:    [[TMP153:%.*]] = icmp ne i32 [[TMP152]], 0
-// CHECK8-NEXT:    br i1 [[TMP153]], label [[OMP_OFFLOAD_FAILED25:%.*]], label [[OMP_OFFLOAD_CONT26:%.*]]
+// CHECK8-NEXT:    [[TMP146:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l142.region_id, i32 9, i8** [[TMP143]], i8** [[TMP144]], i64* [[TMP145]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.11, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK8-NEXT:    [[TMP147:%.*]] = icmp ne i32 [[TMP146]], 0
+// CHECK8-NEXT:    br i1 [[TMP147]], label [[OMP_OFFLOAD_FAILED25:%.*]], label [[OMP_OFFLOAD_CONT26:%.*]]
 // CHECK8:       omp_offload.failed25:
 // CHECK8-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l142(i32 [[TMP88]], [10 x float]* [[B]], i32 [[TMP1]], float* [[VLA]], [5 x [10 x double]]* [[C]], i32 5, i32 [[TMP3]], double* [[VLA1]], %struct.TT* [[D]]) #[[ATTR4]]
 // CHECK8-NEXT:    br label [[OMP_OFFLOAD_CONT26]]
@@ -12906,10 +12762,10 @@ int bar(int n){
 // CHECK8-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l142(i32 [[TMP88]], [10 x float]* [[B]], i32 [[TMP1]], float* [[VLA]], [5 x [10 x double]]* [[C]], i32 5, i32 [[TMP3]], double* [[VLA1]], %struct.TT* [[D]]) #[[ATTR4]]
 // CHECK8-NEXT:    br label [[OMP_IF_END28]]
 // CHECK8:       omp_if.end28:
-// CHECK8-NEXT:    [[TMP154:%.*]] = load i32, i32* [[A]], align 4
-// CHECK8-NEXT:    [[TMP155:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK8-NEXT:    call void @llvm.stackrestore(i8* [[TMP155]])
-// CHECK8-NEXT:    ret i32 [[TMP154]]
+// CHECK8-NEXT:    [[TMP148:%.*]] = load i32, i32* [[A]], align 4
+// CHECK8-NEXT:    [[TMP149:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK8-NEXT:    call void @llvm.stackrestore(i8* [[TMP149]])
+// CHECK8-NEXT:    ret i32 [[TMP148]]
 //
 //
 // CHECK8-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l97
@@ -13635,14 +13491,14 @@ int bar(int n){
 // CHECK8-NEXT:    [[TMP10:%.*]] = mul nuw i32 2, [[TMP1]]
 // CHECK8-NEXT:    [[TMP11:%.*]] = mul nuw i32 [[TMP10]], 2
 // CHECK8-NEXT:    [[TMP12:%.*]] = sext i32 [[TMP11]] to i64
-// CHECK8-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK8-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to %struct.S1**
-// CHECK8-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP14]], align 4
-// CHECK8-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK8-NEXT:    [[TMP16:%.*]] = bitcast i8** [[TMP15]] to double**
-// CHECK8-NEXT:    store double* [[A]], double** [[TMP16]], align 4
-// CHECK8-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [6 x i64], [6 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK8-NEXT:    store i64 8, i64* [[TMP17]], align 4
+// CHECK8-NEXT:    [[TMP13:%.*]] = bitcast [6 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK8-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP13]], i8* align 4 bitcast ([6 x i64]* @.offload_sizes.13 to i8*), i32 48, i1 false)
+// CHECK8-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK8-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to %struct.S1**
+// CHECK8-NEXT:    store %struct.S1* [[THIS1]], %struct.S1** [[TMP15]], align 4
+// CHECK8-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK8-NEXT:    [[TMP17:%.*]] = bitcast i8** [[TMP16]] to double**
+// CHECK8-NEXT:    store double* [[A]], double** [[TMP17]], align 4
 // CHECK8-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
 // CHECK8-NEXT:    store i8* null, i8** [[TMP18]], align 4
 // CHECK8-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -13651,57 +13507,49 @@ int bar(int n){
 // CHECK8-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK8-NEXT:    [[TMP22:%.*]] = bitcast i8** [[TMP21]] to i32*
 // CHECK8-NEXT:    store i32 [[TMP6]], i32* [[TMP22]], align 4
-// CHECK8-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [6 x i64], [6 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK8-NEXT:    store i64 4, i64* [[TMP23]], align 4
-// CHECK8-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK8-NEXT:    store i8* null, i8** [[TMP24]], align 4
-// CHECK8-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK8-NEXT:    [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i32*
-// CHECK8-NEXT:    store i32 2, i32* [[TMP26]], align 4
-// CHECK8-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK8-NEXT:    [[TMP28:%.*]] = bitcast i8** [[TMP27]] to i32*
-// CHECK8-NEXT:    store i32 2, i32* [[TMP28]], align 4
-// CHECK8-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [6 x i64], [6 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK8-NEXT:    store i64 4, i64* [[TMP29]], align 4
-// CHECK8-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK8-NEXT:    store i8* null, i8** [[TMP30]], align 4
-// CHECK8-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK8-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK8-NEXT:    store i8* null, i8** [[TMP23]], align 4
+// CHECK8-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK8-NEXT:    [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i32*
+// CHECK8-NEXT:    store i32 2, i32* [[TMP25]], align 4
+// CHECK8-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK8-NEXT:    [[TMP27:%.*]] = bitcast i8** [[TMP26]] to i32*
+// CHECK8-NEXT:    store i32 2, i32* [[TMP27]], align 4
+// CHECK8-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK8-NEXT:    store i8* null, i8** [[TMP28]], align 4
+// CHECK8-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK8-NEXT:    [[TMP30:%.*]] = bitcast i8** [[TMP29]] to i32*
+// CHECK8-NEXT:    store i32 [[TMP1]], i32* [[TMP30]], align 4
+// CHECK8-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
 // CHECK8-NEXT:    [[TMP32:%.*]] = bitcast i8** [[TMP31]] to i32*
 // CHECK8-NEXT:    store i32 [[TMP1]], i32* [[TMP32]], align 4
-// CHECK8-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK8-NEXT:    [[TMP34:%.*]] = bitcast i8** [[TMP33]] to i32*
-// CHECK8-NEXT:    store i32 [[TMP1]], i32* [[TMP34]], align 4
-// CHECK8-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [6 x i64], [6 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK8-NEXT:    store i64 4, i64* [[TMP35]], align 4
-// CHECK8-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
-// CHECK8-NEXT:    store i8* null, i8** [[TMP36]], align 4
-// CHECK8-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK8-NEXT:    [[TMP38:%.*]] = bitcast i8** [[TMP37]] to i16**
-// CHECK8-NEXT:    store i16* [[VLA]], i16** [[TMP38]], align 4
-// CHECK8-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
-// CHECK8-NEXT:    [[TMP40:%.*]] = bitcast i8** [[TMP39]] to i16**
-// CHECK8-NEXT:    store i16* [[VLA]], i16** [[TMP40]], align 4
-// CHECK8-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [6 x i64], [6 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK8-NEXT:    store i64 [[TMP12]], i64* [[TMP41]], align 4
-// CHECK8-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
-// CHECK8-NEXT:    store i8* null, i8** [[TMP42]], align 4
-// CHECK8-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 5
-// CHECK8-NEXT:    [[TMP44:%.*]] = bitcast i8** [[TMP43]] to i32*
-// CHECK8-NEXT:    store i32 [[TMP8]], i32* [[TMP44]], align 4
-// CHECK8-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 5
-// CHECK8-NEXT:    [[TMP46:%.*]] = bitcast i8** [[TMP45]] to i32*
-// CHECK8-NEXT:    store i32 [[TMP8]], i32* [[TMP46]], align 4
-// CHECK8-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [6 x i64], [6 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 5
-// CHECK8-NEXT:    store i64 1, i64* [[TMP47]], align 4
-// CHECK8-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 5
-// CHECK8-NEXT:    store i8* null, i8** [[TMP48]], align 4
-// CHECK8-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK8-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK8-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [6 x i64], [6 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK8-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
+// CHECK8-NEXT:    store i8* null, i8** [[TMP33]], align 4
+// CHECK8-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK8-NEXT:    [[TMP35:%.*]] = bitcast i8** [[TMP34]] to i16**
+// CHECK8-NEXT:    store i16* [[VLA]], i16** [[TMP35]], align 4
+// CHECK8-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK8-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i16**
+// CHECK8-NEXT:    store i16* [[VLA]], i16** [[TMP37]], align 4
+// CHECK8-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [6 x i64], [6 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
+// CHECK8-NEXT:    store i64 [[TMP12]], i64* [[TMP38]], align 4
+// CHECK8-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
+// CHECK8-NEXT:    store i8* null, i8** [[TMP39]], align 4
+// CHECK8-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 5
+// CHECK8-NEXT:    [[TMP41:%.*]] = bitcast i8** [[TMP40]] to i32*
+// CHECK8-NEXT:    store i32 [[TMP8]], i32* [[TMP41]], align 4
+// CHECK8-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 5
+// CHECK8-NEXT:    [[TMP43:%.*]] = bitcast i8** [[TMP42]] to i32*
+// CHECK8-NEXT:    store i32 [[TMP8]], i32* [[TMP43]], align 4
+// CHECK8-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 5
+// CHECK8-NEXT:    store i8* null, i8** [[TMP44]], align 4
+// CHECK8-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK8-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK8-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [6 x i64], [6 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
 // CHECK8-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK8-NEXT:    [[TMP52:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l215.region_id, i32 6, i8** [[TMP49]], i8** [[TMP50]], i64* [[TMP51]], i64* getelementptr inbounds ([6 x i64], [6 x i64]* @.offload_maptypes.12, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
-// CHECK8-NEXT:    [[TMP53:%.*]] = icmp ne i32 [[TMP52]], 0
-// CHECK8-NEXT:    br i1 [[TMP53]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK8-NEXT:    [[TMP48:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l215.region_id, i32 6, i8** [[TMP45]], i8** [[TMP46]], i64* [[TMP47]], i64* getelementptr inbounds ([6 x i64], [6 x i64]* @.offload_maptypes.14, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK8-NEXT:    [[TMP49:%.*]] = icmp ne i32 [[TMP48]], 0
+// CHECK8-NEXT:    br i1 [[TMP49]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK8:       omp_offload.failed:
 // CHECK8-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l215(%struct.S1* [[THIS1]], i32 [[TMP6]], i32 2, i32 [[TMP1]], i16* [[VLA]], i32 [[TMP8]]) #[[ATTR4]]
 // CHECK8-NEXT:    br label [[OMP_OFFLOAD_CONT]]
@@ -13711,15 +13559,15 @@ int bar(int n){
 // CHECK8-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l215(%struct.S1* [[THIS1]], i32 [[TMP6]], i32 2, i32 [[TMP1]], i16* [[VLA]], i32 [[TMP8]]) #[[ATTR4]]
 // CHECK8-NEXT:    br label [[OMP_IF_END]]
 // CHECK8:       omp_if.end:
-// CHECK8-NEXT:    [[TMP54:%.*]] = mul nsw i32 1, [[TMP1]]
-// CHECK8-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i32 [[TMP54]]
+// CHECK8-NEXT:    [[TMP50:%.*]] = mul nsw i32 1, [[TMP1]]
+// CHECK8-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i32 [[TMP50]]
 // CHECK8-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1
-// CHECK8-NEXT:    [[TMP55:%.*]] = load i16, i16* [[ARRAYIDX4]], align 2
-// CHECK8-NEXT:    [[CONV5:%.*]] = sext i16 [[TMP55]] to i32
-// CHECK8-NEXT:    [[TMP56:%.*]] = load i32, i32* [[B]], align 4
-// CHECK8-NEXT:    [[ADD6:%.*]] = add nsw i32 [[CONV5]], [[TMP56]]
-// CHECK8-NEXT:    [[TMP57:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK8-NEXT:    call void @llvm.stackrestore(i8* [[TMP57]])
+// CHECK8-NEXT:    [[TMP51:%.*]] = load i16, i16* [[ARRAYIDX4]], align 2
+// CHECK8-NEXT:    [[CONV5:%.*]] = sext i16 [[TMP51]] to i32
+// CHECK8-NEXT:    [[TMP52:%.*]] = load i32, i32* [[B]], align 4
+// CHECK8-NEXT:    [[ADD6:%.*]] = add nsw i32 [[CONV5]], [[TMP52]]
+// CHECK8-NEXT:    [[TMP53:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK8-NEXT:    call void @llvm.stackrestore(i8* [[TMP53]])
 // CHECK8-NEXT:    ret i32 [[ADD6]]
 //
 //
@@ -13822,7 +13670,7 @@ int bar(int n){
 // CHECK8-NEXT:    [[ADD6:%.*]] = add i32 [[TMP40]], 1
 // CHECK8-NEXT:    [[TMP41:%.*]] = zext i32 [[ADD6]] to i64
 // CHECK8-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 [[TMP41]])
-// CHECK8-NEXT:    [[TMP42:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l197.region_id, i32 5, i8** [[TMP34]], i8** [[TMP35]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_sizes.14, i32 0, i32 0), i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.15, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK8-NEXT:    [[TMP42:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l197.region_id, i32 5, i8** [[TMP34]], i8** [[TMP35]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_sizes.16, i32 0, i32 0), i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.17, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
 // CHECK8-NEXT:    [[TMP43:%.*]] = icmp ne i32 [[TMP42]], 0
 // CHECK8-NEXT:    br i1 [[TMP43]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK8:       omp_offload.failed:
@@ -13892,7 +13740,7 @@ int bar(int n){
 // CHECK8-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK8-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK8-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK8-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l180.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.17, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.18, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK8-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l180.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.19, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.20, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
 // CHECK8-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK8-NEXT:    br i1 [[TMP23]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK8:       omp_offload.failed:
@@ -13939,11 +13787,11 @@ int bar(int n){
 // CHECK8-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
 // CHECK8-NEXT:    store i8 [[FROMBOOL]], i8* [[CONV3]], align 1
 // CHECK8-NEXT:    [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK8-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i32, i32, i32, i16*, i32)* @.omp_outlined..11 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i32 [[TMP5]], i32 [[TMP1]], i32 [[TMP2]], i16* [[TMP3]], i32 [[TMP7]])
+// CHECK8-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S1*, i32, i32, i32, i16*, i32)* @.omp_outlined..12 to void (i32*, i32*, ...)*), %struct.S1* [[TMP0]], i32 [[TMP5]], i32 [[TMP1]], i32 [[TMP2]], i16* [[TMP3]], i32 [[TMP7]])
 // CHECK8-NEXT:    ret void
 //
 //
-// CHECK8-LABEL: define {{[^@]+}}@.omp_outlined..11
+// CHECK8-LABEL: define {{[^@]+}}@.omp_outlined..12
 // CHECK8-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.S1* noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR3]] {
 // CHECK8-NEXT:  entry:
 // CHECK8-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -14117,11 +13965,11 @@ int bar(int n){
 // CHECK8-NEXT:    [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8*
 // CHECK8-NEXT:    store i8 [[TMP7]], i8* [[CONV3]], align 1
 // CHECK8-NEXT:    [[TMP8:%.*]] = load i32, i32* [[AAA_CASTED]], align 4
-// CHECK8-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, [10 x i32]*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], i32 [[TMP8]], [10 x i32]* [[TMP0]])
+// CHECK8-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], i32 [[TMP8]], [10 x i32]* [[TMP0]])
 // CHECK8-NEXT:    ret void
 //
 //
-// CHECK8-LABEL: define {{[^@]+}}@.omp_outlined..13
+// CHECK8-LABEL: define {{[^@]+}}@.omp_outlined..15
 // CHECK8-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[N:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] {
 // CHECK8-NEXT:  entry:
 // CHECK8-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -14279,11 +14127,11 @@ int bar(int n){
 // CHECK8-NEXT:    [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16*
 // CHECK8-NEXT:    store i16 [[TMP3]], i16* [[CONV1]], align 2
 // CHECK8-NEXT:    [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4
-// CHECK8-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], [10 x i32]* [[TMP0]])
+// CHECK8-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i32 [[TMP2]], i32 [[TMP4]], [10 x i32]* [[TMP0]])
 // CHECK8-NEXT:    ret void
 //
 //
-// CHECK8-LABEL: define {{[^@]+}}@.omp_outlined..16
+// CHECK8-LABEL: define {{[^@]+}}@.omp_outlined..18
 // CHECK8-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] {
 // CHECK8-NEXT:  entry:
 // CHECK8-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4

diff  --git a/clang/test/OpenMP/target_teams_distribute_simd_collapse_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_simd_collapse_codegen.cpp
index 6cdb8dad158a8..972a04fffd5d7 100644
--- a/clang/test/OpenMP/target_teams_distribute_simd_collapse_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_simd_collapse_codegen.cpp
@@ -1025,14 +1025,14 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP9:%.*]] = load i64, i64* [[M_CASTED]], align 8
 // CHECK9-NEXT:    [[TMP10:%.*]] = mul nuw i64 [[TMP1]], [[TMP3]]
 // CHECK9-NEXT:    [[TMP11:%.*]] = mul nuw i64 [[TMP10]], 4
-// CHECK9-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP13:%.*]] = bitcast i8** [[TMP12]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP7]], i64* [[TMP13]], align 8
-// CHECK9-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP7]], i64* [[TMP15]], align 8
-// CHECK9-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK9-NEXT:    store i64 4, i64* [[TMP16]], align 8
+// CHECK9-NEXT:    [[TMP12:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK9-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP12]], i8* align 8 bitcast ([5 x i64]* @.offload_sizes to i8*), i64 40, i1 false)
+// CHECK9-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP7]], i64* [[TMP14]], align 8
+// CHECK9-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP16:%.*]] = bitcast i8** [[TMP15]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP7]], i64* [[TMP16]], align 8
 // CHECK9-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
 // CHECK9-NEXT:    store i8* null, i8** [[TMP17]], align 8
 // CHECK9-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -1041,75 +1041,69 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK9-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i64*
 // CHECK9-NEXT:    store i64 [[TMP9]], i64* [[TMP21]], align 8
-// CHECK9-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK9-NEXT:    store i64 4, i64* [[TMP22]], align 8
-// CHECK9-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK9-NEXT:    store i8* null, i8** [[TMP23]], align 8
-// CHECK9-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP25]], align 8
-// CHECK9-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP27:%.*]] = bitcast i8** [[TMP26]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP27]], align 8
-// CHECK9-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK9-NEXT:    store i64 8, i64* [[TMP28]], align 8
-// CHECK9-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK9-NEXT:    store i8* null, i8** [[TMP29]], align 8
-// CHECK9-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK9-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK9-NEXT:    store i8* null, i8** [[TMP22]], align 8
+// CHECK9-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP24]], align 8
+// CHECK9-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP26]], align 8
+// CHECK9-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK9-NEXT:    store i8* null, i8** [[TMP27]], align 8
+// CHECK9-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK9-NEXT:    [[TMP29:%.*]] = bitcast i8** [[TMP28]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP3]], i64* [[TMP29]], align 8
+// CHECK9-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
 // CHECK9-NEXT:    [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i64*
 // CHECK9-NEXT:    store i64 [[TMP3]], i64* [[TMP31]], align 8
-// CHECK9-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK9-NEXT:    [[TMP33:%.*]] = bitcast i8** [[TMP32]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP3]], i64* [[TMP33]], align 8
-// CHECK9-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK9-NEXT:    store i64 8, i64* [[TMP34]], align 8
-// CHECK9-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
-// CHECK9-NEXT:    store i8* null, i8** [[TMP35]], align 8
-// CHECK9-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK9-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i32**
-// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP37]], align 8
-// CHECK9-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
-// CHECK9-NEXT:    [[TMP39:%.*]] = bitcast i8** [[TMP38]] to i32**
-// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP39]], align 8
-// CHECK9-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK9-NEXT:    store i64 [[TMP11]], i64* [[TMP40]], align 8
-// CHECK9-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
-// CHECK9-NEXT:    store i8* null, i8** [[TMP41]], align 8
-// CHECK9-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP45:%.*]] = load i32, i32* [[N]], align 4
-// CHECK9-NEXT:    store i32 [[TMP45]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK9-NEXT:    [[TMP46:%.*]] = load i32, i32* [[M]], align 4
-// CHECK9-NEXT:    store i32 [[TMP46]], i32* [[DOTCAPTURE_EXPR_3]], align 4
-// CHECK9-NEXT:    [[TMP47:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK9-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP47]], 0
+// CHECK9-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
+// CHECK9-NEXT:    store i8* null, i8** [[TMP32]], align 8
+// CHECK9-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK9-NEXT:    [[TMP34:%.*]] = bitcast i8** [[TMP33]] to i32**
+// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP34]], align 8
+// CHECK9-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK9-NEXT:    [[TMP36:%.*]] = bitcast i8** [[TMP35]] to i32**
+// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP36]], align 8
+// CHECK9-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
+// CHECK9-NEXT:    store i64 [[TMP11]], i64* [[TMP37]], align 8
+// CHECK9-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
+// CHECK9-NEXT:    store i8* null, i8** [[TMP38]], align 8
+// CHECK9-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP42:%.*]] = load i32, i32* [[N]], align 4
+// CHECK9-NEXT:    store i32 [[TMP42]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK9-NEXT:    [[TMP43:%.*]] = load i32, i32* [[M]], align 4
+// CHECK9-NEXT:    store i32 [[TMP43]], i32* [[DOTCAPTURE_EXPR_3]], align 4
+// CHECK9-NEXT:    [[TMP44:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK9-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP44]], 0
 // CHECK9-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK9-NEXT:    [[CONV5:%.*]] = sext i32 [[DIV]] to i64
-// CHECK9-NEXT:    [[TMP48:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4
-// CHECK9-NEXT:    [[SUB6:%.*]] = sub nsw i32 [[TMP48]], 0
+// CHECK9-NEXT:    [[TMP45:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4
+// CHECK9-NEXT:    [[SUB6:%.*]] = sub nsw i32 [[TMP45]], 0
 // CHECK9-NEXT:    [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1
 // CHECK9-NEXT:    [[CONV8:%.*]] = sext i32 [[DIV7]] to i64
 // CHECK9-NEXT:    [[MUL:%.*]] = mul nsw i64 [[CONV5]], [[CONV8]]
 // CHECK9-NEXT:    [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1
 // CHECK9-NEXT:    store i64 [[SUB9]], i64* [[DOTCAPTURE_EXPR_4]], align 8
-// CHECK9-NEXT:    [[TMP49:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_4]], align 8
-// CHECK9-NEXT:    [[ADD:%.*]] = add nsw i64 [[TMP49]], 1
+// CHECK9-NEXT:    [[TMP46:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_4]], align 8
+// CHECK9-NEXT:    [[ADD:%.*]] = add nsw i64 [[TMP46]], 1
 // CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 [[ADD]])
-// CHECK9-NEXT:    [[TMP50:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l80.region_id, i32 5, i8** [[TMP42]], i8** [[TMP43]], i64* [[TMP44]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
-// CHECK9-NEXT:    [[TMP51:%.*]] = icmp ne i32 [[TMP50]], 0
-// CHECK9-NEXT:    br i1 [[TMP51]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK9-NEXT:    [[TMP47:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l80.region_id, i32 5, i8** [[TMP39]], i8** [[TMP40]], i64* [[TMP41]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK9-NEXT:    [[TMP48:%.*]] = icmp ne i32 [[TMP47]], 0
+// CHECK9-NEXT:    br i1 [[TMP48]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK9:       omp_offload.failed:
 // CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l80(i64 [[TMP7]], i64 [[TMP9]], i64 [[TMP1]], i64 [[TMP3]], i32* [[VLA]]) #[[ATTR4:[0-9]+]]
 // CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK9:       omp_offload.cont:
-// CHECK9-NEXT:    [[TMP52:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK9-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef signext [[TMP52]])
+// CHECK9-NEXT:    [[TMP49:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK9-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef signext [[TMP49]])
 // CHECK9-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK9-NEXT:    [[TMP53:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK9-NEXT:    call void @llvm.stackrestore(i8* [[TMP53]])
-// CHECK9-NEXT:    [[TMP54:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK9-NEXT:    ret i32 [[TMP54]]
+// CHECK9-NEXT:    [[TMP50:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK9-NEXT:    call void @llvm.stackrestore(i8* [[TMP50]])
+// CHECK9-NEXT:    [[TMP51:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK9-NEXT:    ret i32 [[TMP51]]
 //
 //
 // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l80
@@ -1311,7 +1305,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK9-LABEL: define {{[^@]+}}@_Z5tmainIiLi10ELi2EEiT_
-// CHECK9-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
+// CHECK9-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR6:[0-9]+]] comdat {
 // CHECK9-NEXT:  entry:
 // CHECK9-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[A:%.*]] = alloca [10 x [2 x i32]], align 4
@@ -1332,7 +1326,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 20)
-// CHECK9-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l67.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.2, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK9-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l67.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.2, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.3, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
 // CHECK9-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK9-NEXT:    br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK9:       omp_offload.failed:
@@ -1442,7 +1436,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK9-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK9-SAME: () #[[ATTR6:[0-9]+]] {
+// CHECK9-SAME: () #[[ATTR7:[0-9]+]] {
 // CHECK9-NEXT:  entry:
 // CHECK9-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK9-NEXT:    ret void
@@ -1495,14 +1489,14 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[TMP9:%.*]] = load i64, i64* [[M_CASTED]], align 8
 // CHECK10-NEXT:    [[TMP10:%.*]] = mul nuw i64 [[TMP1]], [[TMP3]]
 // CHECK10-NEXT:    [[TMP11:%.*]] = mul nuw i64 [[TMP10]], 4
-// CHECK10-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP13:%.*]] = bitcast i8** [[TMP12]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP7]], i64* [[TMP13]], align 8
-// CHECK10-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP7]], i64* [[TMP15]], align 8
-// CHECK10-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK10-NEXT:    store i64 4, i64* [[TMP16]], align 8
+// CHECK10-NEXT:    [[TMP12:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK10-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP12]], i8* align 8 bitcast ([5 x i64]* @.offload_sizes to i8*), i64 40, i1 false)
+// CHECK10-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP7]], i64* [[TMP14]], align 8
+// CHECK10-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP16:%.*]] = bitcast i8** [[TMP15]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP7]], i64* [[TMP16]], align 8
 // CHECK10-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
 // CHECK10-NEXT:    store i8* null, i8** [[TMP17]], align 8
 // CHECK10-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -1511,75 +1505,69 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK10-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i64*
 // CHECK10-NEXT:    store i64 [[TMP9]], i64* [[TMP21]], align 8
-// CHECK10-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK10-NEXT:    store i64 4, i64* [[TMP22]], align 8
-// CHECK10-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK10-NEXT:    store i8* null, i8** [[TMP23]], align 8
-// CHECK10-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK10-NEXT:    [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP25]], align 8
-// CHECK10-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK10-NEXT:    [[TMP27:%.*]] = bitcast i8** [[TMP26]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP27]], align 8
-// CHECK10-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK10-NEXT:    store i64 8, i64* [[TMP28]], align 8
-// CHECK10-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK10-NEXT:    store i8* null, i8** [[TMP29]], align 8
-// CHECK10-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK10-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK10-NEXT:    store i8* null, i8** [[TMP22]], align 8
+// CHECK10-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK10-NEXT:    [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP24]], align 8
+// CHECK10-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK10-NEXT:    [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP26]], align 8
+// CHECK10-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK10-NEXT:    store i8* null, i8** [[TMP27]], align 8
+// CHECK10-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK10-NEXT:    [[TMP29:%.*]] = bitcast i8** [[TMP28]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP3]], i64* [[TMP29]], align 8
+// CHECK10-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
 // CHECK10-NEXT:    [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i64*
 // CHECK10-NEXT:    store i64 [[TMP3]], i64* [[TMP31]], align 8
-// CHECK10-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK10-NEXT:    [[TMP33:%.*]] = bitcast i8** [[TMP32]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP3]], i64* [[TMP33]], align 8
-// CHECK10-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK10-NEXT:    store i64 8, i64* [[TMP34]], align 8
-// CHECK10-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
-// CHECK10-NEXT:    store i8* null, i8** [[TMP35]], align 8
-// CHECK10-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK10-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i32**
-// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP37]], align 8
-// CHECK10-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
-// CHECK10-NEXT:    [[TMP39:%.*]] = bitcast i8** [[TMP38]] to i32**
-// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP39]], align 8
-// CHECK10-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK10-NEXT:    store i64 [[TMP11]], i64* [[TMP40]], align 8
-// CHECK10-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
-// CHECK10-NEXT:    store i8* null, i8** [[TMP41]], align 8
-// CHECK10-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP45:%.*]] = load i32, i32* [[N]], align 4
-// CHECK10-NEXT:    store i32 [[TMP45]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK10-NEXT:    [[TMP46:%.*]] = load i32, i32* [[M]], align 4
-// CHECK10-NEXT:    store i32 [[TMP46]], i32* [[DOTCAPTURE_EXPR_3]], align 4
-// CHECK10-NEXT:    [[TMP47:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK10-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP47]], 0
+// CHECK10-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
+// CHECK10-NEXT:    store i8* null, i8** [[TMP32]], align 8
+// CHECK10-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK10-NEXT:    [[TMP34:%.*]] = bitcast i8** [[TMP33]] to i32**
+// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP34]], align 8
+// CHECK10-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK10-NEXT:    [[TMP36:%.*]] = bitcast i8** [[TMP35]] to i32**
+// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP36]], align 8
+// CHECK10-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
+// CHECK10-NEXT:    store i64 [[TMP11]], i64* [[TMP37]], align 8
+// CHECK10-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
+// CHECK10-NEXT:    store i8* null, i8** [[TMP38]], align 8
+// CHECK10-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP42:%.*]] = load i32, i32* [[N]], align 4
+// CHECK10-NEXT:    store i32 [[TMP42]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK10-NEXT:    [[TMP43:%.*]] = load i32, i32* [[M]], align 4
+// CHECK10-NEXT:    store i32 [[TMP43]], i32* [[DOTCAPTURE_EXPR_3]], align 4
+// CHECK10-NEXT:    [[TMP44:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK10-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP44]], 0
 // CHECK10-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK10-NEXT:    [[CONV5:%.*]] = sext i32 [[DIV]] to i64
-// CHECK10-NEXT:    [[TMP48:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4
-// CHECK10-NEXT:    [[SUB6:%.*]] = sub nsw i32 [[TMP48]], 0
+// CHECK10-NEXT:    [[TMP45:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4
+// CHECK10-NEXT:    [[SUB6:%.*]] = sub nsw i32 [[TMP45]], 0
 // CHECK10-NEXT:    [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1
 // CHECK10-NEXT:    [[CONV8:%.*]] = sext i32 [[DIV7]] to i64
 // CHECK10-NEXT:    [[MUL:%.*]] = mul nsw i64 [[CONV5]], [[CONV8]]
 // CHECK10-NEXT:    [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1
 // CHECK10-NEXT:    store i64 [[SUB9]], i64* [[DOTCAPTURE_EXPR_4]], align 8
-// CHECK10-NEXT:    [[TMP49:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_4]], align 8
-// CHECK10-NEXT:    [[ADD:%.*]] = add nsw i64 [[TMP49]], 1
+// CHECK10-NEXT:    [[TMP46:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_4]], align 8
+// CHECK10-NEXT:    [[ADD:%.*]] = add nsw i64 [[TMP46]], 1
 // CHECK10-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 [[ADD]])
-// CHECK10-NEXT:    [[TMP50:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l80.region_id, i32 5, i8** [[TMP42]], i8** [[TMP43]], i64* [[TMP44]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
-// CHECK10-NEXT:    [[TMP51:%.*]] = icmp ne i32 [[TMP50]], 0
-// CHECK10-NEXT:    br i1 [[TMP51]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK10-NEXT:    [[TMP47:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l80.region_id, i32 5, i8** [[TMP39]], i8** [[TMP40]], i64* [[TMP41]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK10-NEXT:    [[TMP48:%.*]] = icmp ne i32 [[TMP47]], 0
+// CHECK10-NEXT:    br i1 [[TMP48]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK10:       omp_offload.failed:
 // CHECK10-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l80(i64 [[TMP7]], i64 [[TMP9]], i64 [[TMP1]], i64 [[TMP3]], i32* [[VLA]]) #[[ATTR4:[0-9]+]]
 // CHECK10-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK10:       omp_offload.cont:
-// CHECK10-NEXT:    [[TMP52:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK10-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef signext [[TMP52]])
+// CHECK10-NEXT:    [[TMP49:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK10-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef signext [[TMP49]])
 // CHECK10-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK10-NEXT:    [[TMP53:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK10-NEXT:    call void @llvm.stackrestore(i8* [[TMP53]])
-// CHECK10-NEXT:    [[TMP54:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK10-NEXT:    ret i32 [[TMP54]]
+// CHECK10-NEXT:    [[TMP50:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK10-NEXT:    call void @llvm.stackrestore(i8* [[TMP50]])
+// CHECK10-NEXT:    [[TMP51:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK10-NEXT:    ret i32 [[TMP51]]
 //
 //
 // CHECK10-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l80
@@ -1781,7 +1769,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK10-LABEL: define {{[^@]+}}@_Z5tmainIiLi10ELi2EEiT_
-// CHECK10-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
+// CHECK10-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR6:[0-9]+]] comdat {
 // CHECK10-NEXT:  entry:
 // CHECK10-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK10-NEXT:    [[A:%.*]] = alloca [10 x [2 x i32]], align 4
@@ -1802,7 +1790,7 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK10-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK10-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 20)
-// CHECK10-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l67.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.2, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK10-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l67.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.2, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.3, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
 // CHECK10-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK10-NEXT:    br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK10:       omp_offload.failed:
@@ -1912,7 +1900,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK10-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK10-SAME: () #[[ATTR6:[0-9]+]] {
+// CHECK10-SAME: () #[[ATTR7:[0-9]+]] {
 // CHECK10-NEXT:  entry:
 // CHECK10-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK10-NEXT:    ret void
@@ -1962,14 +1950,14 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP8:%.*]] = mul nuw i32 [[TMP0]], [[TMP1]]
 // CHECK11-NEXT:    [[TMP9:%.*]] = mul nuw i32 [[TMP8]], 4
 // CHECK11-NEXT:    [[TMP10:%.*]] = sext i32 [[TMP9]] to i64
-// CHECK11-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP5]], i32* [[TMP12]], align 4
-// CHECK11-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP5]], i32* [[TMP14]], align 4
-// CHECK11-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK11-NEXT:    store i64 4, i64* [[TMP15]], align 4
+// CHECK11-NEXT:    [[TMP11:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK11-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP11]], i8* align 4 bitcast ([5 x i64]* @.offload_sizes to i8*), i32 40, i1 false)
+// CHECK11-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP13:%.*]] = bitcast i8** [[TMP12]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP5]], i32* [[TMP13]], align 4
+// CHECK11-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP5]], i32* [[TMP15]], align 4
 // CHECK11-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
 // CHECK11-NEXT:    store i8* null, i8** [[TMP16]], align 4
 // CHECK11-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -1978,75 +1966,69 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK11-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32*
 // CHECK11-NEXT:    store i32 [[TMP7]], i32* [[TMP20]], align 4
-// CHECK11-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK11-NEXT:    store i64 4, i64* [[TMP21]], align 4
-// CHECK11-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK11-NEXT:    store i8* null, i8** [[TMP22]], align 4
-// CHECK11-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP24]], align 4
-// CHECK11-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP26]], align 4
-// CHECK11-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK11-NEXT:    store i64 4, i64* [[TMP27]], align 4
-// CHECK11-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK11-NEXT:    store i8* null, i8** [[TMP28]], align 4
-// CHECK11-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK11-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK11-NEXT:    store i8* null, i8** [[TMP21]], align 4
+// CHECK11-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP23]], align 4
+// CHECK11-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP25]], align 4
+// CHECK11-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK11-NEXT:    store i8* null, i8** [[TMP26]], align 4
+// CHECK11-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK11-NEXT:    [[TMP28:%.*]] = bitcast i8** [[TMP27]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP1]], i32* [[TMP28]], align 4
+// CHECK11-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
 // CHECK11-NEXT:    [[TMP30:%.*]] = bitcast i8** [[TMP29]] to i32*
 // CHECK11-NEXT:    store i32 [[TMP1]], i32* [[TMP30]], align 4
-// CHECK11-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK11-NEXT:    [[TMP32:%.*]] = bitcast i8** [[TMP31]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP1]], i32* [[TMP32]], align 4
-// CHECK11-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK11-NEXT:    store i64 4, i64* [[TMP33]], align 4
-// CHECK11-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
-// CHECK11-NEXT:    store i8* null, i8** [[TMP34]], align 4
-// CHECK11-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK11-NEXT:    [[TMP36:%.*]] = bitcast i8** [[TMP35]] to i32**
-// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP36]], align 4
-// CHECK11-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
-// CHECK11-NEXT:    [[TMP38:%.*]] = bitcast i8** [[TMP37]] to i32**
-// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP38]], align 4
-// CHECK11-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK11-NEXT:    store i64 [[TMP10]], i64* [[TMP39]], align 4
-// CHECK11-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
-// CHECK11-NEXT:    store i8* null, i8** [[TMP40]], align 4
-// CHECK11-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP44:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP44]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK11-NEXT:    [[TMP45:%.*]] = load i32, i32* [[M]], align 4
-// CHECK11-NEXT:    store i32 [[TMP45]], i32* [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK11-NEXT:    [[TMP46:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK11-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP46]], 0
+// CHECK11-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
+// CHECK11-NEXT:    store i8* null, i8** [[TMP31]], align 4
+// CHECK11-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK11-NEXT:    [[TMP33:%.*]] = bitcast i8** [[TMP32]] to i32**
+// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP33]], align 4
+// CHECK11-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK11-NEXT:    [[TMP35:%.*]] = bitcast i8** [[TMP34]] to i32**
+// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP35]], align 4
+// CHECK11-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
+// CHECK11-NEXT:    store i64 [[TMP10]], i64* [[TMP36]], align 4
+// CHECK11-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
+// CHECK11-NEXT:    store i8* null, i8** [[TMP37]], align 4
+// CHECK11-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP41:%.*]] = load i32, i32* [[N]], align 4
+// CHECK11-NEXT:    store i32 [[TMP41]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK11-NEXT:    [[TMP42:%.*]] = load i32, i32* [[M]], align 4
+// CHECK11-NEXT:    store i32 [[TMP42]], i32* [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK11-NEXT:    [[TMP43:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK11-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP43]], 0
 // CHECK11-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK11-NEXT:    [[CONV:%.*]] = sext i32 [[DIV]] to i64
-// CHECK11-NEXT:    [[TMP47:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK11-NEXT:    [[SUB4:%.*]] = sub nsw i32 [[TMP47]], 0
+// CHECK11-NEXT:    [[TMP44:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK11-NEXT:    [[SUB4:%.*]] = sub nsw i32 [[TMP44]], 0
 // CHECK11-NEXT:    [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1
 // CHECK11-NEXT:    [[CONV6:%.*]] = sext i32 [[DIV5]] to i64
 // CHECK11-NEXT:    [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]]
 // CHECK11-NEXT:    [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1
 // CHECK11-NEXT:    store i64 [[SUB7]], i64* [[DOTCAPTURE_EXPR_3]], align 8
-// CHECK11-NEXT:    [[TMP48:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8
-// CHECK11-NEXT:    [[ADD:%.*]] = add nsw i64 [[TMP48]], 1
+// CHECK11-NEXT:    [[TMP45:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8
+// CHECK11-NEXT:    [[ADD:%.*]] = add nsw i64 [[TMP45]], 1
 // CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 [[ADD]])
-// CHECK11-NEXT:    [[TMP49:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l80.region_id, i32 5, i8** [[TMP41]], i8** [[TMP42]], i64* [[TMP43]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
-// CHECK11-NEXT:    [[TMP50:%.*]] = icmp ne i32 [[TMP49]], 0
-// CHECK11-NEXT:    br i1 [[TMP50]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK11-NEXT:    [[TMP46:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l80.region_id, i32 5, i8** [[TMP38]], i8** [[TMP39]], i64* [[TMP40]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK11-NEXT:    [[TMP47:%.*]] = icmp ne i32 [[TMP46]], 0
+// CHECK11-NEXT:    br i1 [[TMP47]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK11:       omp_offload.failed:
 // CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l80(i32 [[TMP5]], i32 [[TMP7]], i32 [[TMP0]], i32 [[TMP1]], i32* [[VLA]]) #[[ATTR4:[0-9]+]]
 // CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK11:       omp_offload.cont:
-// CHECK11-NEXT:    [[TMP51:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK11-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef [[TMP51]])
+// CHECK11-NEXT:    [[TMP48:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK11-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef [[TMP48]])
 // CHECK11-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK11-NEXT:    [[TMP52:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK11-NEXT:    call void @llvm.stackrestore(i8* [[TMP52]])
-// CHECK11-NEXT:    [[TMP53:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK11-NEXT:    ret i32 [[TMP53]]
+// CHECK11-NEXT:    [[TMP49:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK11-NEXT:    call void @llvm.stackrestore(i8* [[TMP49]])
+// CHECK11-NEXT:    [[TMP50:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK11-NEXT:    ret i32 [[TMP50]]
 //
 //
 // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l80
@@ -2240,7 +2222,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK11-LABEL: define {{[^@]+}}@_Z5tmainIiLi10ELi2EEiT_
-// CHECK11-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
+// CHECK11-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR6:[0-9]+]] comdat {
 // CHECK11-NEXT:  entry:
 // CHECK11-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[A:%.*]] = alloca [10 x [2 x i32]], align 4
@@ -2261,7 +2243,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 20)
-// CHECK11-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l67.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.2, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK11-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l67.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.2, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.3, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
 // CHECK11-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK11-NEXT:    br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK11:       omp_offload.failed:
@@ -2369,7 +2351,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK11-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK11-SAME: () #[[ATTR6:[0-9]+]] {
+// CHECK11-SAME: () #[[ATTR7:[0-9]+]] {
 // CHECK11-NEXT:  entry:
 // CHECK11-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK11-NEXT:    ret void
@@ -2419,14 +2401,14 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[TMP8:%.*]] = mul nuw i32 [[TMP0]], [[TMP1]]
 // CHECK12-NEXT:    [[TMP9:%.*]] = mul nuw i32 [[TMP8]], 4
 // CHECK12-NEXT:    [[TMP10:%.*]] = sext i32 [[TMP9]] to i64
-// CHECK12-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP5]], i32* [[TMP12]], align 4
-// CHECK12-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP5]], i32* [[TMP14]], align 4
-// CHECK12-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK12-NEXT:    store i64 4, i64* [[TMP15]], align 4
+// CHECK12-NEXT:    [[TMP11:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK12-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP11]], i8* align 4 bitcast ([5 x i64]* @.offload_sizes to i8*), i32 40, i1 false)
+// CHECK12-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP13:%.*]] = bitcast i8** [[TMP12]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP5]], i32* [[TMP13]], align 4
+// CHECK12-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP5]], i32* [[TMP15]], align 4
 // CHECK12-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
 // CHECK12-NEXT:    store i8* null, i8** [[TMP16]], align 4
 // CHECK12-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -2435,75 +2417,69 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK12-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32*
 // CHECK12-NEXT:    store i32 [[TMP7]], i32* [[TMP20]], align 4
-// CHECK12-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK12-NEXT:    store i64 4, i64* [[TMP21]], align 4
-// CHECK12-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK12-NEXT:    store i8* null, i8** [[TMP22]], align 4
-// CHECK12-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK12-NEXT:    [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP24]], align 4
-// CHECK12-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK12-NEXT:    [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP26]], align 4
-// CHECK12-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK12-NEXT:    store i64 4, i64* [[TMP27]], align 4
-// CHECK12-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK12-NEXT:    store i8* null, i8** [[TMP28]], align 4
-// CHECK12-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK12-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK12-NEXT:    store i8* null, i8** [[TMP21]], align 4
+// CHECK12-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK12-NEXT:    [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP23]], align 4
+// CHECK12-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK12-NEXT:    [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP25]], align 4
+// CHECK12-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK12-NEXT:    store i8* null, i8** [[TMP26]], align 4
+// CHECK12-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK12-NEXT:    [[TMP28:%.*]] = bitcast i8** [[TMP27]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP1]], i32* [[TMP28]], align 4
+// CHECK12-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
 // CHECK12-NEXT:    [[TMP30:%.*]] = bitcast i8** [[TMP29]] to i32*
 // CHECK12-NEXT:    store i32 [[TMP1]], i32* [[TMP30]], align 4
-// CHECK12-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK12-NEXT:    [[TMP32:%.*]] = bitcast i8** [[TMP31]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP1]], i32* [[TMP32]], align 4
-// CHECK12-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK12-NEXT:    store i64 4, i64* [[TMP33]], align 4
-// CHECK12-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
-// CHECK12-NEXT:    store i8* null, i8** [[TMP34]], align 4
-// CHECK12-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK12-NEXT:    [[TMP36:%.*]] = bitcast i8** [[TMP35]] to i32**
-// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP36]], align 4
-// CHECK12-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
-// CHECK12-NEXT:    [[TMP38:%.*]] = bitcast i8** [[TMP37]] to i32**
-// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP38]], align 4
-// CHECK12-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK12-NEXT:    store i64 [[TMP10]], i64* [[TMP39]], align 4
-// CHECK12-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
-// CHECK12-NEXT:    store i8* null, i8** [[TMP40]], align 4
-// CHECK12-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP44:%.*]] = load i32, i32* [[N]], align 4
-// CHECK12-NEXT:    store i32 [[TMP44]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK12-NEXT:    [[TMP45:%.*]] = load i32, i32* [[M]], align 4
-// CHECK12-NEXT:    store i32 [[TMP45]], i32* [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK12-NEXT:    [[TMP46:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK12-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP46]], 0
+// CHECK12-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
+// CHECK12-NEXT:    store i8* null, i8** [[TMP31]], align 4
+// CHECK12-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK12-NEXT:    [[TMP33:%.*]] = bitcast i8** [[TMP32]] to i32**
+// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP33]], align 4
+// CHECK12-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK12-NEXT:    [[TMP35:%.*]] = bitcast i8** [[TMP34]] to i32**
+// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP35]], align 4
+// CHECK12-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
+// CHECK12-NEXT:    store i64 [[TMP10]], i64* [[TMP36]], align 4
+// CHECK12-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
+// CHECK12-NEXT:    store i8* null, i8** [[TMP37]], align 4
+// CHECK12-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP41:%.*]] = load i32, i32* [[N]], align 4
+// CHECK12-NEXT:    store i32 [[TMP41]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK12-NEXT:    [[TMP42:%.*]] = load i32, i32* [[M]], align 4
+// CHECK12-NEXT:    store i32 [[TMP42]], i32* [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK12-NEXT:    [[TMP43:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK12-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP43]], 0
 // CHECK12-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK12-NEXT:    [[CONV:%.*]] = sext i32 [[DIV]] to i64
-// CHECK12-NEXT:    [[TMP47:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK12-NEXT:    [[SUB4:%.*]] = sub nsw i32 [[TMP47]], 0
+// CHECK12-NEXT:    [[TMP44:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK12-NEXT:    [[SUB4:%.*]] = sub nsw i32 [[TMP44]], 0
 // CHECK12-NEXT:    [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1
 // CHECK12-NEXT:    [[CONV6:%.*]] = sext i32 [[DIV5]] to i64
 // CHECK12-NEXT:    [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]]
 // CHECK12-NEXT:    [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1
 // CHECK12-NEXT:    store i64 [[SUB7]], i64* [[DOTCAPTURE_EXPR_3]], align 8
-// CHECK12-NEXT:    [[TMP48:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8
-// CHECK12-NEXT:    [[ADD:%.*]] = add nsw i64 [[TMP48]], 1
+// CHECK12-NEXT:    [[TMP45:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8
+// CHECK12-NEXT:    [[ADD:%.*]] = add nsw i64 [[TMP45]], 1
 // CHECK12-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 [[ADD]])
-// CHECK12-NEXT:    [[TMP49:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l80.region_id, i32 5, i8** [[TMP41]], i8** [[TMP42]], i64* [[TMP43]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
-// CHECK12-NEXT:    [[TMP50:%.*]] = icmp ne i32 [[TMP49]], 0
-// CHECK12-NEXT:    br i1 [[TMP50]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK12-NEXT:    [[TMP46:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l80.region_id, i32 5, i8** [[TMP38]], i8** [[TMP39]], i64* [[TMP40]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK12-NEXT:    [[TMP47:%.*]] = icmp ne i32 [[TMP46]], 0
+// CHECK12-NEXT:    br i1 [[TMP47]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK12:       omp_offload.failed:
 // CHECK12-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l80(i32 [[TMP5]], i32 [[TMP7]], i32 [[TMP0]], i32 [[TMP1]], i32* [[VLA]]) #[[ATTR4:[0-9]+]]
 // CHECK12-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK12:       omp_offload.cont:
-// CHECK12-NEXT:    [[TMP51:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK12-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef [[TMP51]])
+// CHECK12-NEXT:    [[TMP48:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK12-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef [[TMP48]])
 // CHECK12-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK12-NEXT:    [[TMP52:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK12-NEXT:    call void @llvm.stackrestore(i8* [[TMP52]])
-// CHECK12-NEXT:    [[TMP53:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK12-NEXT:    ret i32 [[TMP53]]
+// CHECK12-NEXT:    [[TMP49:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK12-NEXT:    call void @llvm.stackrestore(i8* [[TMP49]])
+// CHECK12-NEXT:    [[TMP50:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK12-NEXT:    ret i32 [[TMP50]]
 //
 //
 // CHECK12-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l80
@@ -2697,7 +2673,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK12-LABEL: define {{[^@]+}}@_Z5tmainIiLi10ELi2EEiT_
-// CHECK12-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
+// CHECK12-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR6:[0-9]+]] comdat {
 // CHECK12-NEXT:  entry:
 // CHECK12-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK12-NEXT:    [[A:%.*]] = alloca [10 x [2 x i32]], align 4
@@ -2718,7 +2694,7 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK12-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK12-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 20)
-// CHECK12-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l67.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.2, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK12-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l67.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.2, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.3, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
 // CHECK12-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK12-NEXT:    br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK12:       omp_offload.failed:
@@ -2826,7 +2802,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK12-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK12-SAME: () #[[ATTR6:[0-9]+]] {
+// CHECK12-SAME: () #[[ATTR7:[0-9]+]] {
 // CHECK12-NEXT:  entry:
 // CHECK12-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK12-NEXT:    ret void

diff  --git a/clang/test/OpenMP/target_teams_distribute_simd_dist_schedule_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_simd_dist_schedule_codegen.cpp
index c101551efb4fb..16aa1286236e7 100644
--- a/clang/test/OpenMP/target_teams_distribute_simd_dist_schedule_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_simd_dist_schedule_codegen.cpp
@@ -2132,14 +2132,14 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    store i32 [[TMP3]], i32* [[CONV]], align 4
 // CHECK9-NEXT:    [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8
 // CHECK9-NEXT:    [[TMP5:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK9-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP4]], i64* [[TMP7]], align 8
-// CHECK9-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP4]], i64* [[TMP9]], align 8
-// CHECK9-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK9-NEXT:    store i64 4, i64* [[TMP10]], align 8
+// CHECK9-NEXT:    [[TMP6:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK9-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP6]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes to i8*), i64 24, i1 false)
+// CHECK9-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP4]], i64* [[TMP8]], align 8
+// CHECK9-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP4]], i64* [[TMP10]], align 8
 // CHECK9-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
 // CHECK9-NEXT:    store i8* null, i8** [[TMP11]], align 8
 // CHECK9-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -2148,176 +2148,168 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK9-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i64*
 // CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP15]], align 8
-// CHECK9-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK9-NEXT:    store i64 8, i64* [[TMP16]], align 8
-// CHECK9-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK9-NEXT:    store i8* null, i8** [[TMP17]], align 8
-// CHECK9-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
-// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP19]], align 8
-// CHECK9-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32**
-// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP21]], align 8
-// CHECK9-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK9-NEXT:    store i64 [[TMP5]], i64* [[TMP22]], align 8
-// CHECK9-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK9-NEXT:    store i8* null, i8** [[TMP23]], align 8
-// CHECK9-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP27:%.*]] = load i32, i32* [[N]], align 4
-// CHECK9-NEXT:    store i32 [[TMP27]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK9-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK9-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP28]], 0
+// CHECK9-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK9-NEXT:    store i8* null, i8** [[TMP16]], align 8
+// CHECK9-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32**
+// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP18]], align 8
+// CHECK9-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32**
+// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP20]], align 8
+// CHECK9-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
+// CHECK9-NEXT:    store i64 [[TMP5]], i64* [[TMP21]], align 8
+// CHECK9-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK9-NEXT:    store i8* null, i8** [[TMP22]], align 8
+// CHECK9-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP26:%.*]] = load i32, i32* [[N]], align 4
+// CHECK9-NEXT:    store i32 [[TMP26]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK9-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK9-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP27]], 0
 // CHECK9-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK9-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK9-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK9-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK9-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP29]], 1
-// CHECK9-NEXT:    [[TMP30:%.*]] = zext i32 [[ADD]] to i64
-// CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 [[TMP30]])
-// CHECK9-NEXT:    [[TMP31:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l94.region_id, i32 3, i8** [[TMP24]], i8** [[TMP25]], i64* [[TMP26]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
-// CHECK9-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
-// CHECK9-NEXT:    br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK9-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK9-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP28]], 1
+// CHECK9-NEXT:    [[TMP29:%.*]] = zext i32 [[ADD]] to i64
+// CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 [[TMP29]])
+// CHECK9-NEXT:    [[TMP30:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l94.region_id, i32 3, i8** [[TMP23]], i8** [[TMP24]], i64* [[TMP25]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK9-NEXT:    [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
+// CHECK9-NEXT:    br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK9:       omp_offload.failed:
 // CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l94(i64 [[TMP4]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR4:[0-9]+]]
 // CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK9:       omp_offload.cont:
-// CHECK9-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N]], align 4
+// CHECK9-NEXT:    [[TMP32:%.*]] = load i32, i32* [[N]], align 4
 // CHECK9-NEXT:    [[CONV4:%.*]] = bitcast i64* [[N_CASTED3]] to i32*
-// CHECK9-NEXT:    store i32 [[TMP33]], i32* [[CONV4]], align 4
-// CHECK9-NEXT:    [[TMP34:%.*]] = load i64, i64* [[N_CASTED3]], align 8
-// CHECK9-NEXT:    [[TMP35:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK9-NEXT:    store i32 [[TMP32]], i32* [[CONV4]], align 4
+// CHECK9-NEXT:    [[TMP33:%.*]] = load i64, i64* [[N_CASTED3]], align 8
+// CHECK9-NEXT:    [[TMP34:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK9-NEXT:    [[TMP35:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES8]] to i8*
+// CHECK9-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP35]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes.2 to i8*), i64 24, i1 false)
 // CHECK9-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP34]], i64* [[TMP37]], align 8
+// CHECK9-NEXT:    store i64 [[TMP33]], i64* [[TMP37]], align 8
 // CHECK9-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP39:%.*]] = bitcast i8** [[TMP38]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP34]], i64* [[TMP39]], align 8
-// CHECK9-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 0
-// CHECK9-NEXT:    store i64 4, i64* [[TMP40]], align 8
-// CHECK9-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 0
-// CHECK9-NEXT:    store i8* null, i8** [[TMP41]], align 8
-// CHECK9-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 1
-// CHECK9-NEXT:    [[TMP43:%.*]] = bitcast i8** [[TMP42]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP43]], align 8
-// CHECK9-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 1
-// CHECK9-NEXT:    [[TMP45:%.*]] = bitcast i8** [[TMP44]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP45]], align 8
-// CHECK9-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 1
-// CHECK9-NEXT:    store i64 8, i64* [[TMP46]], align 8
-// CHECK9-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 1
-// CHECK9-NEXT:    store i8* null, i8** [[TMP47]], align 8
-// CHECK9-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 2
+// CHECK9-NEXT:    store i64 [[TMP33]], i64* [[TMP39]], align 8
+// CHECK9-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 0
+// CHECK9-NEXT:    store i8* null, i8** [[TMP40]], align 8
+// CHECK9-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 1
+// CHECK9-NEXT:    [[TMP42:%.*]] = bitcast i8** [[TMP41]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP42]], align 8
+// CHECK9-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 1
+// CHECK9-NEXT:    [[TMP44:%.*]] = bitcast i8** [[TMP43]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP44]], align 8
+// CHECK9-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 1
+// CHECK9-NEXT:    store i8* null, i8** [[TMP45]], align 8
+// CHECK9-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP47:%.*]] = bitcast i8** [[TMP46]] to i32**
+// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP47]], align 8
+// CHECK9-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 2
 // CHECK9-NEXT:    [[TMP49:%.*]] = bitcast i8** [[TMP48]] to i32**
 // CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP49]], align 8
-// CHECK9-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP51:%.*]] = bitcast i8** [[TMP50]] to i32**
-// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP51]], align 8
-// CHECK9-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 2
-// CHECK9-NEXT:    store i64 [[TMP35]], i64* [[TMP52]], align 8
-// CHECK9-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 2
-// CHECK9-NEXT:    store i8* null, i8** [[TMP53]], align 8
-// CHECK9-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP57:%.*]] = load i32, i32* [[N]], align 4
-// CHECK9-NEXT:    store i32 [[TMP57]], i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK9-NEXT:    [[TMP58:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK9-NEXT:    [[SUB12:%.*]] = sub nsw i32 [[TMP58]], 0
+// CHECK9-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 2
+// CHECK9-NEXT:    store i64 [[TMP34]], i64* [[TMP50]], align 8
+// CHECK9-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 2
+// CHECK9-NEXT:    store i8* null, i8** [[TMP51]], align 8
+// CHECK9-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP55:%.*]] = load i32, i32* [[N]], align 4
+// CHECK9-NEXT:    store i32 [[TMP55]], i32* [[DOTCAPTURE_EXPR_10]], align 4
+// CHECK9-NEXT:    [[TMP56:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
+// CHECK9-NEXT:    [[SUB12:%.*]] = sub nsw i32 [[TMP56]], 0
 // CHECK9-NEXT:    [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1
 // CHECK9-NEXT:    [[SUB14:%.*]] = sub nsw i32 [[DIV13]], 1
 // CHECK9-NEXT:    store i32 [[SUB14]], i32* [[DOTCAPTURE_EXPR_11]], align 4
-// CHECK9-NEXT:    [[TMP59:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_11]], align 4
-// CHECK9-NEXT:    [[ADD15:%.*]] = add nsw i32 [[TMP59]], 1
-// CHECK9-NEXT:    [[TMP60:%.*]] = zext i32 [[ADD15]] to i64
-// CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 [[TMP60]])
-// CHECK9-NEXT:    [[TMP61:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l98.region_id, i32 3, i8** [[TMP54]], i8** [[TMP55]], i64* [[TMP56]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.2, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
-// CHECK9-NEXT:    [[TMP62:%.*]] = icmp ne i32 [[TMP61]], 0
-// CHECK9-NEXT:    br i1 [[TMP62]], label [[OMP_OFFLOAD_FAILED16:%.*]], label [[OMP_OFFLOAD_CONT17:%.*]]
+// CHECK9-NEXT:    [[TMP57:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_11]], align 4
+// CHECK9-NEXT:    [[ADD15:%.*]] = add nsw i32 [[TMP57]], 1
+// CHECK9-NEXT:    [[TMP58:%.*]] = zext i32 [[ADD15]] to i64
+// CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 [[TMP58]])
+// CHECK9-NEXT:    [[TMP59:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l98.region_id, i32 3, i8** [[TMP52]], i8** [[TMP53]], i64* [[TMP54]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.3, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK9-NEXT:    [[TMP60:%.*]] = icmp ne i32 [[TMP59]], 0
+// CHECK9-NEXT:    br i1 [[TMP60]], label [[OMP_OFFLOAD_FAILED16:%.*]], label [[OMP_OFFLOAD_CONT17:%.*]]
 // CHECK9:       omp_offload.failed16:
-// CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l98(i64 [[TMP34]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR4]]
+// CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l98(i64 [[TMP33]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR4]]
 // CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT17]]
 // CHECK9:       omp_offload.cont17:
-// CHECK9-NEXT:    [[TMP63:%.*]] = load i32, i32* [[N]], align 4
-// CHECK9-NEXT:    store i32 [[TMP63]], i32* [[DOTCAPTURE_EXPR_18]], align 4
-// CHECK9-NEXT:    [[TMP64:%.*]] = load i32, i32* [[N]], align 4
+// CHECK9-NEXT:    [[TMP61:%.*]] = load i32, i32* [[N]], align 4
+// CHECK9-NEXT:    store i32 [[TMP61]], i32* [[DOTCAPTURE_EXPR_18]], align 4
+// CHECK9-NEXT:    [[TMP62:%.*]] = load i32, i32* [[N]], align 4
 // CHECK9-NEXT:    [[CONV20:%.*]] = bitcast i64* [[N_CASTED19]] to i32*
-// CHECK9-NEXT:    store i32 [[TMP64]], i32* [[CONV20]], align 4
-// CHECK9-NEXT:    [[TMP65:%.*]] = load i64, i64* [[N_CASTED19]], align 8
-// CHECK9-NEXT:    [[TMP66:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4
+// CHECK9-NEXT:    store i32 [[TMP62]], i32* [[CONV20]], align 4
+// CHECK9-NEXT:    [[TMP63:%.*]] = load i64, i64* [[N_CASTED19]], align 8
+// CHECK9-NEXT:    [[TMP64:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4
 // CHECK9-NEXT:    [[CONV21:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
-// CHECK9-NEXT:    store i32 [[TMP66]], i32* [[CONV21]], align 4
-// CHECK9-NEXT:    [[TMP67:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK9-NEXT:    [[TMP68:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK9-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP70:%.*]] = bitcast i8** [[TMP69]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP65]], i64* [[TMP70]], align 8
-// CHECK9-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP72:%.*]] = bitcast i8** [[TMP71]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP65]], i64* [[TMP72]], align 8
-// CHECK9-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 0
-// CHECK9-NEXT:    store i64 4, i64* [[TMP73]], align 8
-// CHECK9-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 0
-// CHECK9-NEXT:    store i8* null, i8** [[TMP74]], align 8
-// CHECK9-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 1
+// CHECK9-NEXT:    store i32 [[TMP64]], i32* [[CONV21]], align 4
+// CHECK9-NEXT:    [[TMP65:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
+// CHECK9-NEXT:    [[TMP66:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK9-NEXT:    [[TMP67:%.*]] = bitcast [4 x i64]* [[DOTOFFLOAD_SIZES25]] to i8*
+// CHECK9-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP67]], i8* align 8 bitcast ([4 x i64]* @.offload_sizes.5 to i8*), i64 32, i1 false)
+// CHECK9-NEXT:    [[TMP68:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP69:%.*]] = bitcast i8** [[TMP68]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP63]], i64* [[TMP69]], align 8
+// CHECK9-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP71:%.*]] = bitcast i8** [[TMP70]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP63]], i64* [[TMP71]], align 8
+// CHECK9-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 0
+// CHECK9-NEXT:    store i8* null, i8** [[TMP72]], align 8
+// CHECK9-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 1
+// CHECK9-NEXT:    [[TMP74:%.*]] = bitcast i8** [[TMP73]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP74]], align 8
+// CHECK9-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 1
 // CHECK9-NEXT:    [[TMP76:%.*]] = bitcast i8** [[TMP75]] to i64*
 // CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP76]], align 8
-// CHECK9-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 1
-// CHECK9-NEXT:    [[TMP78:%.*]] = bitcast i8** [[TMP77]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP78]], align 8
-// CHECK9-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 1
-// CHECK9-NEXT:    store i64 8, i64* [[TMP79]], align 8
-// CHECK9-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 1
-// CHECK9-NEXT:    store i8* null, i8** [[TMP80]], align 8
-// CHECK9-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP82:%.*]] = bitcast i8** [[TMP81]] to i32**
-// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP82]], align 8
-// CHECK9-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP84:%.*]] = bitcast i8** [[TMP83]] to i32**
-// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP84]], align 8
-// CHECK9-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 2
-// CHECK9-NEXT:    store i64 [[TMP68]], i64* [[TMP85]], align 8
-// CHECK9-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 2
-// CHECK9-NEXT:    store i8* null, i8** [[TMP86]], align 8
-// CHECK9-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 3
-// CHECK9-NEXT:    [[TMP88:%.*]] = bitcast i8** [[TMP87]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP67]], i64* [[TMP88]], align 8
-// CHECK9-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 3
-// CHECK9-NEXT:    [[TMP90:%.*]] = bitcast i8** [[TMP89]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP67]], i64* [[TMP90]], align 8
-// CHECK9-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 3
-// CHECK9-NEXT:    store i64 4, i64* [[TMP91]], align 8
-// CHECK9-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 3
-// CHECK9-NEXT:    store i8* null, i8** [[TMP92]], align 8
-// CHECK9-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP96:%.*]] = load i32, i32* [[N]], align 4
-// CHECK9-NEXT:    store i32 [[TMP96]], i32* [[DOTCAPTURE_EXPR_27]], align 4
-// CHECK9-NEXT:    [[TMP97:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_27]], align 4
-// CHECK9-NEXT:    [[SUB29:%.*]] = sub nsw i32 [[TMP97]], 0
+// CHECK9-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 1
+// CHECK9-NEXT:    store i8* null, i8** [[TMP77]], align 8
+// CHECK9-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP79:%.*]] = bitcast i8** [[TMP78]] to i32**
+// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP79]], align 8
+// CHECK9-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP81:%.*]] = bitcast i8** [[TMP80]] to i32**
+// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP81]], align 8
+// CHECK9-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 2
+// CHECK9-NEXT:    store i64 [[TMP66]], i64* [[TMP82]], align 8
+// CHECK9-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 2
+// CHECK9-NEXT:    store i8* null, i8** [[TMP83]], align 8
+// CHECK9-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 3
+// CHECK9-NEXT:    [[TMP85:%.*]] = bitcast i8** [[TMP84]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP65]], i64* [[TMP85]], align 8
+// CHECK9-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 3
+// CHECK9-NEXT:    [[TMP87:%.*]] = bitcast i8** [[TMP86]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP65]], i64* [[TMP87]], align 8
+// CHECK9-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 3
+// CHECK9-NEXT:    store i8* null, i8** [[TMP88]], align 8
+// CHECK9-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP92:%.*]] = load i32, i32* [[N]], align 4
+// CHECK9-NEXT:    store i32 [[TMP92]], i32* [[DOTCAPTURE_EXPR_27]], align 4
+// CHECK9-NEXT:    [[TMP93:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_27]], align 4
+// CHECK9-NEXT:    [[SUB29:%.*]] = sub nsw i32 [[TMP93]], 0
 // CHECK9-NEXT:    [[DIV30:%.*]] = sdiv i32 [[SUB29]], 1
 // CHECK9-NEXT:    [[SUB31:%.*]] = sub nsw i32 [[DIV30]], 1
 // CHECK9-NEXT:    store i32 [[SUB31]], i32* [[DOTCAPTURE_EXPR_28]], align 4
-// CHECK9-NEXT:    [[TMP98:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_28]], align 4
-// CHECK9-NEXT:    [[ADD32:%.*]] = add nsw i32 [[TMP98]], 1
-// CHECK9-NEXT:    [[TMP99:%.*]] = zext i32 [[ADD32]] to i64
-// CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 [[TMP99]])
-// CHECK9-NEXT:    [[TMP100:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l102.region_id, i32 4, i8** [[TMP93]], i8** [[TMP94]], i64* [[TMP95]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
-// CHECK9-NEXT:    [[TMP101:%.*]] = icmp ne i32 [[TMP100]], 0
-// CHECK9-NEXT:    br i1 [[TMP101]], label [[OMP_OFFLOAD_FAILED33:%.*]], label [[OMP_OFFLOAD_CONT34:%.*]]
+// CHECK9-NEXT:    [[TMP94:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_28]], align 4
+// CHECK9-NEXT:    [[ADD32:%.*]] = add nsw i32 [[TMP94]], 1
+// CHECK9-NEXT:    [[TMP95:%.*]] = zext i32 [[ADD32]] to i64
+// CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 [[TMP95]])
+// CHECK9-NEXT:    [[TMP96:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l102.region_id, i32 4, i8** [[TMP89]], i8** [[TMP90]], i64* [[TMP91]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.6, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK9-NEXT:    [[TMP97:%.*]] = icmp ne i32 [[TMP96]], 0
+// CHECK9-NEXT:    br i1 [[TMP97]], label [[OMP_OFFLOAD_FAILED33:%.*]], label [[OMP_OFFLOAD_CONT34:%.*]]
 // CHECK9:       omp_offload.failed33:
-// CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l102(i64 [[TMP65]], i64 [[TMP1]], i32* [[VLA]], i64 [[TMP67]]) #[[ATTR4]]
+// CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l102(i64 [[TMP63]], i64 [[TMP1]], i32* [[VLA]], i64 [[TMP65]]) #[[ATTR4]]
 // CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT34]]
 // CHECK9:       omp_offload.cont34:
-// CHECK9-NEXT:    [[TMP102:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK9-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10EEiT_(i32 noundef signext [[TMP102]])
+// CHECK9-NEXT:    [[TMP98:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK9-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10EEiT_(i32 noundef signext [[TMP98]])
 // CHECK9-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK9-NEXT:    [[TMP103:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK9-NEXT:    call void @llvm.stackrestore(i8* [[TMP103]])
-// CHECK9-NEXT:    [[TMP104:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK9-NEXT:    ret i32 [[TMP104]]
+// CHECK9-NEXT:    [[TMP99:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK9-NEXT:    call void @llvm.stackrestore(i8* [[TMP99]])
+// CHECK9-NEXT:    [[TMP100:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK9-NEXT:    ret i32 [[TMP100]]
 //
 //
 // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l94
@@ -2601,11 +2593,11 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK9-NEXT:    store i32 [[TMP4]], i32* [[CONV3]], align 4
 // CHECK9-NEXT:    [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP3]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP5]])
+// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i64 [[TMP3]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP5]])
 // CHECK9-NEXT:    ret void
 //
 //
-// CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3
+// CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..4
 // CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR3]] {
 // CHECK9-NEXT:  entry:
 // CHECK9-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -2734,7 +2726,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK9-LABEL: define {{[^@]+}}@_Z5tmainIiLi10EEiT_
-// CHECK9-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
+// CHECK9-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR6:[0-9]+]] comdat {
 // CHECK9-NEXT:  entry:
 // CHECK9-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[A:%.*]] = alloca [10 x i32], align 4
@@ -2762,7 +2754,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK9-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l76.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.6, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK9-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l76.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.8, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
 // CHECK9-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK9-NEXT:    br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK9:       omp_offload.failed:
@@ -2780,7 +2772,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
 // CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK9-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l80.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.8, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK9-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l80.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.11, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.12, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
 // CHECK9-NEXT:    [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
 // CHECK9-NEXT:    br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]]
 // CHECK9:       omp_offload.failed5:
@@ -2798,7 +2790,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS8]], i32 0, i32 0
 // CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK9-NEXT:    [[TMP25:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l84.region_id, i32 1, i8** [[TMP23]], i8** [[TMP24]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.11, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.12, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK9-NEXT:    [[TMP25:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l84.region_id, i32 1, i8** [[TMP23]], i8** [[TMP24]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.14, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.15, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
 // CHECK9-NEXT:    [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0
 // CHECK9-NEXT:    br i1 [[TMP26]], label [[OMP_OFFLOAD_FAILED11:%.*]], label [[OMP_OFFLOAD_CONT12:%.*]]
 // CHECK9:       omp_offload.failed11:
@@ -2814,11 +2806,11 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8
 // CHECK9-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8
 // CHECK9-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8
-// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK9-NEXT:    ret void
 //
 //
-// CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..5
+// CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..7
 // CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR3]] {
 // CHECK9-NEXT:  entry:
 // CHECK9-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -2898,11 +2890,11 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8
 // CHECK9-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8
 // CHECK9-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8
-// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK9-NEXT:    ret void
 //
 //
-// CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..7
+// CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..10
 // CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR3]] {
 // CHECK9-NEXT:  entry:
 // CHECK9-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -2982,11 +2974,11 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8
 // CHECK9-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8
 // CHECK9-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8
-// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK9-NEXT:    ret void
 //
 //
-// CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..10
+// CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..13
 // CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR3]] {
 // CHECK9-NEXT:  entry:
 // CHECK9-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -3078,7 +3070,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK9-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK9-SAME: () #[[ATTR6:[0-9]+]] {
+// CHECK9-SAME: () #[[ATTR7:[0-9]+]] {
 // CHECK9-NEXT:  entry:
 // CHECK9-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK9-NEXT:    ret void
@@ -3134,14 +3126,14 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    store i32 [[TMP3]], i32* [[CONV]], align 4
 // CHECK10-NEXT:    [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8
 // CHECK10-NEXT:    [[TMP5:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK10-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP4]], i64* [[TMP7]], align 8
-// CHECK10-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP4]], i64* [[TMP9]], align 8
-// CHECK10-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK10-NEXT:    store i64 4, i64* [[TMP10]], align 8
+// CHECK10-NEXT:    [[TMP6:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK10-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP6]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes to i8*), i64 24, i1 false)
+// CHECK10-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP4]], i64* [[TMP8]], align 8
+// CHECK10-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP4]], i64* [[TMP10]], align 8
 // CHECK10-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
 // CHECK10-NEXT:    store i8* null, i8** [[TMP11]], align 8
 // CHECK10-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -3150,176 +3142,168 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK10-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i64*
 // CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP15]], align 8
-// CHECK10-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK10-NEXT:    store i64 8, i64* [[TMP16]], align 8
-// CHECK10-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK10-NEXT:    store i8* null, i8** [[TMP17]], align 8
-// CHECK10-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK10-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
-// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP19]], align 8
-// CHECK10-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK10-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32**
-// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP21]], align 8
-// CHECK10-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK10-NEXT:    store i64 [[TMP5]], i64* [[TMP22]], align 8
-// CHECK10-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK10-NEXT:    store i8* null, i8** [[TMP23]], align 8
-// CHECK10-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP27:%.*]] = load i32, i32* [[N]], align 4
-// CHECK10-NEXT:    store i32 [[TMP27]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK10-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK10-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP28]], 0
+// CHECK10-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK10-NEXT:    store i8* null, i8** [[TMP16]], align 8
+// CHECK10-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK10-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32**
+// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP18]], align 8
+// CHECK10-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK10-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32**
+// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP20]], align 8
+// CHECK10-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
+// CHECK10-NEXT:    store i64 [[TMP5]], i64* [[TMP21]], align 8
+// CHECK10-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK10-NEXT:    store i8* null, i8** [[TMP22]], align 8
+// CHECK10-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP26:%.*]] = load i32, i32* [[N]], align 4
+// CHECK10-NEXT:    store i32 [[TMP26]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK10-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK10-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP27]], 0
 // CHECK10-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK10-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK10-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK10-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK10-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP29]], 1
-// CHECK10-NEXT:    [[TMP30:%.*]] = zext i32 [[ADD]] to i64
-// CHECK10-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 [[TMP30]])
-// CHECK10-NEXT:    [[TMP31:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l94.region_id, i32 3, i8** [[TMP24]], i8** [[TMP25]], i64* [[TMP26]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
-// CHECK10-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
-// CHECK10-NEXT:    br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK10-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK10-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP28]], 1
+// CHECK10-NEXT:    [[TMP29:%.*]] = zext i32 [[ADD]] to i64
+// CHECK10-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 [[TMP29]])
+// CHECK10-NEXT:    [[TMP30:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l94.region_id, i32 3, i8** [[TMP23]], i8** [[TMP24]], i64* [[TMP25]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK10-NEXT:    [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
+// CHECK10-NEXT:    br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK10:       omp_offload.failed:
 // CHECK10-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l94(i64 [[TMP4]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR4:[0-9]+]]
 // CHECK10-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK10:       omp_offload.cont:
-// CHECK10-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N]], align 4
+// CHECK10-NEXT:    [[TMP32:%.*]] = load i32, i32* [[N]], align 4
 // CHECK10-NEXT:    [[CONV4:%.*]] = bitcast i64* [[N_CASTED3]] to i32*
-// CHECK10-NEXT:    store i32 [[TMP33]], i32* [[CONV4]], align 4
-// CHECK10-NEXT:    [[TMP34:%.*]] = load i64, i64* [[N_CASTED3]], align 8
-// CHECK10-NEXT:    [[TMP35:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK10-NEXT:    store i32 [[TMP32]], i32* [[CONV4]], align 4
+// CHECK10-NEXT:    [[TMP33:%.*]] = load i64, i64* [[N_CASTED3]], align 8
+// CHECK10-NEXT:    [[TMP34:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK10-NEXT:    [[TMP35:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES8]] to i8*
+// CHECK10-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP35]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes.2 to i8*), i64 24, i1 false)
 // CHECK10-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
 // CHECK10-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP34]], i64* [[TMP37]], align 8
+// CHECK10-NEXT:    store i64 [[TMP33]], i64* [[TMP37]], align 8
 // CHECK10-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
 // CHECK10-NEXT:    [[TMP39:%.*]] = bitcast i8** [[TMP38]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP34]], i64* [[TMP39]], align 8
-// CHECK10-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 0
-// CHECK10-NEXT:    store i64 4, i64* [[TMP40]], align 8
-// CHECK10-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 0
-// CHECK10-NEXT:    store i8* null, i8** [[TMP41]], align 8
-// CHECK10-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 1
-// CHECK10-NEXT:    [[TMP43:%.*]] = bitcast i8** [[TMP42]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP43]], align 8
-// CHECK10-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 1
-// CHECK10-NEXT:    [[TMP45:%.*]] = bitcast i8** [[TMP44]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP45]], align 8
-// CHECK10-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 1
-// CHECK10-NEXT:    store i64 8, i64* [[TMP46]], align 8
-// CHECK10-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 1
-// CHECK10-NEXT:    store i8* null, i8** [[TMP47]], align 8
-// CHECK10-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 2
+// CHECK10-NEXT:    store i64 [[TMP33]], i64* [[TMP39]], align 8
+// CHECK10-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 0
+// CHECK10-NEXT:    store i8* null, i8** [[TMP40]], align 8
+// CHECK10-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 1
+// CHECK10-NEXT:    [[TMP42:%.*]] = bitcast i8** [[TMP41]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP42]], align 8
+// CHECK10-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 1
+// CHECK10-NEXT:    [[TMP44:%.*]] = bitcast i8** [[TMP43]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP44]], align 8
+// CHECK10-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 1
+// CHECK10-NEXT:    store i8* null, i8** [[TMP45]], align 8
+// CHECK10-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 2
+// CHECK10-NEXT:    [[TMP47:%.*]] = bitcast i8** [[TMP46]] to i32**
+// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP47]], align 8
+// CHECK10-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 2
 // CHECK10-NEXT:    [[TMP49:%.*]] = bitcast i8** [[TMP48]] to i32**
 // CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP49]], align 8
-// CHECK10-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 2
-// CHECK10-NEXT:    [[TMP51:%.*]] = bitcast i8** [[TMP50]] to i32**
-// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP51]], align 8
-// CHECK10-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 2
-// CHECK10-NEXT:    store i64 [[TMP35]], i64* [[TMP52]], align 8
-// CHECK10-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 2
-// CHECK10-NEXT:    store i8* null, i8** [[TMP53]], align 8
-// CHECK10-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP57:%.*]] = load i32, i32* [[N]], align 4
-// CHECK10-NEXT:    store i32 [[TMP57]], i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK10-NEXT:    [[TMP58:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK10-NEXT:    [[SUB12:%.*]] = sub nsw i32 [[TMP58]], 0
+// CHECK10-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 2
+// CHECK10-NEXT:    store i64 [[TMP34]], i64* [[TMP50]], align 8
+// CHECK10-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 2
+// CHECK10-NEXT:    store i8* null, i8** [[TMP51]], align 8
+// CHECK10-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP55:%.*]] = load i32, i32* [[N]], align 4
+// CHECK10-NEXT:    store i32 [[TMP55]], i32* [[DOTCAPTURE_EXPR_10]], align 4
+// CHECK10-NEXT:    [[TMP56:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
+// CHECK10-NEXT:    [[SUB12:%.*]] = sub nsw i32 [[TMP56]], 0
 // CHECK10-NEXT:    [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1
 // CHECK10-NEXT:    [[SUB14:%.*]] = sub nsw i32 [[DIV13]], 1
 // CHECK10-NEXT:    store i32 [[SUB14]], i32* [[DOTCAPTURE_EXPR_11]], align 4
-// CHECK10-NEXT:    [[TMP59:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_11]], align 4
-// CHECK10-NEXT:    [[ADD15:%.*]] = add nsw i32 [[TMP59]], 1
-// CHECK10-NEXT:    [[TMP60:%.*]] = zext i32 [[ADD15]] to i64
-// CHECK10-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 [[TMP60]])
-// CHECK10-NEXT:    [[TMP61:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l98.region_id, i32 3, i8** [[TMP54]], i8** [[TMP55]], i64* [[TMP56]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.2, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
-// CHECK10-NEXT:    [[TMP62:%.*]] = icmp ne i32 [[TMP61]], 0
-// CHECK10-NEXT:    br i1 [[TMP62]], label [[OMP_OFFLOAD_FAILED16:%.*]], label [[OMP_OFFLOAD_CONT17:%.*]]
+// CHECK10-NEXT:    [[TMP57:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_11]], align 4
+// CHECK10-NEXT:    [[ADD15:%.*]] = add nsw i32 [[TMP57]], 1
+// CHECK10-NEXT:    [[TMP58:%.*]] = zext i32 [[ADD15]] to i64
+// CHECK10-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 [[TMP58]])
+// CHECK10-NEXT:    [[TMP59:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l98.region_id, i32 3, i8** [[TMP52]], i8** [[TMP53]], i64* [[TMP54]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.3, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK10-NEXT:    [[TMP60:%.*]] = icmp ne i32 [[TMP59]], 0
+// CHECK10-NEXT:    br i1 [[TMP60]], label [[OMP_OFFLOAD_FAILED16:%.*]], label [[OMP_OFFLOAD_CONT17:%.*]]
 // CHECK10:       omp_offload.failed16:
-// CHECK10-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l98(i64 [[TMP34]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR4]]
+// CHECK10-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l98(i64 [[TMP33]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR4]]
 // CHECK10-NEXT:    br label [[OMP_OFFLOAD_CONT17]]
 // CHECK10:       omp_offload.cont17:
-// CHECK10-NEXT:    [[TMP63:%.*]] = load i32, i32* [[N]], align 4
-// CHECK10-NEXT:    store i32 [[TMP63]], i32* [[DOTCAPTURE_EXPR_18]], align 4
-// CHECK10-NEXT:    [[TMP64:%.*]] = load i32, i32* [[N]], align 4
+// CHECK10-NEXT:    [[TMP61:%.*]] = load i32, i32* [[N]], align 4
+// CHECK10-NEXT:    store i32 [[TMP61]], i32* [[DOTCAPTURE_EXPR_18]], align 4
+// CHECK10-NEXT:    [[TMP62:%.*]] = load i32, i32* [[N]], align 4
 // CHECK10-NEXT:    [[CONV20:%.*]] = bitcast i64* [[N_CASTED19]] to i32*
-// CHECK10-NEXT:    store i32 [[TMP64]], i32* [[CONV20]], align 4
-// CHECK10-NEXT:    [[TMP65:%.*]] = load i64, i64* [[N_CASTED19]], align 8
-// CHECK10-NEXT:    [[TMP66:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4
+// CHECK10-NEXT:    store i32 [[TMP62]], i32* [[CONV20]], align 4
+// CHECK10-NEXT:    [[TMP63:%.*]] = load i64, i64* [[N_CASTED19]], align 8
+// CHECK10-NEXT:    [[TMP64:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4
 // CHECK10-NEXT:    [[CONV21:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
-// CHECK10-NEXT:    store i32 [[TMP66]], i32* [[CONV21]], align 4
-// CHECK10-NEXT:    [[TMP67:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK10-NEXT:    [[TMP68:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK10-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP70:%.*]] = bitcast i8** [[TMP69]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP65]], i64* [[TMP70]], align 8
-// CHECK10-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP72:%.*]] = bitcast i8** [[TMP71]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP65]], i64* [[TMP72]], align 8
-// CHECK10-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 0
-// CHECK10-NEXT:    store i64 4, i64* [[TMP73]], align 8
-// CHECK10-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 0
-// CHECK10-NEXT:    store i8* null, i8** [[TMP74]], align 8
-// CHECK10-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 1
+// CHECK10-NEXT:    store i32 [[TMP64]], i32* [[CONV21]], align 4
+// CHECK10-NEXT:    [[TMP65:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
+// CHECK10-NEXT:    [[TMP66:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK10-NEXT:    [[TMP67:%.*]] = bitcast [4 x i64]* [[DOTOFFLOAD_SIZES25]] to i8*
+// CHECK10-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP67]], i8* align 8 bitcast ([4 x i64]* @.offload_sizes.5 to i8*), i64 32, i1 false)
+// CHECK10-NEXT:    [[TMP68:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP69:%.*]] = bitcast i8** [[TMP68]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP63]], i64* [[TMP69]], align 8
+// CHECK10-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP71:%.*]] = bitcast i8** [[TMP70]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP63]], i64* [[TMP71]], align 8
+// CHECK10-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 0
+// CHECK10-NEXT:    store i8* null, i8** [[TMP72]], align 8
+// CHECK10-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 1
+// CHECK10-NEXT:    [[TMP74:%.*]] = bitcast i8** [[TMP73]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP74]], align 8
+// CHECK10-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 1
 // CHECK10-NEXT:    [[TMP76:%.*]] = bitcast i8** [[TMP75]] to i64*
 // CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP76]], align 8
-// CHECK10-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 1
-// CHECK10-NEXT:    [[TMP78:%.*]] = bitcast i8** [[TMP77]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP78]], align 8
-// CHECK10-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 1
-// CHECK10-NEXT:    store i64 8, i64* [[TMP79]], align 8
-// CHECK10-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 1
-// CHECK10-NEXT:    store i8* null, i8** [[TMP80]], align 8
-// CHECK10-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 2
-// CHECK10-NEXT:    [[TMP82:%.*]] = bitcast i8** [[TMP81]] to i32**
-// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP82]], align 8
-// CHECK10-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 2
-// CHECK10-NEXT:    [[TMP84:%.*]] = bitcast i8** [[TMP83]] to i32**
-// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP84]], align 8
-// CHECK10-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 2
-// CHECK10-NEXT:    store i64 [[TMP68]], i64* [[TMP85]], align 8
-// CHECK10-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 2
-// CHECK10-NEXT:    store i8* null, i8** [[TMP86]], align 8
-// CHECK10-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 3
-// CHECK10-NEXT:    [[TMP88:%.*]] = bitcast i8** [[TMP87]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP67]], i64* [[TMP88]], align 8
-// CHECK10-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 3
-// CHECK10-NEXT:    [[TMP90:%.*]] = bitcast i8** [[TMP89]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP67]], i64* [[TMP90]], align 8
-// CHECK10-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 3
-// CHECK10-NEXT:    store i64 4, i64* [[TMP91]], align 8
-// CHECK10-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 3
-// CHECK10-NEXT:    store i8* null, i8** [[TMP92]], align 8
-// CHECK10-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP96:%.*]] = load i32, i32* [[N]], align 4
-// CHECK10-NEXT:    store i32 [[TMP96]], i32* [[DOTCAPTURE_EXPR_27]], align 4
-// CHECK10-NEXT:    [[TMP97:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_27]], align 4
-// CHECK10-NEXT:    [[SUB29:%.*]] = sub nsw i32 [[TMP97]], 0
+// CHECK10-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 1
+// CHECK10-NEXT:    store i8* null, i8** [[TMP77]], align 8
+// CHECK10-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 2
+// CHECK10-NEXT:    [[TMP79:%.*]] = bitcast i8** [[TMP78]] to i32**
+// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP79]], align 8
+// CHECK10-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 2
+// CHECK10-NEXT:    [[TMP81:%.*]] = bitcast i8** [[TMP80]] to i32**
+// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP81]], align 8
+// CHECK10-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 2
+// CHECK10-NEXT:    store i64 [[TMP66]], i64* [[TMP82]], align 8
+// CHECK10-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 2
+// CHECK10-NEXT:    store i8* null, i8** [[TMP83]], align 8
+// CHECK10-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 3
+// CHECK10-NEXT:    [[TMP85:%.*]] = bitcast i8** [[TMP84]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP65]], i64* [[TMP85]], align 8
+// CHECK10-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 3
+// CHECK10-NEXT:    [[TMP87:%.*]] = bitcast i8** [[TMP86]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP65]], i64* [[TMP87]], align 8
+// CHECK10-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS24]], i64 0, i64 3
+// CHECK10-NEXT:    store i8* null, i8** [[TMP88]], align 8
+// CHECK10-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS23]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES25]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP92:%.*]] = load i32, i32* [[N]], align 4
+// CHECK10-NEXT:    store i32 [[TMP92]], i32* [[DOTCAPTURE_EXPR_27]], align 4
+// CHECK10-NEXT:    [[TMP93:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_27]], align 4
+// CHECK10-NEXT:    [[SUB29:%.*]] = sub nsw i32 [[TMP93]], 0
 // CHECK10-NEXT:    [[DIV30:%.*]] = sdiv i32 [[SUB29]], 1
 // CHECK10-NEXT:    [[SUB31:%.*]] = sub nsw i32 [[DIV30]], 1
 // CHECK10-NEXT:    store i32 [[SUB31]], i32* [[DOTCAPTURE_EXPR_28]], align 4
-// CHECK10-NEXT:    [[TMP98:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_28]], align 4
-// CHECK10-NEXT:    [[ADD32:%.*]] = add nsw i32 [[TMP98]], 1
-// CHECK10-NEXT:    [[TMP99:%.*]] = zext i32 [[ADD32]] to i64
-// CHECK10-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 [[TMP99]])
-// CHECK10-NEXT:    [[TMP100:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l102.region_id, i32 4, i8** [[TMP93]], i8** [[TMP94]], i64* [[TMP95]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
-// CHECK10-NEXT:    [[TMP101:%.*]] = icmp ne i32 [[TMP100]], 0
-// CHECK10-NEXT:    br i1 [[TMP101]], label [[OMP_OFFLOAD_FAILED33:%.*]], label [[OMP_OFFLOAD_CONT34:%.*]]
+// CHECK10-NEXT:    [[TMP94:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_28]], align 4
+// CHECK10-NEXT:    [[ADD32:%.*]] = add nsw i32 [[TMP94]], 1
+// CHECK10-NEXT:    [[TMP95:%.*]] = zext i32 [[ADD32]] to i64
+// CHECK10-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 [[TMP95]])
+// CHECK10-NEXT:    [[TMP96:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l102.region_id, i32 4, i8** [[TMP89]], i8** [[TMP90]], i64* [[TMP91]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.6, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK10-NEXT:    [[TMP97:%.*]] = icmp ne i32 [[TMP96]], 0
+// CHECK10-NEXT:    br i1 [[TMP97]], label [[OMP_OFFLOAD_FAILED33:%.*]], label [[OMP_OFFLOAD_CONT34:%.*]]
 // CHECK10:       omp_offload.failed33:
-// CHECK10-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l102(i64 [[TMP65]], i64 [[TMP1]], i32* [[VLA]], i64 [[TMP67]]) #[[ATTR4]]
+// CHECK10-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l102(i64 [[TMP63]], i64 [[TMP1]], i32* [[VLA]], i64 [[TMP65]]) #[[ATTR4]]
 // CHECK10-NEXT:    br label [[OMP_OFFLOAD_CONT34]]
 // CHECK10:       omp_offload.cont34:
-// CHECK10-NEXT:    [[TMP102:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK10-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10EEiT_(i32 noundef signext [[TMP102]])
+// CHECK10-NEXT:    [[TMP98:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK10-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10EEiT_(i32 noundef signext [[TMP98]])
 // CHECK10-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK10-NEXT:    [[TMP103:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK10-NEXT:    call void @llvm.stackrestore(i8* [[TMP103]])
-// CHECK10-NEXT:    [[TMP104:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK10-NEXT:    ret i32 [[TMP104]]
+// CHECK10-NEXT:    [[TMP99:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK10-NEXT:    call void @llvm.stackrestore(i8* [[TMP99]])
+// CHECK10-NEXT:    [[TMP100:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK10-NEXT:    ret i32 [[TMP100]]
 //
 //
 // CHECK10-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l94
@@ -3603,11 +3587,11 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK10-NEXT:    store i32 [[TMP4]], i32* [[CONV3]], align 4
 // CHECK10-NEXT:    [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP3]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP5]])
+// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i64 [[TMP3]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP5]])
 // CHECK10-NEXT:    ret void
 //
 //
-// CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..3
+// CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..4
 // CHECK10-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR3]] {
 // CHECK10-NEXT:  entry:
 // CHECK10-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -3736,7 +3720,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK10-LABEL: define {{[^@]+}}@_Z5tmainIiLi10EEiT_
-// CHECK10-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
+// CHECK10-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR6:[0-9]+]] comdat {
 // CHECK10-NEXT:  entry:
 // CHECK10-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK10-NEXT:    [[A:%.*]] = alloca [10 x i32], align 4
@@ -3764,7 +3748,7 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK10-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK10-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK10-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l76.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.6, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK10-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l76.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.8, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
 // CHECK10-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK10-NEXT:    br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK10:       omp_offload.failed:
@@ -3782,7 +3766,7 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK10-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
 // CHECK10-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK10-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l80.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.8, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK10-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l80.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.11, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.12, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
 // CHECK10-NEXT:    [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
 // CHECK10-NEXT:    br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]]
 // CHECK10:       omp_offload.failed5:
@@ -3800,7 +3784,7 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0
 // CHECK10-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS8]], i32 0, i32 0
 // CHECK10-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK10-NEXT:    [[TMP25:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l84.region_id, i32 1, i8** [[TMP23]], i8** [[TMP24]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.11, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.12, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK10-NEXT:    [[TMP25:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l84.region_id, i32 1, i8** [[TMP23]], i8** [[TMP24]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.14, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.15, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
 // CHECK10-NEXT:    [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0
 // CHECK10-NEXT:    br i1 [[TMP26]], label [[OMP_OFFLOAD_FAILED11:%.*]], label [[OMP_OFFLOAD_CONT12:%.*]]
 // CHECK10:       omp_offload.failed11:
@@ -3816,11 +3800,11 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8
 // CHECK10-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8
 // CHECK10-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8
-// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK10-NEXT:    ret void
 //
 //
-// CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..5
+// CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..7
 // CHECK10-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR3]] {
 // CHECK10-NEXT:  entry:
 // CHECK10-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -3900,11 +3884,11 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8
 // CHECK10-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8
 // CHECK10-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8
-// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK10-NEXT:    ret void
 //
 //
-// CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..7
+// CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..10
 // CHECK10-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR3]] {
 // CHECK10-NEXT:  entry:
 // CHECK10-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -3984,11 +3968,11 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8
 // CHECK10-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8
 // CHECK10-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8
-// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK10-NEXT:    ret void
 //
 //
-// CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..10
+// CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..13
 // CHECK10-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR3]] {
 // CHECK10-NEXT:  entry:
 // CHECK10-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -4080,7 +4064,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK10-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK10-SAME: () #[[ATTR6:[0-9]+]] {
+// CHECK10-SAME: () #[[ATTR7:[0-9]+]] {
 // CHECK10-NEXT:  entry:
 // CHECK10-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK10-NEXT:    ret void
@@ -4135,14 +4119,14 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4
 // CHECK11-NEXT:    [[TMP4:%.*]] = mul nuw i32 [[TMP0]], 4
 // CHECK11-NEXT:    [[TMP5:%.*]] = sext i32 [[TMP4]] to i64
-// CHECK11-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP3]], i32* [[TMP7]], align 4
-// CHECK11-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP3]], i32* [[TMP9]], align 4
-// CHECK11-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK11-NEXT:    store i64 4, i64* [[TMP10]], align 4
+// CHECK11-NEXT:    [[TMP6:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK11-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP6]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes to i8*), i32 24, i1 false)
+// CHECK11-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP3]], i32* [[TMP8]], align 4
+// CHECK11-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP3]], i32* [[TMP10]], align 4
 // CHECK11-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
 // CHECK11-NEXT:    store i8* null, i8** [[TMP11]], align 4
 // CHECK11-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -4151,175 +4135,167 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK11-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i32*
 // CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP15]], align 4
-// CHECK11-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK11-NEXT:    store i64 4, i64* [[TMP16]], align 4
-// CHECK11-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK11-NEXT:    store i8* null, i8** [[TMP17]], align 4
-// CHECK11-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
-// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP19]], align 4
-// CHECK11-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32**
-// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP21]], align 4
-// CHECK11-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK11-NEXT:    store i64 [[TMP5]], i64* [[TMP22]], align 4
-// CHECK11-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK11-NEXT:    store i8* null, i8** [[TMP23]], align 4
-// CHECK11-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP27:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP27]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK11-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK11-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP28]], 0
+// CHECK11-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK11-NEXT:    store i8* null, i8** [[TMP16]], align 4
+// CHECK11-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32**
+// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP18]], align 4
+// CHECK11-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32**
+// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP20]], align 4
+// CHECK11-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
+// CHECK11-NEXT:    store i64 [[TMP5]], i64* [[TMP21]], align 4
+// CHECK11-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK11-NEXT:    store i8* null, i8** [[TMP22]], align 4
+// CHECK11-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP26:%.*]] = load i32, i32* [[N]], align 4
+// CHECK11-NEXT:    store i32 [[TMP26]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK11-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK11-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP27]], 0
 // CHECK11-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK11-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK11-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK11-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK11-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP29]], 1
-// CHECK11-NEXT:    [[TMP30:%.*]] = zext i32 [[ADD]] to i64
-// CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 [[TMP30]])
-// CHECK11-NEXT:    [[TMP31:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l94.region_id, i32 3, i8** [[TMP24]], i8** [[TMP25]], i64* [[TMP26]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
-// CHECK11-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
-// CHECK11-NEXT:    br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK11-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK11-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP28]], 1
+// CHECK11-NEXT:    [[TMP29:%.*]] = zext i32 [[ADD]] to i64
+// CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 [[TMP29]])
+// CHECK11-NEXT:    [[TMP30:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l94.region_id, i32 3, i8** [[TMP23]], i8** [[TMP24]], i64* [[TMP25]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK11-NEXT:    [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
+// CHECK11-NEXT:    br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK11:       omp_offload.failed:
 // CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l94(i32 [[TMP3]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR4:[0-9]+]]
 // CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK11:       omp_offload.cont:
-// CHECK11-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP33]], i32* [[N_CASTED3]], align 4
-// CHECK11-NEXT:    [[TMP34:%.*]] = load i32, i32* [[N_CASTED3]], align 4
-// CHECK11-NEXT:    [[TMP35:%.*]] = mul nuw i32 [[TMP0]], 4
-// CHECK11-NEXT:    [[TMP36:%.*]] = sext i32 [[TMP35]] to i64
+// CHECK11-NEXT:    [[TMP32:%.*]] = load i32, i32* [[N]], align 4
+// CHECK11-NEXT:    store i32 [[TMP32]], i32* [[N_CASTED3]], align 4
+// CHECK11-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N_CASTED3]], align 4
+// CHECK11-NEXT:    [[TMP34:%.*]] = mul nuw i32 [[TMP0]], 4
+// CHECK11-NEXT:    [[TMP35:%.*]] = sext i32 [[TMP34]] to i64
+// CHECK11-NEXT:    [[TMP36:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES7]] to i8*
+// CHECK11-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP36]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes.2 to i8*), i32 24, i1 false)
 // CHECK11-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP38:%.*]] = bitcast i8** [[TMP37]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP34]], i32* [[TMP38]], align 4
+// CHECK11-NEXT:    store i32 [[TMP33]], i32* [[TMP38]], align 4
 // CHECK11-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP40:%.*]] = bitcast i8** [[TMP39]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP34]], i32* [[TMP40]], align 4
-// CHECK11-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 0
-// CHECK11-NEXT:    store i64 4, i64* [[TMP41]], align 4
-// CHECK11-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 0
-// CHECK11-NEXT:    store i8* null, i8** [[TMP42]], align 4
-// CHECK11-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 1
-// CHECK11-NEXT:    [[TMP44:%.*]] = bitcast i8** [[TMP43]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP44]], align 4
-// CHECK11-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 1
-// CHECK11-NEXT:    [[TMP46:%.*]] = bitcast i8** [[TMP45]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP46]], align 4
-// CHECK11-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 1
-// CHECK11-NEXT:    store i64 4, i64* [[TMP47]], align 4
-// CHECK11-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 1
-// CHECK11-NEXT:    store i8* null, i8** [[TMP48]], align 4
-// CHECK11-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 2
+// CHECK11-NEXT:    store i32 [[TMP33]], i32* [[TMP40]], align 4
+// CHECK11-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 0
+// CHECK11-NEXT:    store i8* null, i8** [[TMP41]], align 4
+// CHECK11-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 1
+// CHECK11-NEXT:    [[TMP43:%.*]] = bitcast i8** [[TMP42]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP43]], align 4
+// CHECK11-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 1
+// CHECK11-NEXT:    [[TMP45:%.*]] = bitcast i8** [[TMP44]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP45]], align 4
+// CHECK11-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 1
+// CHECK11-NEXT:    store i8* null, i8** [[TMP46]], align 4
+// CHECK11-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP48:%.*]] = bitcast i8** [[TMP47]] to i32**
+// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP48]], align 4
+// CHECK11-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 2
 // CHECK11-NEXT:    [[TMP50:%.*]] = bitcast i8** [[TMP49]] to i32**
 // CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP50]], align 4
-// CHECK11-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP52:%.*]] = bitcast i8** [[TMP51]] to i32**
-// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP52]], align 4
-// CHECK11-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 2
-// CHECK11-NEXT:    store i64 [[TMP36]], i64* [[TMP53]], align 4
-// CHECK11-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 2
-// CHECK11-NEXT:    store i8* null, i8** [[TMP54]], align 4
-// CHECK11-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP58:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP58]], i32* [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK11-NEXT:    [[TMP59:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK11-NEXT:    [[SUB11:%.*]] = sub nsw i32 [[TMP59]], 0
+// CHECK11-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 2
+// CHECK11-NEXT:    store i64 [[TMP35]], i64* [[TMP51]], align 4
+// CHECK11-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 2
+// CHECK11-NEXT:    store i8* null, i8** [[TMP52]], align 4
+// CHECK11-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP56:%.*]] = load i32, i32* [[N]], align 4
+// CHECK11-NEXT:    store i32 [[TMP56]], i32* [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK11-NEXT:    [[TMP57:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK11-NEXT:    [[SUB11:%.*]] = sub nsw i32 [[TMP57]], 0
 // CHECK11-NEXT:    [[DIV12:%.*]] = sdiv i32 [[SUB11]], 1
 // CHECK11-NEXT:    [[SUB13:%.*]] = sub nsw i32 [[DIV12]], 1
 // CHECK11-NEXT:    store i32 [[SUB13]], i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK11-NEXT:    [[TMP60:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK11-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP60]], 1
-// CHECK11-NEXT:    [[TMP61:%.*]] = zext i32 [[ADD14]] to i64
-// CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 [[TMP61]])
-// CHECK11-NEXT:    [[TMP62:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l98.region_id, i32 3, i8** [[TMP55]], i8** [[TMP56]], i64* [[TMP57]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.2, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
-// CHECK11-NEXT:    [[TMP63:%.*]] = icmp ne i32 [[TMP62]], 0
-// CHECK11-NEXT:    br i1 [[TMP63]], label [[OMP_OFFLOAD_FAILED15:%.*]], label [[OMP_OFFLOAD_CONT16:%.*]]
+// CHECK11-NEXT:    [[TMP58:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
+// CHECK11-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP58]], 1
+// CHECK11-NEXT:    [[TMP59:%.*]] = zext i32 [[ADD14]] to i64
+// CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 [[TMP59]])
+// CHECK11-NEXT:    [[TMP60:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l98.region_id, i32 3, i8** [[TMP53]], i8** [[TMP54]], i64* [[TMP55]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.3, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK11-NEXT:    [[TMP61:%.*]] = icmp ne i32 [[TMP60]], 0
+// CHECK11-NEXT:    br i1 [[TMP61]], label [[OMP_OFFLOAD_FAILED15:%.*]], label [[OMP_OFFLOAD_CONT16:%.*]]
 // CHECK11:       omp_offload.failed15:
-// CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l98(i32 [[TMP34]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR4]]
+// CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l98(i32 [[TMP33]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR4]]
 // CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT16]]
 // CHECK11:       omp_offload.cont16:
-// CHECK11-NEXT:    [[TMP64:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP64]], i32* [[DOTCAPTURE_EXPR_17]], align 4
-// CHECK11-NEXT:    [[TMP65:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP65]], i32* [[N_CASTED18]], align 4
-// CHECK11-NEXT:    [[TMP66:%.*]] = load i32, i32* [[N_CASTED18]], align 4
-// CHECK11-NEXT:    [[TMP67:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_17]], align 4
-// CHECK11-NEXT:    store i32 [[TMP67]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK11-NEXT:    [[TMP68:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK11-NEXT:    [[TMP69:%.*]] = mul nuw i32 [[TMP0]], 4
-// CHECK11-NEXT:    [[TMP70:%.*]] = sext i32 [[TMP69]] to i64
-// CHECK11-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP72:%.*]] = bitcast i8** [[TMP71]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP66]], i32* [[TMP72]], align 4
-// CHECK11-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP74:%.*]] = bitcast i8** [[TMP73]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP66]], i32* [[TMP74]], align 4
-// CHECK11-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 0
-// CHECK11-NEXT:    store i64 4, i64* [[TMP75]], align 4
-// CHECK11-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 0
-// CHECK11-NEXT:    store i8* null, i8** [[TMP76]], align 4
-// CHECK11-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 1
+// CHECK11-NEXT:    [[TMP62:%.*]] = load i32, i32* [[N]], align 4
+// CHECK11-NEXT:    store i32 [[TMP62]], i32* [[DOTCAPTURE_EXPR_17]], align 4
+// CHECK11-NEXT:    [[TMP63:%.*]] = load i32, i32* [[N]], align 4
+// CHECK11-NEXT:    store i32 [[TMP63]], i32* [[N_CASTED18]], align 4
+// CHECK11-NEXT:    [[TMP64:%.*]] = load i32, i32* [[N_CASTED18]], align 4
+// CHECK11-NEXT:    [[TMP65:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_17]], align 4
+// CHECK11-NEXT:    store i32 [[TMP65]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
+// CHECK11-NEXT:    [[TMP66:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
+// CHECK11-NEXT:    [[TMP67:%.*]] = mul nuw i32 [[TMP0]], 4
+// CHECK11-NEXT:    [[TMP68:%.*]] = sext i32 [[TMP67]] to i64
+// CHECK11-NEXT:    [[TMP69:%.*]] = bitcast [4 x i64]* [[DOTOFFLOAD_SIZES22]] to i8*
+// CHECK11-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP69]], i8* align 4 bitcast ([4 x i64]* @.offload_sizes.5 to i8*), i32 32, i1 false)
+// CHECK11-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP71:%.*]] = bitcast i8** [[TMP70]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP64]], i32* [[TMP71]], align 4
+// CHECK11-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP73:%.*]] = bitcast i8** [[TMP72]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP64]], i32* [[TMP73]], align 4
+// CHECK11-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 0
+// CHECK11-NEXT:    store i8* null, i8** [[TMP74]], align 4
+// CHECK11-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 1
+// CHECK11-NEXT:    [[TMP76:%.*]] = bitcast i8** [[TMP75]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP76]], align 4
+// CHECK11-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 1
 // CHECK11-NEXT:    [[TMP78:%.*]] = bitcast i8** [[TMP77]] to i32*
 // CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP78]], align 4
-// CHECK11-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 1
-// CHECK11-NEXT:    [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP80]], align 4
-// CHECK11-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 1
-// CHECK11-NEXT:    store i64 4, i64* [[TMP81]], align 4
-// CHECK11-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 1
-// CHECK11-NEXT:    store i8* null, i8** [[TMP82]], align 4
-// CHECK11-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP84:%.*]] = bitcast i8** [[TMP83]] to i32**
-// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP84]], align 4
-// CHECK11-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP86:%.*]] = bitcast i8** [[TMP85]] to i32**
-// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP86]], align 4
-// CHECK11-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 2
-// CHECK11-NEXT:    store i64 [[TMP70]], i64* [[TMP87]], align 4
-// CHECK11-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 2
-// CHECK11-NEXT:    store i8* null, i8** [[TMP88]], align 4
-// CHECK11-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 3
-// CHECK11-NEXT:    [[TMP90:%.*]] = bitcast i8** [[TMP89]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP68]], i32* [[TMP90]], align 4
-// CHECK11-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 3
-// CHECK11-NEXT:    [[TMP92:%.*]] = bitcast i8** [[TMP91]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP68]], i32* [[TMP92]], align 4
-// CHECK11-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 3
-// CHECK11-NEXT:    store i64 4, i64* [[TMP93]], align 4
-// CHECK11-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 3
-// CHECK11-NEXT:    store i8* null, i8** [[TMP94]], align 4
-// CHECK11-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP98:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP98]], i32* [[DOTCAPTURE_EXPR_24]], align 4
-// CHECK11-NEXT:    [[TMP99:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_24]], align 4
-// CHECK11-NEXT:    [[SUB26:%.*]] = sub nsw i32 [[TMP99]], 0
+// CHECK11-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 1
+// CHECK11-NEXT:    store i8* null, i8** [[TMP79]], align 4
+// CHECK11-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP81:%.*]] = bitcast i8** [[TMP80]] to i32**
+// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP81]], align 4
+// CHECK11-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i32**
+// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP83]], align 4
+// CHECK11-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 2
+// CHECK11-NEXT:    store i64 [[TMP68]], i64* [[TMP84]], align 4
+// CHECK11-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 2
+// CHECK11-NEXT:    store i8* null, i8** [[TMP85]], align 4
+// CHECK11-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 3
+// CHECK11-NEXT:    [[TMP87:%.*]] = bitcast i8** [[TMP86]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP66]], i32* [[TMP87]], align 4
+// CHECK11-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 3
+// CHECK11-NEXT:    [[TMP89:%.*]] = bitcast i8** [[TMP88]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP66]], i32* [[TMP89]], align 4
+// CHECK11-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 3
+// CHECK11-NEXT:    store i8* null, i8** [[TMP90]], align 4
+// CHECK11-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP94:%.*]] = load i32, i32* [[N]], align 4
+// CHECK11-NEXT:    store i32 [[TMP94]], i32* [[DOTCAPTURE_EXPR_24]], align 4
+// CHECK11-NEXT:    [[TMP95:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_24]], align 4
+// CHECK11-NEXT:    [[SUB26:%.*]] = sub nsw i32 [[TMP95]], 0
 // CHECK11-NEXT:    [[DIV27:%.*]] = sdiv i32 [[SUB26]], 1
 // CHECK11-NEXT:    [[SUB28:%.*]] = sub nsw i32 [[DIV27]], 1
 // CHECK11-NEXT:    store i32 [[SUB28]], i32* [[DOTCAPTURE_EXPR_25]], align 4
-// CHECK11-NEXT:    [[TMP100:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_25]], align 4
-// CHECK11-NEXT:    [[ADD29:%.*]] = add nsw i32 [[TMP100]], 1
-// CHECK11-NEXT:    [[TMP101:%.*]] = zext i32 [[ADD29]] to i64
-// CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 [[TMP101]])
-// CHECK11-NEXT:    [[TMP102:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l102.region_id, i32 4, i8** [[TMP95]], i8** [[TMP96]], i64* [[TMP97]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
-// CHECK11-NEXT:    [[TMP103:%.*]] = icmp ne i32 [[TMP102]], 0
-// CHECK11-NEXT:    br i1 [[TMP103]], label [[OMP_OFFLOAD_FAILED30:%.*]], label [[OMP_OFFLOAD_CONT31:%.*]]
+// CHECK11-NEXT:    [[TMP96:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_25]], align 4
+// CHECK11-NEXT:    [[ADD29:%.*]] = add nsw i32 [[TMP96]], 1
+// CHECK11-NEXT:    [[TMP97:%.*]] = zext i32 [[ADD29]] to i64
+// CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 [[TMP97]])
+// CHECK11-NEXT:    [[TMP98:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l102.region_id, i32 4, i8** [[TMP91]], i8** [[TMP92]], i64* [[TMP93]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.6, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK11-NEXT:    [[TMP99:%.*]] = icmp ne i32 [[TMP98]], 0
+// CHECK11-NEXT:    br i1 [[TMP99]], label [[OMP_OFFLOAD_FAILED30:%.*]], label [[OMP_OFFLOAD_CONT31:%.*]]
 // CHECK11:       omp_offload.failed30:
-// CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l102(i32 [[TMP66]], i32 [[TMP0]], i32* [[VLA]], i32 [[TMP68]]) #[[ATTR4]]
+// CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l102(i32 [[TMP64]], i32 [[TMP0]], i32* [[VLA]], i32 [[TMP66]]) #[[ATTR4]]
 // CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT31]]
 // CHECK11:       omp_offload.cont31:
-// CHECK11-NEXT:    [[TMP104:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK11-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10EEiT_(i32 noundef [[TMP104]])
+// CHECK11-NEXT:    [[TMP100:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK11-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10EEiT_(i32 noundef [[TMP100]])
 // CHECK11-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK11-NEXT:    [[TMP105:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK11-NEXT:    call void @llvm.stackrestore(i8* [[TMP105]])
-// CHECK11-NEXT:    [[TMP106:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK11-NEXT:    ret i32 [[TMP106]]
+// CHECK11-NEXT:    [[TMP101:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK11-NEXT:    call void @llvm.stackrestore(i8* [[TMP101]])
+// CHECK11-NEXT:    [[TMP102:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK11-NEXT:    ret i32 [[TMP102]]
 //
 //
 // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l94
@@ -4591,11 +4567,11 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
 // CHECK11-NEXT:    store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK11-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK11-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP3]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP5]])
+// CHECK11-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32 [[TMP3]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP5]])
 // CHECK11-NEXT:    ret void
 //
 //
-// CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..3
+// CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..4
 // CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR3]] {
 // CHECK11-NEXT:  entry:
 // CHECK11-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -4721,7 +4697,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK11-LABEL: define {{[^@]+}}@_Z5tmainIiLi10EEiT_
-// CHECK11-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
+// CHECK11-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR6:[0-9]+]] comdat {
 // CHECK11-NEXT:  entry:
 // CHECK11-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[A:%.*]] = alloca [10 x i32], align 4
@@ -4749,7 +4725,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK11-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l76.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.6, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK11-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l76.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.8, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
 // CHECK11-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK11-NEXT:    br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK11:       omp_offload.failed:
@@ -4767,7 +4743,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
 // CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK11-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l80.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.8, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK11-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l80.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.11, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.12, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
 // CHECK11-NEXT:    [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
 // CHECK11-NEXT:    br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]]
 // CHECK11:       omp_offload.failed5:
@@ -4785,7 +4761,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS8]], i32 0, i32 0
 // CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK11-NEXT:    [[TMP25:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l84.region_id, i32 1, i8** [[TMP23]], i8** [[TMP24]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.11, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.12, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK11-NEXT:    [[TMP25:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l84.region_id, i32 1, i8** [[TMP23]], i8** [[TMP24]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.14, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.15, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
 // CHECK11-NEXT:    [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0
 // CHECK11-NEXT:    br i1 [[TMP26]], label [[OMP_OFFLOAD_FAILED11:%.*]], label [[OMP_OFFLOAD_CONT12:%.*]]
 // CHECK11:       omp_offload.failed11:
@@ -4801,11 +4777,11 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4
 // CHECK11-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4
 // CHECK11-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4
-// CHECK11-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK11-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK11-NEXT:    ret void
 //
 //
-// CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..5
+// CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..7
 // CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR3]] {
 // CHECK11-NEXT:  entry:
 // CHECK11-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -4884,11 +4860,11 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4
 // CHECK11-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4
 // CHECK11-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4
-// CHECK11-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK11-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK11-NEXT:    ret void
 //
 //
-// CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..7
+// CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..10
 // CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR3]] {
 // CHECK11-NEXT:  entry:
 // CHECK11-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -4967,11 +4943,11 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4
 // CHECK11-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4
 // CHECK11-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4
-// CHECK11-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK11-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK11-NEXT:    ret void
 //
 //
-// CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..10
+// CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..13
 // CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR3]] {
 // CHECK11-NEXT:  entry:
 // CHECK11-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -5062,7 +5038,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK11-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK11-SAME: () #[[ATTR6:[0-9]+]] {
+// CHECK11-SAME: () #[[ATTR7:[0-9]+]] {
 // CHECK11-NEXT:  entry:
 // CHECK11-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK11-NEXT:    ret void
@@ -5117,14 +5093,14 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4
 // CHECK12-NEXT:    [[TMP4:%.*]] = mul nuw i32 [[TMP0]], 4
 // CHECK12-NEXT:    [[TMP5:%.*]] = sext i32 [[TMP4]] to i64
-// CHECK12-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP3]], i32* [[TMP7]], align 4
-// CHECK12-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP3]], i32* [[TMP9]], align 4
-// CHECK12-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK12-NEXT:    store i64 4, i64* [[TMP10]], align 4
+// CHECK12-NEXT:    [[TMP6:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK12-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP6]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes to i8*), i32 24, i1 false)
+// CHECK12-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP3]], i32* [[TMP8]], align 4
+// CHECK12-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP3]], i32* [[TMP10]], align 4
 // CHECK12-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
 // CHECK12-NEXT:    store i8* null, i8** [[TMP11]], align 4
 // CHECK12-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -5133,175 +5109,167 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK12-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i32*
 // CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP15]], align 4
-// CHECK12-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK12-NEXT:    store i64 4, i64* [[TMP16]], align 4
-// CHECK12-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK12-NEXT:    store i8* null, i8** [[TMP17]], align 4
-// CHECK12-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK12-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
-// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP19]], align 4
-// CHECK12-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK12-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32**
-// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP21]], align 4
-// CHECK12-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK12-NEXT:    store i64 [[TMP5]], i64* [[TMP22]], align 4
-// CHECK12-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK12-NEXT:    store i8* null, i8** [[TMP23]], align 4
-// CHECK12-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP27:%.*]] = load i32, i32* [[N]], align 4
-// CHECK12-NEXT:    store i32 [[TMP27]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK12-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK12-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP28]], 0
+// CHECK12-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK12-NEXT:    store i8* null, i8** [[TMP16]], align 4
+// CHECK12-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK12-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32**
+// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP18]], align 4
+// CHECK12-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK12-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32**
+// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP20]], align 4
+// CHECK12-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
+// CHECK12-NEXT:    store i64 [[TMP5]], i64* [[TMP21]], align 4
+// CHECK12-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK12-NEXT:    store i8* null, i8** [[TMP22]], align 4
+// CHECK12-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP26:%.*]] = load i32, i32* [[N]], align 4
+// CHECK12-NEXT:    store i32 [[TMP26]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK12-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK12-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP27]], 0
 // CHECK12-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK12-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK12-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK12-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK12-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP29]], 1
-// CHECK12-NEXT:    [[TMP30:%.*]] = zext i32 [[ADD]] to i64
-// CHECK12-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 [[TMP30]])
-// CHECK12-NEXT:    [[TMP31:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l94.region_id, i32 3, i8** [[TMP24]], i8** [[TMP25]], i64* [[TMP26]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
-// CHECK12-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
-// CHECK12-NEXT:    br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK12-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK12-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP28]], 1
+// CHECK12-NEXT:    [[TMP29:%.*]] = zext i32 [[ADD]] to i64
+// CHECK12-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 [[TMP29]])
+// CHECK12-NEXT:    [[TMP30:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l94.region_id, i32 3, i8** [[TMP23]], i8** [[TMP24]], i64* [[TMP25]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK12-NEXT:    [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
+// CHECK12-NEXT:    br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK12:       omp_offload.failed:
 // CHECK12-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l94(i32 [[TMP3]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR4:[0-9]+]]
 // CHECK12-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK12:       omp_offload.cont:
-// CHECK12-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N]], align 4
-// CHECK12-NEXT:    store i32 [[TMP33]], i32* [[N_CASTED3]], align 4
-// CHECK12-NEXT:    [[TMP34:%.*]] = load i32, i32* [[N_CASTED3]], align 4
-// CHECK12-NEXT:    [[TMP35:%.*]] = mul nuw i32 [[TMP0]], 4
-// CHECK12-NEXT:    [[TMP36:%.*]] = sext i32 [[TMP35]] to i64
+// CHECK12-NEXT:    [[TMP32:%.*]] = load i32, i32* [[N]], align 4
+// CHECK12-NEXT:    store i32 [[TMP32]], i32* [[N_CASTED3]], align 4
+// CHECK12-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N_CASTED3]], align 4
+// CHECK12-NEXT:    [[TMP34:%.*]] = mul nuw i32 [[TMP0]], 4
+// CHECK12-NEXT:    [[TMP35:%.*]] = sext i32 [[TMP34]] to i64
+// CHECK12-NEXT:    [[TMP36:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES7]] to i8*
+// CHECK12-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP36]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes.2 to i8*), i32 24, i1 false)
 // CHECK12-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
 // CHECK12-NEXT:    [[TMP38:%.*]] = bitcast i8** [[TMP37]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP34]], i32* [[TMP38]], align 4
+// CHECK12-NEXT:    store i32 [[TMP33]], i32* [[TMP38]], align 4
 // CHECK12-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
 // CHECK12-NEXT:    [[TMP40:%.*]] = bitcast i8** [[TMP39]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP34]], i32* [[TMP40]], align 4
-// CHECK12-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 0
-// CHECK12-NEXT:    store i64 4, i64* [[TMP41]], align 4
-// CHECK12-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 0
-// CHECK12-NEXT:    store i8* null, i8** [[TMP42]], align 4
-// CHECK12-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 1
-// CHECK12-NEXT:    [[TMP44:%.*]] = bitcast i8** [[TMP43]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP44]], align 4
-// CHECK12-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 1
-// CHECK12-NEXT:    [[TMP46:%.*]] = bitcast i8** [[TMP45]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP46]], align 4
-// CHECK12-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 1
-// CHECK12-NEXT:    store i64 4, i64* [[TMP47]], align 4
-// CHECK12-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 1
-// CHECK12-NEXT:    store i8* null, i8** [[TMP48]], align 4
-// CHECK12-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 2
+// CHECK12-NEXT:    store i32 [[TMP33]], i32* [[TMP40]], align 4
+// CHECK12-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 0
+// CHECK12-NEXT:    store i8* null, i8** [[TMP41]], align 4
+// CHECK12-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 1
+// CHECK12-NEXT:    [[TMP43:%.*]] = bitcast i8** [[TMP42]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP43]], align 4
+// CHECK12-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 1
+// CHECK12-NEXT:    [[TMP45:%.*]] = bitcast i8** [[TMP44]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP45]], align 4
+// CHECK12-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 1
+// CHECK12-NEXT:    store i8* null, i8** [[TMP46]], align 4
+// CHECK12-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 2
+// CHECK12-NEXT:    [[TMP48:%.*]] = bitcast i8** [[TMP47]] to i32**
+// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP48]], align 4
+// CHECK12-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 2
 // CHECK12-NEXT:    [[TMP50:%.*]] = bitcast i8** [[TMP49]] to i32**
 // CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP50]], align 4
-// CHECK12-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 2
-// CHECK12-NEXT:    [[TMP52:%.*]] = bitcast i8** [[TMP51]] to i32**
-// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP52]], align 4
-// CHECK12-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 2
-// CHECK12-NEXT:    store i64 [[TMP36]], i64* [[TMP53]], align 4
-// CHECK12-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 2
-// CHECK12-NEXT:    store i8* null, i8** [[TMP54]], align 4
-// CHECK12-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP58:%.*]] = load i32, i32* [[N]], align 4
-// CHECK12-NEXT:    store i32 [[TMP58]], i32* [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK12-NEXT:    [[TMP59:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK12-NEXT:    [[SUB11:%.*]] = sub nsw i32 [[TMP59]], 0
+// CHECK12-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 2
+// CHECK12-NEXT:    store i64 [[TMP35]], i64* [[TMP51]], align 4
+// CHECK12-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 2
+// CHECK12-NEXT:    store i8* null, i8** [[TMP52]], align 4
+// CHECK12-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP56:%.*]] = load i32, i32* [[N]], align 4
+// CHECK12-NEXT:    store i32 [[TMP56]], i32* [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK12-NEXT:    [[TMP57:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK12-NEXT:    [[SUB11:%.*]] = sub nsw i32 [[TMP57]], 0
 // CHECK12-NEXT:    [[DIV12:%.*]] = sdiv i32 [[SUB11]], 1
 // CHECK12-NEXT:    [[SUB13:%.*]] = sub nsw i32 [[DIV12]], 1
 // CHECK12-NEXT:    store i32 [[SUB13]], i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK12-NEXT:    [[TMP60:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK12-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP60]], 1
-// CHECK12-NEXT:    [[TMP61:%.*]] = zext i32 [[ADD14]] to i64
-// CHECK12-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 [[TMP61]])
-// CHECK12-NEXT:    [[TMP62:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l98.region_id, i32 3, i8** [[TMP55]], i8** [[TMP56]], i64* [[TMP57]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.2, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
-// CHECK12-NEXT:    [[TMP63:%.*]] = icmp ne i32 [[TMP62]], 0
-// CHECK12-NEXT:    br i1 [[TMP63]], label [[OMP_OFFLOAD_FAILED15:%.*]], label [[OMP_OFFLOAD_CONT16:%.*]]
+// CHECK12-NEXT:    [[TMP58:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
+// CHECK12-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP58]], 1
+// CHECK12-NEXT:    [[TMP59:%.*]] = zext i32 [[ADD14]] to i64
+// CHECK12-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 [[TMP59]])
+// CHECK12-NEXT:    [[TMP60:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l98.region_id, i32 3, i8** [[TMP53]], i8** [[TMP54]], i64* [[TMP55]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.3, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK12-NEXT:    [[TMP61:%.*]] = icmp ne i32 [[TMP60]], 0
+// CHECK12-NEXT:    br i1 [[TMP61]], label [[OMP_OFFLOAD_FAILED15:%.*]], label [[OMP_OFFLOAD_CONT16:%.*]]
 // CHECK12:       omp_offload.failed15:
-// CHECK12-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l98(i32 [[TMP34]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR4]]
+// CHECK12-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l98(i32 [[TMP33]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR4]]
 // CHECK12-NEXT:    br label [[OMP_OFFLOAD_CONT16]]
 // CHECK12:       omp_offload.cont16:
-// CHECK12-NEXT:    [[TMP64:%.*]] = load i32, i32* [[N]], align 4
-// CHECK12-NEXT:    store i32 [[TMP64]], i32* [[DOTCAPTURE_EXPR_17]], align 4
-// CHECK12-NEXT:    [[TMP65:%.*]] = load i32, i32* [[N]], align 4
-// CHECK12-NEXT:    store i32 [[TMP65]], i32* [[N_CASTED18]], align 4
-// CHECK12-NEXT:    [[TMP66:%.*]] = load i32, i32* [[N_CASTED18]], align 4
-// CHECK12-NEXT:    [[TMP67:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_17]], align 4
-// CHECK12-NEXT:    store i32 [[TMP67]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK12-NEXT:    [[TMP68:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK12-NEXT:    [[TMP69:%.*]] = mul nuw i32 [[TMP0]], 4
-// CHECK12-NEXT:    [[TMP70:%.*]] = sext i32 [[TMP69]] to i64
-// CHECK12-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP72:%.*]] = bitcast i8** [[TMP71]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP66]], i32* [[TMP72]], align 4
-// CHECK12-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP74:%.*]] = bitcast i8** [[TMP73]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP66]], i32* [[TMP74]], align 4
-// CHECK12-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 0
-// CHECK12-NEXT:    store i64 4, i64* [[TMP75]], align 4
-// CHECK12-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 0
-// CHECK12-NEXT:    store i8* null, i8** [[TMP76]], align 4
-// CHECK12-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 1
+// CHECK12-NEXT:    [[TMP62:%.*]] = load i32, i32* [[N]], align 4
+// CHECK12-NEXT:    store i32 [[TMP62]], i32* [[DOTCAPTURE_EXPR_17]], align 4
+// CHECK12-NEXT:    [[TMP63:%.*]] = load i32, i32* [[N]], align 4
+// CHECK12-NEXT:    store i32 [[TMP63]], i32* [[N_CASTED18]], align 4
+// CHECK12-NEXT:    [[TMP64:%.*]] = load i32, i32* [[N_CASTED18]], align 4
+// CHECK12-NEXT:    [[TMP65:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_17]], align 4
+// CHECK12-NEXT:    store i32 [[TMP65]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
+// CHECK12-NEXT:    [[TMP66:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
+// CHECK12-NEXT:    [[TMP67:%.*]] = mul nuw i32 [[TMP0]], 4
+// CHECK12-NEXT:    [[TMP68:%.*]] = sext i32 [[TMP67]] to i64
+// CHECK12-NEXT:    [[TMP69:%.*]] = bitcast [4 x i64]* [[DOTOFFLOAD_SIZES22]] to i8*
+// CHECK12-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP69]], i8* align 4 bitcast ([4 x i64]* @.offload_sizes.5 to i8*), i32 32, i1 false)
+// CHECK12-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP71:%.*]] = bitcast i8** [[TMP70]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP64]], i32* [[TMP71]], align 4
+// CHECK12-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP73:%.*]] = bitcast i8** [[TMP72]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP64]], i32* [[TMP73]], align 4
+// CHECK12-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 0
+// CHECK12-NEXT:    store i8* null, i8** [[TMP74]], align 4
+// CHECK12-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 1
+// CHECK12-NEXT:    [[TMP76:%.*]] = bitcast i8** [[TMP75]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP76]], align 4
+// CHECK12-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 1
 // CHECK12-NEXT:    [[TMP78:%.*]] = bitcast i8** [[TMP77]] to i32*
 // CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP78]], align 4
-// CHECK12-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 1
-// CHECK12-NEXT:    [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP80]], align 4
-// CHECK12-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 1
-// CHECK12-NEXT:    store i64 4, i64* [[TMP81]], align 4
-// CHECK12-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 1
-// CHECK12-NEXT:    store i8* null, i8** [[TMP82]], align 4
-// CHECK12-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 2
-// CHECK12-NEXT:    [[TMP84:%.*]] = bitcast i8** [[TMP83]] to i32**
-// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP84]], align 4
-// CHECK12-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 2
-// CHECK12-NEXT:    [[TMP86:%.*]] = bitcast i8** [[TMP85]] to i32**
-// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP86]], align 4
-// CHECK12-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 2
-// CHECK12-NEXT:    store i64 [[TMP70]], i64* [[TMP87]], align 4
-// CHECK12-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 2
-// CHECK12-NEXT:    store i8* null, i8** [[TMP88]], align 4
-// CHECK12-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 3
-// CHECK12-NEXT:    [[TMP90:%.*]] = bitcast i8** [[TMP89]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP68]], i32* [[TMP90]], align 4
-// CHECK12-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 3
-// CHECK12-NEXT:    [[TMP92:%.*]] = bitcast i8** [[TMP91]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP68]], i32* [[TMP92]], align 4
-// CHECK12-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 3
-// CHECK12-NEXT:    store i64 4, i64* [[TMP93]], align 4
-// CHECK12-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 3
-// CHECK12-NEXT:    store i8* null, i8** [[TMP94]], align 4
-// CHECK12-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP98:%.*]] = load i32, i32* [[N]], align 4
-// CHECK12-NEXT:    store i32 [[TMP98]], i32* [[DOTCAPTURE_EXPR_24]], align 4
-// CHECK12-NEXT:    [[TMP99:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_24]], align 4
-// CHECK12-NEXT:    [[SUB26:%.*]] = sub nsw i32 [[TMP99]], 0
+// CHECK12-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 1
+// CHECK12-NEXT:    store i8* null, i8** [[TMP79]], align 4
+// CHECK12-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 2
+// CHECK12-NEXT:    [[TMP81:%.*]] = bitcast i8** [[TMP80]] to i32**
+// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP81]], align 4
+// CHECK12-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 2
+// CHECK12-NEXT:    [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i32**
+// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP83]], align 4
+// CHECK12-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 2
+// CHECK12-NEXT:    store i64 [[TMP68]], i64* [[TMP84]], align 4
+// CHECK12-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 2
+// CHECK12-NEXT:    store i8* null, i8** [[TMP85]], align 4
+// CHECK12-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 3
+// CHECK12-NEXT:    [[TMP87:%.*]] = bitcast i8** [[TMP86]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP66]], i32* [[TMP87]], align 4
+// CHECK12-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 3
+// CHECK12-NEXT:    [[TMP89:%.*]] = bitcast i8** [[TMP88]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP66]], i32* [[TMP89]], align 4
+// CHECK12-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS21]], i32 0, i32 3
+// CHECK12-NEXT:    store i8* null, i8** [[TMP90]], align 4
+// CHECK12-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS20]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES22]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP94:%.*]] = load i32, i32* [[N]], align 4
+// CHECK12-NEXT:    store i32 [[TMP94]], i32* [[DOTCAPTURE_EXPR_24]], align 4
+// CHECK12-NEXT:    [[TMP95:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_24]], align 4
+// CHECK12-NEXT:    [[SUB26:%.*]] = sub nsw i32 [[TMP95]], 0
 // CHECK12-NEXT:    [[DIV27:%.*]] = sdiv i32 [[SUB26]], 1
 // CHECK12-NEXT:    [[SUB28:%.*]] = sub nsw i32 [[DIV27]], 1
 // CHECK12-NEXT:    store i32 [[SUB28]], i32* [[DOTCAPTURE_EXPR_25]], align 4
-// CHECK12-NEXT:    [[TMP100:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_25]], align 4
-// CHECK12-NEXT:    [[ADD29:%.*]] = add nsw i32 [[TMP100]], 1
-// CHECK12-NEXT:    [[TMP101:%.*]] = zext i32 [[ADD29]] to i64
-// CHECK12-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 [[TMP101]])
-// CHECK12-NEXT:    [[TMP102:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l102.region_id, i32 4, i8** [[TMP95]], i8** [[TMP96]], i64* [[TMP97]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
-// CHECK12-NEXT:    [[TMP103:%.*]] = icmp ne i32 [[TMP102]], 0
-// CHECK12-NEXT:    br i1 [[TMP103]], label [[OMP_OFFLOAD_FAILED30:%.*]], label [[OMP_OFFLOAD_CONT31:%.*]]
+// CHECK12-NEXT:    [[TMP96:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_25]], align 4
+// CHECK12-NEXT:    [[ADD29:%.*]] = add nsw i32 [[TMP96]], 1
+// CHECK12-NEXT:    [[TMP97:%.*]] = zext i32 [[ADD29]] to i64
+// CHECK12-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 [[TMP97]])
+// CHECK12-NEXT:    [[TMP98:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l102.region_id, i32 4, i8** [[TMP91]], i8** [[TMP92]], i64* [[TMP93]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.6, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK12-NEXT:    [[TMP99:%.*]] = icmp ne i32 [[TMP98]], 0
+// CHECK12-NEXT:    br i1 [[TMP99]], label [[OMP_OFFLOAD_FAILED30:%.*]], label [[OMP_OFFLOAD_CONT31:%.*]]
 // CHECK12:       omp_offload.failed30:
-// CHECK12-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l102(i32 [[TMP66]], i32 [[TMP0]], i32* [[VLA]], i32 [[TMP68]]) #[[ATTR4]]
+// CHECK12-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l102(i32 [[TMP64]], i32 [[TMP0]], i32* [[VLA]], i32 [[TMP66]]) #[[ATTR4]]
 // CHECK12-NEXT:    br label [[OMP_OFFLOAD_CONT31]]
 // CHECK12:       omp_offload.cont31:
-// CHECK12-NEXT:    [[TMP104:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK12-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10EEiT_(i32 noundef [[TMP104]])
+// CHECK12-NEXT:    [[TMP100:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK12-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10EEiT_(i32 noundef [[TMP100]])
 // CHECK12-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK12-NEXT:    [[TMP105:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK12-NEXT:    call void @llvm.stackrestore(i8* [[TMP105]])
-// CHECK12-NEXT:    [[TMP106:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK12-NEXT:    ret i32 [[TMP106]]
+// CHECK12-NEXT:    [[TMP101:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK12-NEXT:    call void @llvm.stackrestore(i8* [[TMP101]])
+// CHECK12-NEXT:    [[TMP102:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK12-NEXT:    ret i32 [[TMP102]]
 //
 //
 // CHECK12-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l94
@@ -5573,11 +5541,11 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
 // CHECK12-NEXT:    store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK12-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK12-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP3]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP5]])
+// CHECK12-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32 [[TMP3]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP5]])
 // CHECK12-NEXT:    ret void
 //
 //
-// CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..3
+// CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..4
 // CHECK12-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR3]] {
 // CHECK12-NEXT:  entry:
 // CHECK12-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -5703,7 +5671,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK12-LABEL: define {{[^@]+}}@_Z5tmainIiLi10EEiT_
-// CHECK12-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
+// CHECK12-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR6:[0-9]+]] comdat {
 // CHECK12-NEXT:  entry:
 // CHECK12-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK12-NEXT:    [[A:%.*]] = alloca [10 x i32], align 4
@@ -5731,7 +5699,7 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK12-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK12-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK12-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l76.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.6, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK12-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l76.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.8, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
 // CHECK12-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK12-NEXT:    br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK12:       omp_offload.failed:
@@ -5749,7 +5717,7 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK12-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
 // CHECK12-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK12-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l80.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.8, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK12-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l80.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.11, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.12, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
 // CHECK12-NEXT:    [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
 // CHECK12-NEXT:    br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]]
 // CHECK12:       omp_offload.failed5:
@@ -5767,7 +5735,7 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0
 // CHECK12-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS8]], i32 0, i32 0
 // CHECK12-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK12-NEXT:    [[TMP25:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l84.region_id, i32 1, i8** [[TMP23]], i8** [[TMP24]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.11, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.12, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK12-NEXT:    [[TMP25:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l84.region_id, i32 1, i8** [[TMP23]], i8** [[TMP24]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.14, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.15, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
 // CHECK12-NEXT:    [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0
 // CHECK12-NEXT:    br i1 [[TMP26]], label [[OMP_OFFLOAD_FAILED11:%.*]], label [[OMP_OFFLOAD_CONT12:%.*]]
 // CHECK12:       omp_offload.failed11:
@@ -5783,11 +5751,11 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4
 // CHECK12-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4
 // CHECK12-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4
-// CHECK12-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK12-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK12-NEXT:    ret void
 //
 //
-// CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..5
+// CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..7
 // CHECK12-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR3]] {
 // CHECK12-NEXT:  entry:
 // CHECK12-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -5866,11 +5834,11 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4
 // CHECK12-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4
 // CHECK12-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4
-// CHECK12-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK12-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK12-NEXT:    ret void
 //
 //
-// CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..7
+// CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..10
 // CHECK12-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR3]] {
 // CHECK12-NEXT:  entry:
 // CHECK12-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -5949,11 +5917,11 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4
 // CHECK12-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4
 // CHECK12-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4
-// CHECK12-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK12-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK12-NEXT:    ret void
 //
 //
-// CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..10
+// CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..13
 // CHECK12-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR3]] {
 // CHECK12-NEXT:  entry:
 // CHECK12-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -6044,7 +6012,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK12-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK12-SAME: () #[[ATTR6:[0-9]+]] {
+// CHECK12-SAME: () #[[ATTR7:[0-9]+]] {
 // CHECK12-NEXT:  entry:
 // CHECK12-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK12-NEXT:    ret void

diff  --git a/clang/test/OpenMP/target_update_codegen.cpp b/clang/test/OpenMP/target_update_codegen.cpp
index 5c47dc42bc58a..174d68f0a3f96 100644
--- a/clang/test/OpenMP/target_update_codegen.cpp
+++ b/clang/test/OpenMP/target_update_codegen.cpp
@@ -43,7 +43,7 @@ double gc[100];
 
 // CK1: [[MTYPE03:@.+]] = {{.+}}constant [1 x i64] [i64 2]
 
-// CK1: [[SIZE04:@.+]] = {{.+}}constant [2 x i64] [i64 sdiv exact (i64 sub (i64 ptrtoint (double** getelementptr (double*, double** getelementptr inbounds (%struct.ST, %struct.ST* @gb, i32 0, i32 1), i32 1) to i64), i64 ptrtoint (double** getelementptr inbounds (%struct.ST, %struct.ST* @gb, i32 0, i32 1) to i64)), i64 ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64)), i64 24]
+// CK1: [[SIZE04:@.+]] = {{.+}}constant [2 x i64] [i64 0, i64 24]
 // CK1: [[MTYPE04:@.+]] = {{.+}}constant [2 x i64] [i64 0, i64 281474976710673]
 
 // CK1-LABEL: _Z3fooi
@@ -135,16 +135,19 @@ void foo(int arg) {
   {++arg;}
 
   // Region 04
-  // CK1-DAG: call void @__tgt_target_data_update_mapper(%struct.ident_t* @{{.+}}, i64 -1, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE04]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE04]]{{.+}}, i8** null)
+  // CK1-DAG: call void @__tgt_target_data_update_mapper(%struct.ident_t* @{{.+}}, i64 -1, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%[^,]+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE04]]{{.+}}, i8** null)
   // CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
   // CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
+  // CK1-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]]
 
   // CK1-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0
   // CK1-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0
+  // CK1-DAG: [[PS0:%.+]] = getelementptr inbounds {{.+}}[[PS]], i{{.+}} 0, i{{.+}} 0
   // CK1-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to [[ST]]**
   // CK1-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to double***
   // CK1-DAG: store [[ST]]* @gb, [[ST]]** [[CBP0]]
   // CK1-DAG: store double** getelementptr inbounds ([[ST]], [[ST]]* @gb, i32 0, i32 1), double*** [[CP0]]
+  // CK1-DAG: store i64 sdiv exact (i64 sub (i64 ptrtoint (double** getelementptr (double*, double** getelementptr inbounds (%struct.ST, %struct.ST* @gb, i32 0, i32 1), i32 1) to i64), i64 ptrtoint (double** getelementptr inbounds (%struct.ST, %struct.ST* @gb, i32 0, i32 1) to i64)), i64 ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64)), i64* [[PS0]],
 
 
   // CK1-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1
@@ -206,6 +209,7 @@ struct ST {
   }
 };
 
+// CK2: [[SIZE00:@.+]] = {{.+}}constant [2 x i64] [i64 0, i64 24]
 // CK2: [[MTYPE00:@.+]] = {{.+}}constant [2 x i64] [i64 0, i64 281474976710674]
 
 // CK2-LABEL: _Z3bari
@@ -217,21 +221,21 @@ int bar(int arg){
 // Region 00
 // CK2: br i1 %{{[^,]+}}, label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]]
 // CK2: [[IFTHEN]]
-// CK2-DAG: call void @__tgt_target_data_update_mapper(%struct.ident_t* @{{.+}}, i64 [[DEV:%[^,]+]], i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[sz:64|32]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE00]]{{.+}}, i8** null)
+// CK2-DAG: call void @__tgt_target_data_update_mapper(%struct.ident_t* @{{.+}}, i64 [[DEV:%[^,]+]], i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE00]]{{.+}}, i8** null)
 // CK2-DAG: [[DEV]] = sext i32 [[DEVi32:%[^,]+]] to i64
 // CK2-DAG: [[DEVi32]] = load i32, i32* %{{[^,]+}},
 // CK2-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
 // CK2-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
-// CK2-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]]
+// CK2-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]]
 
 // CK2-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0
 // CK2-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0
-// CK2-DAG: [[S0:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 0
+// CK2-DAG: [[PS0:%.+]] = getelementptr inbounds {{.+}}[[PS]], i{{.+}} 0, i{{.+}} 0
 // CK2-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to [[ST]]**
 // CK2-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to double***
 // CK2-DAG: store [[ST]]* [[VAR0:%[^,]+]], [[ST]]** [[CBP0]]
 // CK2-DAG: store double** [[SEC0:%[^,]+]], double*** [[CP0]]
-// CK2-DAG: store i[[sz]] {{%.+}}, i[[sz]]* [[S0]]
+// CK2-DAG: store i64 {{%.+}}, i64* [[PS0]],
 // CK2-DAG: [[SEC0]] = getelementptr inbounds {{.*}}[[ST]]* [[VAR0]], i32 0, i32 1
 
 
@@ -536,6 +540,7 @@ struct S {
   double *p;
 };
 
+// CK9: [[SIZE00:@.+]] = {{.+}}constant [2 x i64] [i64 0, i64 8]
 // CK9: [[MTYPE00:@.+]] = {{.+}}constant [2 x i64] [i64 0, i64 281474976710673]
 
 // CK9-LABEL: lvalue
@@ -548,12 +553,10 @@ void lvalue(struct S *s, int l, int e) {
   //
   // CK9-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1
   // CK9-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1
-  // CK9-DAG: [[SIZE0:%.+]] = getelementptr inbounds {{.+}}[[SIZE]], i{{.+}} 0, i{{.+}} 1
   // CK9-DAG: [[BPC0:%.+]] = bitcast i8** [[BP0]] to double***
   // CK9-DAG: [[PC0:%.+]] = bitcast i8** [[P0]] to double**
   // CK9-DAG: store double** [[P:%.+]], double*** [[BPC0]]
   // CK9-DAG: store double* [[P_VAL:%.+]], double** [[PC0]]
-  // CK9-DAG: store i{{.+}} 8, i{{.+}}* [[SIZE0]]
   // CK9-DAG: [[P]] = getelementptr inbounds [[STRUCT_S:%.+]], [[STRUCT_S]]* [[S_VAL:%.+]], i32 0, i32 0
   // CK9-DAG: [[S_VAL]] = load [[STRUCT_S]]*, [[STRUCT_S]]** [[S_ADDR:%.+]]
   // CK9-DAG: [[P_VAL]] = load double*, double** [[P_1:%.+]],
@@ -586,6 +589,7 @@ struct S {
   double *p;
 };
 
+// CK10: [[SIZE00:@.+]] = {{.+}}constant [2 x i64] [i64 0, i64 8]
 // CK10: [[MTYPE00:@.+]] = {{.+}}constant [2 x i64] [i64 0, i64 281474976710673]
 
 // CK10-LABEL: lvalue
@@ -598,12 +602,10 @@ void lvalue(struct S *s, int l, int e) {
   //
   // CK10-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1
   // CK10-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1
-  // CK10-DAG: [[SIZE0:%.+]] = getelementptr inbounds {{.+}}[[SIZE]], i{{.+}} 0, i{{.+}} 1
   // CK10-DAG: [[BPC0:%.+]] = bitcast i8** [[BP0]] to double***
   // CK10-DAG: [[PC0:%.+]] = bitcast i8** [[P0]] to double**
   // CK10-DAG: store double** [[P_VAL:%.+]], double*** [[BPC0]]
   // CK10-DAG: store double* [[ADD_PTR:%.+]], double** [[PC0]]
-  // CK10-DAG: store i{{.+}} 8, i{{.+}}* [[SIZE0]]
   // CK10-64-DAG: [[ADD_PTR]] = getelementptr inbounds double, double* [[S_P:%.+]], i{{.+}} [[IDX_EXT:%.+]]
   // CK10-32-DAG: [[ADD_PTR]] = getelementptr inbounds double, double* [[S_P:%.+]], i{{.+}} [[L_VAL:%.+]]
   // CK10-64-DAG: [[IDX_EXT]] = sext i32 [[L_VAL:%.+]] to i64
@@ -636,6 +638,7 @@ void lvalue(struct S *s, int l, int e) {
 struct S {
   double *p;
 };
+// CK11: [[SIZE00:@.+]] = {{.+}}constant [2 x i64] [i64 0, i64 8]
 // CK11: [[MTYPE00:@.+]] = {{.+}}constant [2 x i64] [i64 0, i64 281474976710673]
 
 // CK11-LABEL: lvalue
@@ -648,12 +651,10 @@ void lvalue(struct S *s, int l, int e) {
   //
   // CK11-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1
   // CK11-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1
-  // CK11-DAG: [[SIZE0:%.+]] = getelementptr inbounds {{.+}}[[SIZE]], i{{.+}} 0, i{{.+}} 1
   // CK11-DAG: [[BPC0:%.+]] = bitcast i8** [[BP0]] to double***
   // CK11-DAG: [[PC0:%.+]] = bitcast i8** [[P0]] to double**
   // CK11-DAG: store double** [[P:%.+]], double*** [[BPC0]]
   // CK11-DAG: store double* [[ARRAY_IDX:%.+]], double** [[PC0]]
-  // CK11-DAG: store i{{.+}} 8, i{{.+}} [[SIZE0]]
   // CK11-DAG: [[P]] = getelementptr inbounds [[STRUCT_S:%.+]], [[STRUCT_S]]* [[SS_1:%.+]], i32 0, i32 0
   // CK11-DAG: [[ARRAY_IDX]] = getelementptr inbounds double, double* [[ADD_PTR:%.+]], i{{.+}} 3
   // CK11-64-DAG: [[ADD_PTR]] = getelementptr inbounds double, double* [[S_P:%.+]], i{{.+}} [[IDX_EXT:%.+]]
@@ -688,6 +689,7 @@ struct S {
   double *p;
   struct S *sp;
 };
+// CK12: [[SIZE00:@.+]] = {{.+}}constant [3 x i64] [i64 0, i64 {{4|8}}, i64 8]
 // CK12: [[MTYPE00:@.+]] = {{.+}}constant [3 x i64] [i64 0, i64 281474976710672, i64 17]
 
 // CK12-LABEL: lvalue
@@ -700,20 +702,16 @@ void lvalue(struct S *s, int l, int e) {
   //
   // CK12-DAG: [[BP2:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 2
   // CK12-DAG: [[P2:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 2
-  // CK12-DAG: [[SIZE2:%.+]] = getelementptr inbounds {{.+}}[[SIZE]], i{{.+}} 0, i{{.+}} 2
   // CK12-DAG: [[BPC2:%.+]] = bitcast i8** [[BP2]] to double***
   // CK12-DAG: [[PC2:%.+]] = bitcast i8** [[P2]] to double**
   // CK12-DAG: store double** [[P_VAL:%.+]], double*** [[BPC2]]
   // CK12-DAG: store double* [[SIX:%.+]], double** [[PC2]]
-  // CK12-DAG: store i{{.+}} 8, i{{.+}}* [[SIZE2]]
   // CK12-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1
   // CK12-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1
-  // CK12-DAG: [[SIZE1:%.+]] = getelementptr inbounds {{.+}}[[SIZE]], i{{.+}} 0, i{{.+}} 1
   // CK12-DAG: [[BPC1:%.+]] = bitcast i8** [[BP1]] to [[STRUCT_S:%.+]]***
   // CK12-DAG: [[PC1:%.+]] = bitcast i8** [[P1]] to double***
   // CK12-DAG: store [[STRUCT_S]]** [[SP:%.+]], [[STRUCT_S]]*** [[BPC1]]
   // CK12-DAG: store double** [[P_VAL:%.+]], double*** [[PC1]]
-  // CK12-DAG: store i{{.+}} {{4|8}}, i{{.+}}* [[SIZE1]]
   // CK12-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0
   // CK12-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0
   // CK12-DAG: [[SIZE0:%.+]] = getelementptr inbounds {{.+}}[[SIZE]], i{{.+}} 0, i{{.+}} 0
@@ -797,6 +795,7 @@ void lvalue(int **BB, int a, int b) {
 // SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
 #ifdef CK14
 
+// CK14: [[SIZE00:@.+]] = {{.+}}constant [2 x i64] [i64 0, i64 8]
 // CK14: [[MTYPE00:@.+]] = private {{.*}}constant [2 x i64] [i64 0, i64 281474976710673]
 
 struct SSA {
@@ -821,12 +820,10 @@ struct SSB {
 
     // CK14-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1
     // CK14-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1
-    // CK14-DAG: [[SIZE1:%.+]] = getelementptr inbounds {{.+}}[[SIZE]], i{{.+}} 0, i{{.+}} 1
     // CK14-DAG: [[BPC1:%.+]] = bitcast i8** [[BP1]] to double***
     // CK14-DAG: [[PC1:%.+]] = bitcast i8** [[P1]] to double**
     // CK14-DAG: store double** [[D_VAL:%.+]], double*** [[BPC1]]
     // CK14-DAG: store double* [[ADD_PTR:%.+]], double** [[PC1]]
-    // CK14-DAG: store i64 8, i64* [[SIZE1]]
     // CK14-DAG: [[ADD_PTR]] = getelementptr inbounds double, double* [[ZERO:%.+]], i{{.+}} 1
     // CK14-DAG: [[ZERO]] = load double*, double** [[D_VAL_2:%.+]]
     // CK14-DAG: [[D_VAL]] = getelementptr inbounds [[SSB:%.+]], [[SSB:%.+]]* [[THIS:%.+]], i32 0, i32 0
@@ -870,6 +867,7 @@ void lvalue_member(SSA *sap) {
 // SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
 #ifdef CK15
 
+// CK15: [[SIZE00:@.+]] = {{.+}}constant [2 x i64] [i64 0, i64 {{8|4}}]
 // CK15: [[MTYPE00:@.+]] = {{.+}}constant [2 x i64] [i64 0, i64 281474976710673]
 
 struct SSA {
@@ -888,12 +886,10 @@ void lvalue_member(SSA *sap) {
 
   // CK15-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1
   // CK15-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1
-  // CK15-DAG: [[SIZE1:%.+]] = getelementptr inbounds {{.+}}[[SIZE]], i{{.+}} 0, i{{.+}} 1
   // CK15-DAG: [[BPC1:%.+]] = bitcast i8** [[BP1]] to double***
   // CK15-DAG: [[PC1:%.+]] = bitcast i8** [[P1]] to double**
   // CK15-DAG: store double** [[P_VAL:%.+]], double*** [[BPC1]]
   // CK15-DAG: store double* [[ADD_PTR:%.+]], double** [[PC1]]
-  // CK15-DAG: store i64 {{4|8}}, i64* [[SIZE1]]
   // CK15-DAG: [[ADD_PTR]] = getelementptr inbounds double, double* [[THREE:%.+]], i{{.+}} 3
   // CK15-DAG: [[THREE]] = load double*, double** [[P_VAL_1:%.+]]
   // CK15-DAG: [[P_VAL]] = getelementptr inbounds [[SSA:%.+]], [[SSA:%.+]]* [[THIS:%.+]], i32 0, i32 0
@@ -1255,6 +1251,7 @@ void foo(int arg) {
 // CK21: [[STRUCT_ST:%.+]] = type { [10 x [10 x [10 x double*]]] }
 // CK21: [[STRUCT_DESCRIPTOR:%.+]]  = type { i64, i64, i64 }
 
+// CK21: [[SIZE:@.+]] = private unnamed_addr constant [2 x i64] zeroinitializer
 // CK21: [[MTYPE:@.+]] = {{.+}}constant [2 x i64] [i64 0, i64 299067162755073]
 
 struct ST {

diff  --git a/clang/test/OpenMP/target_update_depend_codegen.cpp b/clang/test/OpenMP/target_update_depend_codegen.cpp
index b3272e5de92c4..116a903eec2d3 100644
--- a/clang/test/OpenMP/target_update_depend_codegen.cpp
+++ b/clang/test/OpenMP/target_update_depend_codegen.cpp
@@ -37,7 +37,7 @@ double gc[100];
 
 // CK1: [[MTYPE03:@.+]] = {{.+}}constant [1 x i64] [i64 2]
 
-// CK1: [[SIZE04:@.+]] = {{.+}}constant [2 x i64] [i64 sdiv exact (i64 sub (i64 ptrtoint (double** getelementptr (double*, double** getelementptr inbounds (%struct.ST, %struct.ST* @gb, i32 0, i32 1), i32 1) to i64), i64 ptrtoint (double** getelementptr inbounds (%struct.ST, %struct.ST* @gb, i32 0, i32 1) to i64)), i64 ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64)), i64 24]
+// CK1: [[SIZE04:@.+]] = {{.+}}constant [2 x i64] [i64 0, i64 24]
 // CK1: [[MTYPE04:@.+]] = {{.+}}constant [2 x i64] [i64 0, i64 281474976710673]
 
 // CK1-LABEL: _Z3fooi
@@ -290,6 +290,8 @@ void foo(int arg) {
   // CK1: [[P0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P:%.+]], i32 0, i32 0
   // CK1: [[P0_BC:%.+]] = bitcast i8** [[P0]] to double***
   // CK1: store double** getelementptr inbounds (%struct.ST, %struct.ST* @gb, i32 0, i32 1), double*** [[P0_BC]],
+  // CK1: [[PS0:%.+]] = getelementptr inbounds [2 x i64], [2 x i64]* [[PS:%.+]], i32 0, i32 0
+  // CK1: store i64 sdiv exact (i64 sub (i64 ptrtoint (double** getelementptr (double*, double** getelementptr inbounds (%struct.ST, %struct.ST* @gb, i32 0, i32 1), i32 1) to i64), i64 ptrtoint (double** getelementptr inbounds (%struct.ST, %struct.ST* @gb, i32 0, i32 1) to i64)), i64 ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64)), i64* [[PS0]],
   // CK1: [[BP1:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP]], i32 0, i32 1
   // CK1: [[BP1_BC:%.+]] = bitcast i8** [[BP1]] to double***
   // CK1: store double** getelementptr inbounds (%struct.ST, %struct.ST* @gb, i32 0, i32 1), double*** [[BP1_BC]],
@@ -298,6 +300,7 @@ void foo(int arg) {
   // CK1: store double* %{{.+}}, double** [[P1_BC]],
   // CK1: [[GEPBP0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP]], i32 0, i32 0
   // CK1: [[GEPP0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P]], i32 0, i32 0
+  // CK1: [[GEPS0:%.+]] = getelementptr inbounds [2 x i64], [2 x i64]* [[PS]], i32 0, i32 0
   // CK1: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* {{.+}}, i32 {{.+}}, i32 1, i[[sz]] {{88|52}}, i[[sz]] 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates{{.+}}*)* [[TASK_ENTRY4:@.+]] to i32 (i32, i8*)*))
   // CK1: [[RES_BC:%.+]] = bitcast i8* [[RES]] to %struct.kmp_task_t_with_privates{{.+}}*
   // CK1: [[TASK_T:%.+]] = getelementptr inbounds %struct.kmp_task_t_with_privates{{.+}}, %struct.kmp_task_t_with_privates{{.+}}* [[RES_BC]], i32 0, i32 0
@@ -312,10 +315,12 @@ void foo(int arg) {
   // CK1-64: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align {{8|4}} [[BC_PRIVS_PTRS]], i8* align {{8|4}} [[BC_PTRS]], i[[sz]] {{16|8}}, i1 false)
   // CK1-64: [[PRIVS_SIZES:%.+]] = getelementptr inbounds %struct..kmp_privates.t{{.+}}, %struct..kmp_privates.t{{.+}}* [[PRIVS]], i32 0, i32 2
   // CK1-64: [[BC_PRIVS_SIZES:%.+]] = bitcast [2 x i[[sz]]]* [[PRIVS_SIZES]] to i8*
-  // CK1-64: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align {{8|4}} [[BC_PRIVS_SIZES]], i8* align {{8|4}} bitcast ([2 x i[[sz]]]* [[SIZE04]] to i8*), i[[sz]] {{16|8}}, i1 false)
+  // CK1-64: [[BC_SIZES:%.+]] = bitcast i64* [[GEPS0]] to i8*
+  // CK1-64: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align {{8|4}} [[BC_PRIVS_SIZES]], i8* align {{8|4}} [[BC_SIZES]], i[[sz]] {{16|8}}, i1 false)
   // CK1-32: [[PRIVS_SIZES:%.+]] = getelementptr inbounds %struct..kmp_privates.t{{.+}}, %struct..kmp_privates.t{{.+}}* [[PRIVS]], i32 0, i32 0
   // CK1-32: [[BC_PRIVS_SIZES:%.+]] = bitcast [2 x i64]* [[PRIVS_SIZES]] to i8*
-  // CK1-32: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align {{8|4}} [[BC_PRIVS_SIZES]], i8* align {{8|4}} bitcast ([2 x i64]* [[SIZE04]] to i8*), i[[sz]] {{16|8}}, i1 false)
+  // CK1-32: [[BC_SIZES:%.+]] = bitcast i64* [[GEPS0]] to i8*
+  // CK1-32: call void @llvm.memcpy.p0i8.p0i8.i[[sz]](i8* align {{8|4}} [[BC_PRIVS_SIZES]], i8* align {{8|4}} [[BC_SIZES]], i[[sz]] {{16|8}}, i1 false)
   // CK1-32: [[PRIVS_BASEPTRS:%.+]] = getelementptr inbounds %struct..kmp_privates.t{{.+}}, %struct..kmp_privates.t{{.+}}* [[PRIVS]], i32 0, i32 1
   // CK1-32: [[BC_PRIVS_BASEPTRS:%.+]] = bitcast [2 x i8*]* [[PRIVS_BASEPTRS]] to i8*
   // CK1-32: [[BC_BASEPTRS:%.+]] = bitcast i8** [[GEPBP0]] to i8*

diff  --git a/clang/test/OpenMP/teams_distribute_codegen.cpp b/clang/test/OpenMP/teams_distribute_codegen.cpp
index b720f7872c1d2..78c6b30d7f71e 100644
--- a/clang/test/OpenMP/teams_distribute_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_codegen.cpp
@@ -1637,14 +1637,14 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    store i32 [[TMP3]], i32* [[CONV]], align 4
 // CHECK9-NEXT:    [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8
 // CHECK9-NEXT:    [[TMP5:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK9-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP4]], i64* [[TMP7]], align 8
-// CHECK9-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP4]], i64* [[TMP9]], align 8
-// CHECK9-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK9-NEXT:    store i64 4, i64* [[TMP10]], align 8
+// CHECK9-NEXT:    [[TMP6:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK9-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP6]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes to i8*), i64 24, i1 false)
+// CHECK9-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP4]], i64* [[TMP8]], align 8
+// CHECK9-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP4]], i64* [[TMP10]], align 8
 // CHECK9-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
 // CHECK9-NEXT:    store i8* null, i8** [[TMP11]], align 8
 // CHECK9-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -1653,46 +1653,44 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK9-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i64*
 // CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP15]], align 8
-// CHECK9-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK9-NEXT:    store i64 8, i64* [[TMP16]], align 8
-// CHECK9-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK9-NEXT:    store i8* null, i8** [[TMP17]], align 8
-// CHECK9-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
-// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP19]], align 8
-// CHECK9-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32**
-// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP21]], align 8
-// CHECK9-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK9-NEXT:    store i64 [[TMP5]], i64* [[TMP22]], align 8
-// CHECK9-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK9-NEXT:    store i8* null, i8** [[TMP23]], align 8
-// CHECK9-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP27:%.*]] = load i32, i32* [[N]], align 4
-// CHECK9-NEXT:    store i32 [[TMP27]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK9-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK9-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP28]], 0
+// CHECK9-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK9-NEXT:    store i8* null, i8** [[TMP16]], align 8
+// CHECK9-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32**
+// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP18]], align 8
+// CHECK9-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32**
+// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP20]], align 8
+// CHECK9-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
+// CHECK9-NEXT:    store i64 [[TMP5]], i64* [[TMP21]], align 8
+// CHECK9-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK9-NEXT:    store i8* null, i8** [[TMP22]], align 8
+// CHECK9-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP26:%.*]] = load i32, i32* [[N]], align 4
+// CHECK9-NEXT:    store i32 [[TMP26]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK9-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK9-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP27]], 0
 // CHECK9-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK9-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK9-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK9-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK9-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP29]], 1
-// CHECK9-NEXT:    [[TMP30:%.*]] = zext i32 [[ADD]] to i64
-// CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 [[TMP30]])
-// CHECK9-NEXT:    [[TMP31:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l73.region_id, i32 3, i8** [[TMP24]], i8** [[TMP25]], i64* [[TMP26]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK9-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
-// CHECK9-NEXT:    br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK9-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK9-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP28]], 1
+// CHECK9-NEXT:    [[TMP29:%.*]] = zext i32 [[ADD]] to i64
+// CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 [[TMP29]])
+// CHECK9-NEXT:    [[TMP30:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l73.region_id, i32 3, i8** [[TMP23]], i8** [[TMP24]], i64* [[TMP25]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK9-NEXT:    [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
+// CHECK9-NEXT:    br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK9:       omp_offload.failed:
 // CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l73(i64 [[TMP4]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK9:       omp_offload.cont:
 // CHECK9-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[VLA]], i64 0
-// CHECK9-NEXT:    [[TMP33:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
-// CHECK9-NEXT:    [[TMP34:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK9-NEXT:    call void @llvm.stackrestore(i8* [[TMP34]])
-// CHECK9-NEXT:    ret i32 [[TMP33]]
+// CHECK9-NEXT:    [[TMP32:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+// CHECK9-NEXT:    [[TMP33:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK9-NEXT:    call void @llvm.stackrestore(i8* [[TMP33]])
+// CHECK9-NEXT:    ret i32 [[TMP32]]
 //
 //
 // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l73
@@ -1807,7 +1805,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK9-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK9-SAME: () #[[ATTR4:[0-9]+]] {
+// CHECK9-SAME: () #[[ATTR5:[0-9]+]] {
 // CHECK9-NEXT:  entry:
 // CHECK9-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK9-NEXT:    ret void
@@ -1839,14 +1837,14 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    store i32 [[TMP3]], i32* [[CONV]], align 4
 // CHECK10-NEXT:    [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8
 // CHECK10-NEXT:    [[TMP5:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK10-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP4]], i64* [[TMP7]], align 8
-// CHECK10-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP4]], i64* [[TMP9]], align 8
-// CHECK10-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK10-NEXT:    store i64 4, i64* [[TMP10]], align 8
+// CHECK10-NEXT:    [[TMP6:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK10-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP6]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes to i8*), i64 24, i1 false)
+// CHECK10-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP4]], i64* [[TMP8]], align 8
+// CHECK10-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP4]], i64* [[TMP10]], align 8
 // CHECK10-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
 // CHECK10-NEXT:    store i8* null, i8** [[TMP11]], align 8
 // CHECK10-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -1855,46 +1853,44 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK10-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i64*
 // CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP15]], align 8
-// CHECK10-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK10-NEXT:    store i64 8, i64* [[TMP16]], align 8
-// CHECK10-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK10-NEXT:    store i8* null, i8** [[TMP17]], align 8
-// CHECK10-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK10-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
-// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP19]], align 8
-// CHECK10-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK10-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32**
-// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP21]], align 8
-// CHECK10-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK10-NEXT:    store i64 [[TMP5]], i64* [[TMP22]], align 8
-// CHECK10-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK10-NEXT:    store i8* null, i8** [[TMP23]], align 8
-// CHECK10-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP27:%.*]] = load i32, i32* [[N]], align 4
-// CHECK10-NEXT:    store i32 [[TMP27]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK10-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK10-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP28]], 0
+// CHECK10-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK10-NEXT:    store i8* null, i8** [[TMP16]], align 8
+// CHECK10-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK10-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32**
+// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP18]], align 8
+// CHECK10-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK10-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32**
+// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP20]], align 8
+// CHECK10-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
+// CHECK10-NEXT:    store i64 [[TMP5]], i64* [[TMP21]], align 8
+// CHECK10-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK10-NEXT:    store i8* null, i8** [[TMP22]], align 8
+// CHECK10-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP26:%.*]] = load i32, i32* [[N]], align 4
+// CHECK10-NEXT:    store i32 [[TMP26]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK10-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK10-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP27]], 0
 // CHECK10-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK10-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK10-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK10-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK10-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP29]], 1
-// CHECK10-NEXT:    [[TMP30:%.*]] = zext i32 [[ADD]] to i64
-// CHECK10-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 [[TMP30]])
-// CHECK10-NEXT:    [[TMP31:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l73.region_id, i32 3, i8** [[TMP24]], i8** [[TMP25]], i64* [[TMP26]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK10-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
-// CHECK10-NEXT:    br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK10-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK10-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP28]], 1
+// CHECK10-NEXT:    [[TMP29:%.*]] = zext i32 [[ADD]] to i64
+// CHECK10-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 [[TMP29]])
+// CHECK10-NEXT:    [[TMP30:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l73.region_id, i32 3, i8** [[TMP23]], i8** [[TMP24]], i64* [[TMP25]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK10-NEXT:    [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
+// CHECK10-NEXT:    br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK10:       omp_offload.failed:
 // CHECK10-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l73(i64 [[TMP4]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK10-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK10:       omp_offload.cont:
 // CHECK10-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[VLA]], i64 0
-// CHECK10-NEXT:    [[TMP33:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
-// CHECK10-NEXT:    [[TMP34:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK10-NEXT:    call void @llvm.stackrestore(i8* [[TMP34]])
-// CHECK10-NEXT:    ret i32 [[TMP33]]
+// CHECK10-NEXT:    [[TMP32:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+// CHECK10-NEXT:    [[TMP33:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK10-NEXT:    call void @llvm.stackrestore(i8* [[TMP33]])
+// CHECK10-NEXT:    ret i32 [[TMP32]]
 //
 //
 // CHECK10-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l73
@@ -2009,7 +2005,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK10-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK10-SAME: () #[[ATTR4:[0-9]+]] {
+// CHECK10-SAME: () #[[ATTR5:[0-9]+]] {
 // CHECK10-NEXT:  entry:
 // CHECK10-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK10-NEXT:    ret void
@@ -2040,14 +2036,14 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4
 // CHECK11-NEXT:    [[TMP4:%.*]] = mul nuw i32 [[TMP0]], 4
 // CHECK11-NEXT:    [[TMP5:%.*]] = sext i32 [[TMP4]] to i64
-// CHECK11-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP3]], i32* [[TMP7]], align 4
-// CHECK11-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP3]], i32* [[TMP9]], align 4
-// CHECK11-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK11-NEXT:    store i64 4, i64* [[TMP10]], align 4
+// CHECK11-NEXT:    [[TMP6:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK11-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP6]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes to i8*), i32 24, i1 false)
+// CHECK11-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP3]], i32* [[TMP8]], align 4
+// CHECK11-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP3]], i32* [[TMP10]], align 4
 // CHECK11-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
 // CHECK11-NEXT:    store i8* null, i8** [[TMP11]], align 4
 // CHECK11-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -2056,46 +2052,44 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK11-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i32*
 // CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP15]], align 4
-// CHECK11-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK11-NEXT:    store i64 4, i64* [[TMP16]], align 4
-// CHECK11-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK11-NEXT:    store i8* null, i8** [[TMP17]], align 4
-// CHECK11-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
-// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP19]], align 4
-// CHECK11-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32**
-// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP21]], align 4
-// CHECK11-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK11-NEXT:    store i64 [[TMP5]], i64* [[TMP22]], align 4
-// CHECK11-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK11-NEXT:    store i8* null, i8** [[TMP23]], align 4
-// CHECK11-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP27:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP27]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK11-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK11-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP28]], 0
+// CHECK11-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK11-NEXT:    store i8* null, i8** [[TMP16]], align 4
+// CHECK11-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32**
+// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP18]], align 4
+// CHECK11-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32**
+// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP20]], align 4
+// CHECK11-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
+// CHECK11-NEXT:    store i64 [[TMP5]], i64* [[TMP21]], align 4
+// CHECK11-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK11-NEXT:    store i8* null, i8** [[TMP22]], align 4
+// CHECK11-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP26:%.*]] = load i32, i32* [[N]], align 4
+// CHECK11-NEXT:    store i32 [[TMP26]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK11-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK11-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP27]], 0
 // CHECK11-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK11-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK11-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK11-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK11-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP29]], 1
-// CHECK11-NEXT:    [[TMP30:%.*]] = zext i32 [[ADD]] to i64
-// CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 [[TMP30]])
-// CHECK11-NEXT:    [[TMP31:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l73.region_id, i32 3, i8** [[TMP24]], i8** [[TMP25]], i64* [[TMP26]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK11-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
-// CHECK11-NEXT:    br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK11-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK11-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP28]], 1
+// CHECK11-NEXT:    [[TMP29:%.*]] = zext i32 [[ADD]] to i64
+// CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 [[TMP29]])
+// CHECK11-NEXT:    [[TMP30:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l73.region_id, i32 3, i8** [[TMP23]], i8** [[TMP24]], i64* [[TMP25]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK11-NEXT:    [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
+// CHECK11-NEXT:    br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK11:       omp_offload.failed:
 // CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l73(i32 [[TMP3]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK11:       omp_offload.cont:
 // CHECK11-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[VLA]], i32 0
-// CHECK11-NEXT:    [[TMP33:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
-// CHECK11-NEXT:    [[TMP34:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK11-NEXT:    call void @llvm.stackrestore(i8* [[TMP34]])
-// CHECK11-NEXT:    ret i32 [[TMP33]]
+// CHECK11-NEXT:    [[TMP32:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+// CHECK11-NEXT:    [[TMP33:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK11-NEXT:    call void @llvm.stackrestore(i8* [[TMP33]])
+// CHECK11-NEXT:    ret i32 [[TMP32]]
 //
 //
 // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l73
@@ -2208,7 +2202,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK11-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK11-SAME: () #[[ATTR4:[0-9]+]] {
+// CHECK11-SAME: () #[[ATTR5:[0-9]+]] {
 // CHECK11-NEXT:  entry:
 // CHECK11-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK11-NEXT:    ret void
@@ -2239,14 +2233,14 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4
 // CHECK12-NEXT:    [[TMP4:%.*]] = mul nuw i32 [[TMP0]], 4
 // CHECK12-NEXT:    [[TMP5:%.*]] = sext i32 [[TMP4]] to i64
-// CHECK12-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP3]], i32* [[TMP7]], align 4
-// CHECK12-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP3]], i32* [[TMP9]], align 4
-// CHECK12-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK12-NEXT:    store i64 4, i64* [[TMP10]], align 4
+// CHECK12-NEXT:    [[TMP6:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK12-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP6]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes to i8*), i32 24, i1 false)
+// CHECK12-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP3]], i32* [[TMP8]], align 4
+// CHECK12-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP3]], i32* [[TMP10]], align 4
 // CHECK12-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
 // CHECK12-NEXT:    store i8* null, i8** [[TMP11]], align 4
 // CHECK12-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -2255,46 +2249,44 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK12-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i32*
 // CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP15]], align 4
-// CHECK12-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK12-NEXT:    store i64 4, i64* [[TMP16]], align 4
-// CHECK12-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK12-NEXT:    store i8* null, i8** [[TMP17]], align 4
-// CHECK12-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK12-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
-// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP19]], align 4
-// CHECK12-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK12-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32**
-// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP21]], align 4
-// CHECK12-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK12-NEXT:    store i64 [[TMP5]], i64* [[TMP22]], align 4
-// CHECK12-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK12-NEXT:    store i8* null, i8** [[TMP23]], align 4
-// CHECK12-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP27:%.*]] = load i32, i32* [[N]], align 4
-// CHECK12-NEXT:    store i32 [[TMP27]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK12-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK12-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP28]], 0
+// CHECK12-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK12-NEXT:    store i8* null, i8** [[TMP16]], align 4
+// CHECK12-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK12-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32**
+// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP18]], align 4
+// CHECK12-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK12-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32**
+// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP20]], align 4
+// CHECK12-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
+// CHECK12-NEXT:    store i64 [[TMP5]], i64* [[TMP21]], align 4
+// CHECK12-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK12-NEXT:    store i8* null, i8** [[TMP22]], align 4
+// CHECK12-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP26:%.*]] = load i32, i32* [[N]], align 4
+// CHECK12-NEXT:    store i32 [[TMP26]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK12-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK12-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP27]], 0
 // CHECK12-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK12-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK12-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK12-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK12-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP29]], 1
-// CHECK12-NEXT:    [[TMP30:%.*]] = zext i32 [[ADD]] to i64
-// CHECK12-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 [[TMP30]])
-// CHECK12-NEXT:    [[TMP31:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l73.region_id, i32 3, i8** [[TMP24]], i8** [[TMP25]], i64* [[TMP26]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK12-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
-// CHECK12-NEXT:    br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK12-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK12-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP28]], 1
+// CHECK12-NEXT:    [[TMP29:%.*]] = zext i32 [[ADD]] to i64
+// CHECK12-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 [[TMP29]])
+// CHECK12-NEXT:    [[TMP30:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l73.region_id, i32 3, i8** [[TMP23]], i8** [[TMP24]], i64* [[TMP25]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK12-NEXT:    [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
+// CHECK12-NEXT:    br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK12:       omp_offload.failed:
 // CHECK12-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l73(i32 [[TMP3]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK12-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK12:       omp_offload.cont:
 // CHECK12-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[VLA]], i32 0
-// CHECK12-NEXT:    [[TMP33:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
-// CHECK12-NEXT:    [[TMP34:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK12-NEXT:    call void @llvm.stackrestore(i8* [[TMP34]])
-// CHECK12-NEXT:    ret i32 [[TMP33]]
+// CHECK12-NEXT:    [[TMP32:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+// CHECK12-NEXT:    [[TMP33:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK12-NEXT:    call void @llvm.stackrestore(i8* [[TMP33]])
+// CHECK12-NEXT:    ret i32 [[TMP32]]
 //
 //
 // CHECK12-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l73
@@ -2407,7 +2399,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK12-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK12-SAME: () #[[ATTR4:[0-9]+]] {
+// CHECK12-SAME: () #[[ATTR5:[0-9]+]] {
 // CHECK12-NEXT:  entry:
 // CHECK12-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK12-NEXT:    ret void
@@ -2955,14 +2947,14 @@ int main (int argc, char **argv) {
 // CHECK25-NEXT:    store i32 [[TMP3]], i32* [[CONV]], align 4
 // CHECK25-NEXT:    [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8
 // CHECK25-NEXT:    [[TMP5:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK25-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK25-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i64*
-// CHECK25-NEXT:    store i64 [[TMP4]], i64* [[TMP7]], align 8
-// CHECK25-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK25-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i64*
-// CHECK25-NEXT:    store i64 [[TMP4]], i64* [[TMP9]], align 8
-// CHECK25-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK25-NEXT:    store i64 4, i64* [[TMP10]], align 8
+// CHECK25-NEXT:    [[TMP6:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK25-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP6]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes to i8*), i64 24, i1 false)
+// CHECK25-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK25-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i64*
+// CHECK25-NEXT:    store i64 [[TMP4]], i64* [[TMP8]], align 8
+// CHECK25-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK25-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i64*
+// CHECK25-NEXT:    store i64 [[TMP4]], i64* [[TMP10]], align 8
 // CHECK25-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
 // CHECK25-NEXT:    store i8* null, i8** [[TMP11]], align 8
 // CHECK25-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -2971,48 +2963,46 @@ int main (int argc, char **argv) {
 // CHECK25-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK25-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i64*
 // CHECK25-NEXT:    store i64 [[TMP1]], i64* [[TMP15]], align 8
-// CHECK25-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK25-NEXT:    store i64 8, i64* [[TMP16]], align 8
-// CHECK25-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK25-NEXT:    store i8* null, i8** [[TMP17]], align 8
-// CHECK25-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK25-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
-// CHECK25-NEXT:    store i32* [[VLA]], i32** [[TMP19]], align 8
-// CHECK25-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK25-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32**
-// CHECK25-NEXT:    store i32* [[VLA]], i32** [[TMP21]], align 8
-// CHECK25-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK25-NEXT:    store i64 [[TMP5]], i64* [[TMP22]], align 8
-// CHECK25-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK25-NEXT:    store i8* null, i8** [[TMP23]], align 8
-// CHECK25-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK25-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK25-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK25-NEXT:    [[TMP27:%.*]] = load i32, i32* [[N]], align 4
-// CHECK25-NEXT:    store i32 [[TMP27]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK25-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK25-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP28]], 0
+// CHECK25-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK25-NEXT:    store i8* null, i8** [[TMP16]], align 8
+// CHECK25-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK25-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32**
+// CHECK25-NEXT:    store i32* [[VLA]], i32** [[TMP18]], align 8
+// CHECK25-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK25-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32**
+// CHECK25-NEXT:    store i32* [[VLA]], i32** [[TMP20]], align 8
+// CHECK25-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
+// CHECK25-NEXT:    store i64 [[TMP5]], i64* [[TMP21]], align 8
+// CHECK25-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK25-NEXT:    store i8* null, i8** [[TMP22]], align 8
+// CHECK25-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK25-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK25-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK25-NEXT:    [[TMP26:%.*]] = load i32, i32* [[N]], align 4
+// CHECK25-NEXT:    store i32 [[TMP26]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK25-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK25-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP27]], 0
 // CHECK25-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK25-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK25-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK25-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK25-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP29]], 1
-// CHECK25-NEXT:    [[TMP30:%.*]] = zext i32 [[ADD]] to i64
-// CHECK25-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 [[TMP30]])
-// CHECK25-NEXT:    [[TMP31:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l162.region_id, i32 3, i8** [[TMP24]], i8** [[TMP25]], i64* [[TMP26]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK25-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
-// CHECK25-NEXT:    br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK25-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK25-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP28]], 1
+// CHECK25-NEXT:    [[TMP29:%.*]] = zext i32 [[ADD]] to i64
+// CHECK25-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 [[TMP29]])
+// CHECK25-NEXT:    [[TMP30:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l162.region_id, i32 3, i8** [[TMP23]], i8** [[TMP24]], i64* [[TMP25]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK25-NEXT:    [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
+// CHECK25-NEXT:    br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK25:       omp_offload.failed:
 // CHECK25-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l162(i64 [[TMP4]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK25-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK25:       omp_offload.cont:
-// CHECK25-NEXT:    [[TMP33:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK25-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10EEiT_(i32 noundef signext [[TMP33]])
+// CHECK25-NEXT:    [[TMP32:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK25-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10EEiT_(i32 noundef signext [[TMP32]])
 // CHECK25-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK25-NEXT:    [[TMP34:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK25-NEXT:    call void @llvm.stackrestore(i8* [[TMP34]])
-// CHECK25-NEXT:    [[TMP35:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK25-NEXT:    ret i32 [[TMP35]]
+// CHECK25-NEXT:    [[TMP33:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK25-NEXT:    call void @llvm.stackrestore(i8* [[TMP33]])
+// CHECK25-NEXT:    [[TMP34:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK25-NEXT:    ret i32 [[TMP34]]
 //
 //
 // CHECK25-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l162
@@ -3127,7 +3117,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK25-LABEL: define {{[^@]+}}@_Z5tmainIiLi10EEiT_
-// CHECK25-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK25-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK25-NEXT:  entry:
 // CHECK25-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK25-NEXT:    [[A:%.*]] = alloca [10 x i32], align 4
@@ -3179,7 +3169,7 @@ int main (int argc, char **argv) {
 // CHECK25-NEXT:    [[TMP21:%.*]] = load i32, i32* [[TE]], align 4
 // CHECK25-NEXT:    [[TMP22:%.*]] = load i32, i32* [[TH]], align 4
 // CHECK25-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK25-NEXT:    [[TMP23:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l151.region_id, i32 3, i8** [[TMP19]], i8** [[TMP20]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.2, i32 0, i32 0), i8** null, i8** null, i32 [[TMP21]], i32 [[TMP22]])
+// CHECK25-NEXT:    [[TMP23:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l151.region_id, i32 3, i8** [[TMP19]], i8** [[TMP20]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.2, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.3, i32 0, i32 0), i8** null, i8** null, i32 [[TMP21]], i32 [[TMP22]])
 // CHECK25-NEXT:    [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0
 // CHECK25-NEXT:    br i1 [[TMP24]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK25:       omp_offload.failed:
@@ -3277,7 +3267,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK25-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK25-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK25-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK25-NEXT:  entry:
 // CHECK25-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK25-NEXT:    ret void
@@ -3315,14 +3305,14 @@ int main (int argc, char **argv) {
 // CHECK26-NEXT:    store i32 [[TMP3]], i32* [[CONV]], align 4
 // CHECK26-NEXT:    [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8
 // CHECK26-NEXT:    [[TMP5:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK26-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK26-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i64*
-// CHECK26-NEXT:    store i64 [[TMP4]], i64* [[TMP7]], align 8
-// CHECK26-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK26-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i64*
-// CHECK26-NEXT:    store i64 [[TMP4]], i64* [[TMP9]], align 8
-// CHECK26-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK26-NEXT:    store i64 4, i64* [[TMP10]], align 8
+// CHECK26-NEXT:    [[TMP6:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK26-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP6]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes to i8*), i64 24, i1 false)
+// CHECK26-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK26-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i64*
+// CHECK26-NEXT:    store i64 [[TMP4]], i64* [[TMP8]], align 8
+// CHECK26-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK26-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i64*
+// CHECK26-NEXT:    store i64 [[TMP4]], i64* [[TMP10]], align 8
 // CHECK26-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
 // CHECK26-NEXT:    store i8* null, i8** [[TMP11]], align 8
 // CHECK26-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -3331,48 +3321,46 @@ int main (int argc, char **argv) {
 // CHECK26-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK26-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i64*
 // CHECK26-NEXT:    store i64 [[TMP1]], i64* [[TMP15]], align 8
-// CHECK26-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK26-NEXT:    store i64 8, i64* [[TMP16]], align 8
-// CHECK26-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK26-NEXT:    store i8* null, i8** [[TMP17]], align 8
-// CHECK26-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK26-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
-// CHECK26-NEXT:    store i32* [[VLA]], i32** [[TMP19]], align 8
-// CHECK26-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK26-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32**
-// CHECK26-NEXT:    store i32* [[VLA]], i32** [[TMP21]], align 8
-// CHECK26-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK26-NEXT:    store i64 [[TMP5]], i64* [[TMP22]], align 8
-// CHECK26-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK26-NEXT:    store i8* null, i8** [[TMP23]], align 8
-// CHECK26-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK26-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK26-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK26-NEXT:    [[TMP27:%.*]] = load i32, i32* [[N]], align 4
-// CHECK26-NEXT:    store i32 [[TMP27]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK26-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK26-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP28]], 0
+// CHECK26-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK26-NEXT:    store i8* null, i8** [[TMP16]], align 8
+// CHECK26-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK26-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32**
+// CHECK26-NEXT:    store i32* [[VLA]], i32** [[TMP18]], align 8
+// CHECK26-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK26-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32**
+// CHECK26-NEXT:    store i32* [[VLA]], i32** [[TMP20]], align 8
+// CHECK26-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
+// CHECK26-NEXT:    store i64 [[TMP5]], i64* [[TMP21]], align 8
+// CHECK26-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK26-NEXT:    store i8* null, i8** [[TMP22]], align 8
+// CHECK26-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK26-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK26-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK26-NEXT:    [[TMP26:%.*]] = load i32, i32* [[N]], align 4
+// CHECK26-NEXT:    store i32 [[TMP26]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK26-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK26-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP27]], 0
 // CHECK26-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK26-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK26-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK26-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK26-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP29]], 1
-// CHECK26-NEXT:    [[TMP30:%.*]] = zext i32 [[ADD]] to i64
-// CHECK26-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 [[TMP30]])
-// CHECK26-NEXT:    [[TMP31:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l162.region_id, i32 3, i8** [[TMP24]], i8** [[TMP25]], i64* [[TMP26]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK26-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
-// CHECK26-NEXT:    br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK26-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK26-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP28]], 1
+// CHECK26-NEXT:    [[TMP29:%.*]] = zext i32 [[ADD]] to i64
+// CHECK26-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 [[TMP29]])
+// CHECK26-NEXT:    [[TMP30:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l162.region_id, i32 3, i8** [[TMP23]], i8** [[TMP24]], i64* [[TMP25]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK26-NEXT:    [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
+// CHECK26-NEXT:    br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK26:       omp_offload.failed:
 // CHECK26-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l162(i64 [[TMP4]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK26-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK26:       omp_offload.cont:
-// CHECK26-NEXT:    [[TMP33:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK26-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10EEiT_(i32 noundef signext [[TMP33]])
+// CHECK26-NEXT:    [[TMP32:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK26-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10EEiT_(i32 noundef signext [[TMP32]])
 // CHECK26-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK26-NEXT:    [[TMP34:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK26-NEXT:    call void @llvm.stackrestore(i8* [[TMP34]])
-// CHECK26-NEXT:    [[TMP35:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK26-NEXT:    ret i32 [[TMP35]]
+// CHECK26-NEXT:    [[TMP33:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK26-NEXT:    call void @llvm.stackrestore(i8* [[TMP33]])
+// CHECK26-NEXT:    [[TMP34:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK26-NEXT:    ret i32 [[TMP34]]
 //
 //
 // CHECK26-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l162
@@ -3487,7 +3475,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK26-LABEL: define {{[^@]+}}@_Z5tmainIiLi10EEiT_
-// CHECK26-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK26-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK26-NEXT:  entry:
 // CHECK26-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK26-NEXT:    [[A:%.*]] = alloca [10 x i32], align 4
@@ -3539,7 +3527,7 @@ int main (int argc, char **argv) {
 // CHECK26-NEXT:    [[TMP21:%.*]] = load i32, i32* [[TE]], align 4
 // CHECK26-NEXT:    [[TMP22:%.*]] = load i32, i32* [[TH]], align 4
 // CHECK26-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK26-NEXT:    [[TMP23:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l151.region_id, i32 3, i8** [[TMP19]], i8** [[TMP20]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.2, i32 0, i32 0), i8** null, i8** null, i32 [[TMP21]], i32 [[TMP22]])
+// CHECK26-NEXT:    [[TMP23:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l151.region_id, i32 3, i8** [[TMP19]], i8** [[TMP20]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.2, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.3, i32 0, i32 0), i8** null, i8** null, i32 [[TMP21]], i32 [[TMP22]])
 // CHECK26-NEXT:    [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0
 // CHECK26-NEXT:    br i1 [[TMP24]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK26:       omp_offload.failed:
@@ -3637,7 +3625,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK26-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK26-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK26-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK26-NEXT:  entry:
 // CHECK26-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK26-NEXT:    ret void
@@ -3674,14 +3662,14 @@ int main (int argc, char **argv) {
 // CHECK27-NEXT:    [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4
 // CHECK27-NEXT:    [[TMP4:%.*]] = mul nuw i32 [[TMP0]], 4
 // CHECK27-NEXT:    [[TMP5:%.*]] = sext i32 [[TMP4]] to i64
-// CHECK27-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK27-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32*
-// CHECK27-NEXT:    store i32 [[TMP3]], i32* [[TMP7]], align 4
-// CHECK27-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK27-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i32*
-// CHECK27-NEXT:    store i32 [[TMP3]], i32* [[TMP9]], align 4
-// CHECK27-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK27-NEXT:    store i64 4, i64* [[TMP10]], align 4
+// CHECK27-NEXT:    [[TMP6:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK27-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP6]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes to i8*), i32 24, i1 false)
+// CHECK27-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK27-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i32*
+// CHECK27-NEXT:    store i32 [[TMP3]], i32* [[TMP8]], align 4
+// CHECK27-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK27-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i32*
+// CHECK27-NEXT:    store i32 [[TMP3]], i32* [[TMP10]], align 4
 // CHECK27-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
 // CHECK27-NEXT:    store i8* null, i8** [[TMP11]], align 4
 // CHECK27-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -3690,48 +3678,46 @@ int main (int argc, char **argv) {
 // CHECK27-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK27-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i32*
 // CHECK27-NEXT:    store i32 [[TMP0]], i32* [[TMP15]], align 4
-// CHECK27-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK27-NEXT:    store i64 4, i64* [[TMP16]], align 4
-// CHECK27-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK27-NEXT:    store i8* null, i8** [[TMP17]], align 4
-// CHECK27-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK27-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
-// CHECK27-NEXT:    store i32* [[VLA]], i32** [[TMP19]], align 4
-// CHECK27-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK27-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32**
-// CHECK27-NEXT:    store i32* [[VLA]], i32** [[TMP21]], align 4
-// CHECK27-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK27-NEXT:    store i64 [[TMP5]], i64* [[TMP22]], align 4
-// CHECK27-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK27-NEXT:    store i8* null, i8** [[TMP23]], align 4
-// CHECK27-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK27-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK27-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK27-NEXT:    [[TMP27:%.*]] = load i32, i32* [[N]], align 4
-// CHECK27-NEXT:    store i32 [[TMP27]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK27-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK27-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP28]], 0
+// CHECK27-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK27-NEXT:    store i8* null, i8** [[TMP16]], align 4
+// CHECK27-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK27-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32**
+// CHECK27-NEXT:    store i32* [[VLA]], i32** [[TMP18]], align 4
+// CHECK27-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK27-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32**
+// CHECK27-NEXT:    store i32* [[VLA]], i32** [[TMP20]], align 4
+// CHECK27-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
+// CHECK27-NEXT:    store i64 [[TMP5]], i64* [[TMP21]], align 4
+// CHECK27-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK27-NEXT:    store i8* null, i8** [[TMP22]], align 4
+// CHECK27-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK27-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK27-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK27-NEXT:    [[TMP26:%.*]] = load i32, i32* [[N]], align 4
+// CHECK27-NEXT:    store i32 [[TMP26]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK27-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK27-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP27]], 0
 // CHECK27-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK27-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK27-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK27-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK27-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP29]], 1
-// CHECK27-NEXT:    [[TMP30:%.*]] = zext i32 [[ADD]] to i64
-// CHECK27-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 [[TMP30]])
-// CHECK27-NEXT:    [[TMP31:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l162.region_id, i32 3, i8** [[TMP24]], i8** [[TMP25]], i64* [[TMP26]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK27-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
-// CHECK27-NEXT:    br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK27-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK27-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP28]], 1
+// CHECK27-NEXT:    [[TMP29:%.*]] = zext i32 [[ADD]] to i64
+// CHECK27-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 [[TMP29]])
+// CHECK27-NEXT:    [[TMP30:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l162.region_id, i32 3, i8** [[TMP23]], i8** [[TMP24]], i64* [[TMP25]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK27-NEXT:    [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
+// CHECK27-NEXT:    br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK27:       omp_offload.failed:
 // CHECK27-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l162(i32 [[TMP3]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK27-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK27:       omp_offload.cont:
-// CHECK27-NEXT:    [[TMP33:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK27-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10EEiT_(i32 noundef [[TMP33]])
+// CHECK27-NEXT:    [[TMP32:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK27-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10EEiT_(i32 noundef [[TMP32]])
 // CHECK27-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK27-NEXT:    [[TMP34:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK27-NEXT:    call void @llvm.stackrestore(i8* [[TMP34]])
-// CHECK27-NEXT:    [[TMP35:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK27-NEXT:    ret i32 [[TMP35]]
+// CHECK27-NEXT:    [[TMP33:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK27-NEXT:    call void @llvm.stackrestore(i8* [[TMP33]])
+// CHECK27-NEXT:    [[TMP34:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK27-NEXT:    ret i32 [[TMP34]]
 //
 //
 // CHECK27-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l162
@@ -3844,7 +3830,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK27-LABEL: define {{[^@]+}}@_Z5tmainIiLi10EEiT_
-// CHECK27-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK27-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK27-NEXT:  entry:
 // CHECK27-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK27-NEXT:    [[A:%.*]] = alloca [10 x i32], align 4
@@ -3894,7 +3880,7 @@ int main (int argc, char **argv) {
 // CHECK27-NEXT:    [[TMP21:%.*]] = load i32, i32* [[TE]], align 4
 // CHECK27-NEXT:    [[TMP22:%.*]] = load i32, i32* [[TH]], align 4
 // CHECK27-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK27-NEXT:    [[TMP23:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l151.region_id, i32 3, i8** [[TMP19]], i8** [[TMP20]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.2, i32 0, i32 0), i8** null, i8** null, i32 [[TMP21]], i32 [[TMP22]])
+// CHECK27-NEXT:    [[TMP23:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l151.region_id, i32 3, i8** [[TMP19]], i8** [[TMP20]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.2, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.3, i32 0, i32 0), i8** null, i8** null, i32 [[TMP21]], i32 [[TMP22]])
 // CHECK27-NEXT:    [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0
 // CHECK27-NEXT:    br i1 [[TMP24]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK27:       omp_offload.failed:
@@ -3989,7 +3975,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK27-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK27-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK27-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK27-NEXT:  entry:
 // CHECK27-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK27-NEXT:    ret void
@@ -4026,14 +4012,14 @@ int main (int argc, char **argv) {
 // CHECK28-NEXT:    [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4
 // CHECK28-NEXT:    [[TMP4:%.*]] = mul nuw i32 [[TMP0]], 4
 // CHECK28-NEXT:    [[TMP5:%.*]] = sext i32 [[TMP4]] to i64
-// CHECK28-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK28-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32*
-// CHECK28-NEXT:    store i32 [[TMP3]], i32* [[TMP7]], align 4
-// CHECK28-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK28-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i32*
-// CHECK28-NEXT:    store i32 [[TMP3]], i32* [[TMP9]], align 4
-// CHECK28-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK28-NEXT:    store i64 4, i64* [[TMP10]], align 4
+// CHECK28-NEXT:    [[TMP6:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK28-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP6]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes to i8*), i32 24, i1 false)
+// CHECK28-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK28-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i32*
+// CHECK28-NEXT:    store i32 [[TMP3]], i32* [[TMP8]], align 4
+// CHECK28-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK28-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i32*
+// CHECK28-NEXT:    store i32 [[TMP3]], i32* [[TMP10]], align 4
 // CHECK28-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
 // CHECK28-NEXT:    store i8* null, i8** [[TMP11]], align 4
 // CHECK28-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -4042,48 +4028,46 @@ int main (int argc, char **argv) {
 // CHECK28-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK28-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i32*
 // CHECK28-NEXT:    store i32 [[TMP0]], i32* [[TMP15]], align 4
-// CHECK28-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK28-NEXT:    store i64 4, i64* [[TMP16]], align 4
-// CHECK28-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK28-NEXT:    store i8* null, i8** [[TMP17]], align 4
-// CHECK28-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK28-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
-// CHECK28-NEXT:    store i32* [[VLA]], i32** [[TMP19]], align 4
-// CHECK28-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK28-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32**
-// CHECK28-NEXT:    store i32* [[VLA]], i32** [[TMP21]], align 4
-// CHECK28-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK28-NEXT:    store i64 [[TMP5]], i64* [[TMP22]], align 4
-// CHECK28-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK28-NEXT:    store i8* null, i8** [[TMP23]], align 4
-// CHECK28-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK28-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK28-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK28-NEXT:    [[TMP27:%.*]] = load i32, i32* [[N]], align 4
-// CHECK28-NEXT:    store i32 [[TMP27]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK28-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK28-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP28]], 0
+// CHECK28-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK28-NEXT:    store i8* null, i8** [[TMP16]], align 4
+// CHECK28-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK28-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32**
+// CHECK28-NEXT:    store i32* [[VLA]], i32** [[TMP18]], align 4
+// CHECK28-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK28-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32**
+// CHECK28-NEXT:    store i32* [[VLA]], i32** [[TMP20]], align 4
+// CHECK28-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
+// CHECK28-NEXT:    store i64 [[TMP5]], i64* [[TMP21]], align 4
+// CHECK28-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK28-NEXT:    store i8* null, i8** [[TMP22]], align 4
+// CHECK28-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK28-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK28-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK28-NEXT:    [[TMP26:%.*]] = load i32, i32* [[N]], align 4
+// CHECK28-NEXT:    store i32 [[TMP26]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK28-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK28-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP27]], 0
 // CHECK28-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK28-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK28-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK28-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK28-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP29]], 1
-// CHECK28-NEXT:    [[TMP30:%.*]] = zext i32 [[ADD]] to i64
-// CHECK28-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 [[TMP30]])
-// CHECK28-NEXT:    [[TMP31:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l162.region_id, i32 3, i8** [[TMP24]], i8** [[TMP25]], i64* [[TMP26]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK28-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
-// CHECK28-NEXT:    br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK28-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK28-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP28]], 1
+// CHECK28-NEXT:    [[TMP29:%.*]] = zext i32 [[ADD]] to i64
+// CHECK28-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 [[TMP29]])
+// CHECK28-NEXT:    [[TMP30:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l162.region_id, i32 3, i8** [[TMP23]], i8** [[TMP24]], i64* [[TMP25]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK28-NEXT:    [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
+// CHECK28-NEXT:    br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK28:       omp_offload.failed:
 // CHECK28-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l162(i32 [[TMP3]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK28-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK28:       omp_offload.cont:
-// CHECK28-NEXT:    [[TMP33:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK28-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10EEiT_(i32 noundef [[TMP33]])
+// CHECK28-NEXT:    [[TMP32:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK28-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10EEiT_(i32 noundef [[TMP32]])
 // CHECK28-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK28-NEXT:    [[TMP34:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK28-NEXT:    call void @llvm.stackrestore(i8* [[TMP34]])
-// CHECK28-NEXT:    [[TMP35:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK28-NEXT:    ret i32 [[TMP35]]
+// CHECK28-NEXT:    [[TMP33:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK28-NEXT:    call void @llvm.stackrestore(i8* [[TMP33]])
+// CHECK28-NEXT:    [[TMP34:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK28-NEXT:    ret i32 [[TMP34]]
 //
 //
 // CHECK28-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l162
@@ -4196,7 +4180,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK28-LABEL: define {{[^@]+}}@_Z5tmainIiLi10EEiT_
-// CHECK28-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK28-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK28-NEXT:  entry:
 // CHECK28-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK28-NEXT:    [[A:%.*]] = alloca [10 x i32], align 4
@@ -4246,7 +4230,7 @@ int main (int argc, char **argv) {
 // CHECK28-NEXT:    [[TMP21:%.*]] = load i32, i32* [[TE]], align 4
 // CHECK28-NEXT:    [[TMP22:%.*]] = load i32, i32* [[TH]], align 4
 // CHECK28-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK28-NEXT:    [[TMP23:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l151.region_id, i32 3, i8** [[TMP19]], i8** [[TMP20]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.2, i32 0, i32 0), i8** null, i8** null, i32 [[TMP21]], i32 [[TMP22]])
+// CHECK28-NEXT:    [[TMP23:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l151.region_id, i32 3, i8** [[TMP19]], i8** [[TMP20]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.2, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.3, i32 0, i32 0), i8** null, i8** null, i32 [[TMP21]], i32 [[TMP22]])
 // CHECK28-NEXT:    [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0
 // CHECK28-NEXT:    br i1 [[TMP24]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK28:       omp_offload.failed:
@@ -4341,7 +4325,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK28-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK28-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK28-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK28-NEXT:  entry:
 // CHECK28-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK28-NEXT:    ret void

diff  --git a/clang/test/OpenMP/teams_distribute_collapse_codegen.cpp b/clang/test/OpenMP/teams_distribute_collapse_codegen.cpp
index 43af1c914522d..a723629102a4c 100644
--- a/clang/test/OpenMP/teams_distribute_collapse_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_collapse_codegen.cpp
@@ -29,7 +29,7 @@ struct SS{
     #pragma omp teams distribute collapse(2)
     for(int i = 0; i < X; i++) {
       for(int j = 0; j < Y; j++) {
-	a[i][j] = (T)0;
+        a[i][j] = (T)0;
       }
     }
 
@@ -716,14 +716,14 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP9:%.*]] = load i64, i64* [[M_CASTED]], align 8
 // CHECK9-NEXT:    [[TMP10:%.*]] = mul nuw i64 [[TMP1]], [[TMP3]]
 // CHECK9-NEXT:    [[TMP11:%.*]] = mul nuw i64 [[TMP10]], 4
-// CHECK9-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP13:%.*]] = bitcast i8** [[TMP12]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP7]], i64* [[TMP13]], align 8
-// CHECK9-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP7]], i64* [[TMP15]], align 8
-// CHECK9-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK9-NEXT:    store i64 4, i64* [[TMP16]], align 8
+// CHECK9-NEXT:    [[TMP12:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK9-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP12]], i8* align 8 bitcast ([5 x i64]* @.offload_sizes to i8*), i64 40, i1 false)
+// CHECK9-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP7]], i64* [[TMP14]], align 8
+// CHECK9-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP16:%.*]] = bitcast i8** [[TMP15]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP7]], i64* [[TMP16]], align 8
 // CHECK9-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
 // CHECK9-NEXT:    store i8* null, i8** [[TMP17]], align 8
 // CHECK9-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -732,75 +732,69 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK9-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i64*
 // CHECK9-NEXT:    store i64 [[TMP9]], i64* [[TMP21]], align 8
-// CHECK9-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK9-NEXT:    store i64 4, i64* [[TMP22]], align 8
-// CHECK9-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK9-NEXT:    store i8* null, i8** [[TMP23]], align 8
-// CHECK9-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP25]], align 8
-// CHECK9-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP27:%.*]] = bitcast i8** [[TMP26]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP27]], align 8
-// CHECK9-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK9-NEXT:    store i64 8, i64* [[TMP28]], align 8
-// CHECK9-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK9-NEXT:    store i8* null, i8** [[TMP29]], align 8
-// CHECK9-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK9-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK9-NEXT:    store i8* null, i8** [[TMP22]], align 8
+// CHECK9-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP24]], align 8
+// CHECK9-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP26]], align 8
+// CHECK9-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK9-NEXT:    store i8* null, i8** [[TMP27]], align 8
+// CHECK9-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK9-NEXT:    [[TMP29:%.*]] = bitcast i8** [[TMP28]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP3]], i64* [[TMP29]], align 8
+// CHECK9-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
 // CHECK9-NEXT:    [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i64*
 // CHECK9-NEXT:    store i64 [[TMP3]], i64* [[TMP31]], align 8
-// CHECK9-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK9-NEXT:    [[TMP33:%.*]] = bitcast i8** [[TMP32]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP3]], i64* [[TMP33]], align 8
-// CHECK9-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK9-NEXT:    store i64 8, i64* [[TMP34]], align 8
-// CHECK9-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
-// CHECK9-NEXT:    store i8* null, i8** [[TMP35]], align 8
-// CHECK9-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK9-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i32**
-// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP37]], align 8
-// CHECK9-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
-// CHECK9-NEXT:    [[TMP39:%.*]] = bitcast i8** [[TMP38]] to i32**
-// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP39]], align 8
-// CHECK9-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK9-NEXT:    store i64 [[TMP11]], i64* [[TMP40]], align 8
-// CHECK9-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
-// CHECK9-NEXT:    store i8* null, i8** [[TMP41]], align 8
-// CHECK9-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP45:%.*]] = load i32, i32* [[N]], align 4
-// CHECK9-NEXT:    store i32 [[TMP45]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK9-NEXT:    [[TMP46:%.*]] = load i32, i32* [[M]], align 4
-// CHECK9-NEXT:    store i32 [[TMP46]], i32* [[DOTCAPTURE_EXPR_3]], align 4
-// CHECK9-NEXT:    [[TMP47:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK9-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP47]], 0
+// CHECK9-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
+// CHECK9-NEXT:    store i8* null, i8** [[TMP32]], align 8
+// CHECK9-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK9-NEXT:    [[TMP34:%.*]] = bitcast i8** [[TMP33]] to i32**
+// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP34]], align 8
+// CHECK9-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK9-NEXT:    [[TMP36:%.*]] = bitcast i8** [[TMP35]] to i32**
+// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP36]], align 8
+// CHECK9-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
+// CHECK9-NEXT:    store i64 [[TMP11]], i64* [[TMP37]], align 8
+// CHECK9-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
+// CHECK9-NEXT:    store i8* null, i8** [[TMP38]], align 8
+// CHECK9-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP42:%.*]] = load i32, i32* [[N]], align 4
+// CHECK9-NEXT:    store i32 [[TMP42]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK9-NEXT:    [[TMP43:%.*]] = load i32, i32* [[M]], align 4
+// CHECK9-NEXT:    store i32 [[TMP43]], i32* [[DOTCAPTURE_EXPR_3]], align 4
+// CHECK9-NEXT:    [[TMP44:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK9-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP44]], 0
 // CHECK9-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK9-NEXT:    [[CONV5:%.*]] = sext i32 [[DIV]] to i64
-// CHECK9-NEXT:    [[TMP48:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4
-// CHECK9-NEXT:    [[SUB6:%.*]] = sub nsw i32 [[TMP48]], 0
+// CHECK9-NEXT:    [[TMP45:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4
+// CHECK9-NEXT:    [[SUB6:%.*]] = sub nsw i32 [[TMP45]], 0
 // CHECK9-NEXT:    [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1
 // CHECK9-NEXT:    [[CONV8:%.*]] = sext i32 [[DIV7]] to i64
 // CHECK9-NEXT:    [[MUL:%.*]] = mul nsw i64 [[CONV5]], [[CONV8]]
 // CHECK9-NEXT:    [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1
 // CHECK9-NEXT:    store i64 [[SUB9]], i64* [[DOTCAPTURE_EXPR_4]], align 8
-// CHECK9-NEXT:    [[TMP49:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_4]], align 8
-// CHECK9-NEXT:    [[ADD:%.*]] = add nsw i64 [[TMP49]], 1
+// CHECK9-NEXT:    [[TMP46:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_4]], align 8
+// CHECK9-NEXT:    [[ADD:%.*]] = add nsw i64 [[TMP46]], 1
 // CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 [[ADD]])
-// CHECK9-NEXT:    [[TMP50:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l82.region_id, i32 5, i8** [[TMP42]], i8** [[TMP43]], i64* [[TMP44]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK9-NEXT:    [[TMP51:%.*]] = icmp ne i32 [[TMP50]], 0
-// CHECK9-NEXT:    br i1 [[TMP51]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK9-NEXT:    [[TMP47:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l82.region_id, i32 5, i8** [[TMP39]], i8** [[TMP40]], i64* [[TMP41]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK9-NEXT:    [[TMP48:%.*]] = icmp ne i32 [[TMP47]], 0
+// CHECK9-NEXT:    br i1 [[TMP48]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK9:       omp_offload.failed:
 // CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l82(i64 [[TMP7]], i64 [[TMP9]], i64 [[TMP1]], i64 [[TMP3]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK9:       omp_offload.cont:
-// CHECK9-NEXT:    [[TMP52:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK9-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef signext [[TMP52]])
+// CHECK9-NEXT:    [[TMP49:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK9-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef signext [[TMP49]])
 // CHECK9-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK9-NEXT:    [[TMP53:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK9-NEXT:    call void @llvm.stackrestore(i8* [[TMP53]])
-// CHECK9-NEXT:    [[TMP54:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK9-NEXT:    ret i32 [[TMP54]]
+// CHECK9-NEXT:    [[TMP50:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK9-NEXT:    call void @llvm.stackrestore(i8* [[TMP50]])
+// CHECK9-NEXT:    [[TMP51:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK9-NEXT:    ret i32 [[TMP51]]
 //
 //
 // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l82
@@ -974,7 +968,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK9-LABEL: define {{[^@]+}}@_Z5tmainIiLi10ELi2EEiT_
-// CHECK9-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK9-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK9-NEXT:  entry:
 // CHECK9-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[A:%.*]] = alloca [10 x [2 x i32]], align 4
@@ -995,7 +989,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 20)
-// CHECK9-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l68.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.2, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK9-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l68.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.2, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.3, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK9-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK9-NEXT:    br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK9:       omp_offload.failed:
@@ -1097,7 +1091,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK9-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK9-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK9-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK9-NEXT:  entry:
 // CHECK9-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK9-NEXT:    ret void
@@ -1150,14 +1144,14 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[TMP9:%.*]] = load i64, i64* [[M_CASTED]], align 8
 // CHECK10-NEXT:    [[TMP10:%.*]] = mul nuw i64 [[TMP1]], [[TMP3]]
 // CHECK10-NEXT:    [[TMP11:%.*]] = mul nuw i64 [[TMP10]], 4
-// CHECK10-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP13:%.*]] = bitcast i8** [[TMP12]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP7]], i64* [[TMP13]], align 8
-// CHECK10-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP7]], i64* [[TMP15]], align 8
-// CHECK10-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK10-NEXT:    store i64 4, i64* [[TMP16]], align 8
+// CHECK10-NEXT:    [[TMP12:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK10-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP12]], i8* align 8 bitcast ([5 x i64]* @.offload_sizes to i8*), i64 40, i1 false)
+// CHECK10-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP7]], i64* [[TMP14]], align 8
+// CHECK10-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP16:%.*]] = bitcast i8** [[TMP15]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP7]], i64* [[TMP16]], align 8
 // CHECK10-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
 // CHECK10-NEXT:    store i8* null, i8** [[TMP17]], align 8
 // CHECK10-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -1166,75 +1160,69 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK10-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i64*
 // CHECK10-NEXT:    store i64 [[TMP9]], i64* [[TMP21]], align 8
-// CHECK10-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK10-NEXT:    store i64 4, i64* [[TMP22]], align 8
-// CHECK10-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK10-NEXT:    store i8* null, i8** [[TMP23]], align 8
-// CHECK10-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK10-NEXT:    [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP25]], align 8
-// CHECK10-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK10-NEXT:    [[TMP27:%.*]] = bitcast i8** [[TMP26]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP27]], align 8
-// CHECK10-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK10-NEXT:    store i64 8, i64* [[TMP28]], align 8
-// CHECK10-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK10-NEXT:    store i8* null, i8** [[TMP29]], align 8
-// CHECK10-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK10-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK10-NEXT:    store i8* null, i8** [[TMP22]], align 8
+// CHECK10-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK10-NEXT:    [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP24]], align 8
+// CHECK10-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK10-NEXT:    [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP26]], align 8
+// CHECK10-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK10-NEXT:    store i8* null, i8** [[TMP27]], align 8
+// CHECK10-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK10-NEXT:    [[TMP29:%.*]] = bitcast i8** [[TMP28]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP3]], i64* [[TMP29]], align 8
+// CHECK10-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
 // CHECK10-NEXT:    [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i64*
 // CHECK10-NEXT:    store i64 [[TMP3]], i64* [[TMP31]], align 8
-// CHECK10-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK10-NEXT:    [[TMP33:%.*]] = bitcast i8** [[TMP32]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP3]], i64* [[TMP33]], align 8
-// CHECK10-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK10-NEXT:    store i64 8, i64* [[TMP34]], align 8
-// CHECK10-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
-// CHECK10-NEXT:    store i8* null, i8** [[TMP35]], align 8
-// CHECK10-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK10-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i32**
-// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP37]], align 8
-// CHECK10-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
-// CHECK10-NEXT:    [[TMP39:%.*]] = bitcast i8** [[TMP38]] to i32**
-// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP39]], align 8
-// CHECK10-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK10-NEXT:    store i64 [[TMP11]], i64* [[TMP40]], align 8
-// CHECK10-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
-// CHECK10-NEXT:    store i8* null, i8** [[TMP41]], align 8
-// CHECK10-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP45:%.*]] = load i32, i32* [[N]], align 4
-// CHECK10-NEXT:    store i32 [[TMP45]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK10-NEXT:    [[TMP46:%.*]] = load i32, i32* [[M]], align 4
-// CHECK10-NEXT:    store i32 [[TMP46]], i32* [[DOTCAPTURE_EXPR_3]], align 4
-// CHECK10-NEXT:    [[TMP47:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK10-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP47]], 0
+// CHECK10-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
+// CHECK10-NEXT:    store i8* null, i8** [[TMP32]], align 8
+// CHECK10-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK10-NEXT:    [[TMP34:%.*]] = bitcast i8** [[TMP33]] to i32**
+// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP34]], align 8
+// CHECK10-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK10-NEXT:    [[TMP36:%.*]] = bitcast i8** [[TMP35]] to i32**
+// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP36]], align 8
+// CHECK10-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
+// CHECK10-NEXT:    store i64 [[TMP11]], i64* [[TMP37]], align 8
+// CHECK10-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
+// CHECK10-NEXT:    store i8* null, i8** [[TMP38]], align 8
+// CHECK10-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP42:%.*]] = load i32, i32* [[N]], align 4
+// CHECK10-NEXT:    store i32 [[TMP42]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK10-NEXT:    [[TMP43:%.*]] = load i32, i32* [[M]], align 4
+// CHECK10-NEXT:    store i32 [[TMP43]], i32* [[DOTCAPTURE_EXPR_3]], align 4
+// CHECK10-NEXT:    [[TMP44:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK10-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP44]], 0
 // CHECK10-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK10-NEXT:    [[CONV5:%.*]] = sext i32 [[DIV]] to i64
-// CHECK10-NEXT:    [[TMP48:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4
-// CHECK10-NEXT:    [[SUB6:%.*]] = sub nsw i32 [[TMP48]], 0
+// CHECK10-NEXT:    [[TMP45:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4
+// CHECK10-NEXT:    [[SUB6:%.*]] = sub nsw i32 [[TMP45]], 0
 // CHECK10-NEXT:    [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1
 // CHECK10-NEXT:    [[CONV8:%.*]] = sext i32 [[DIV7]] to i64
 // CHECK10-NEXT:    [[MUL:%.*]] = mul nsw i64 [[CONV5]], [[CONV8]]
 // CHECK10-NEXT:    [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1
 // CHECK10-NEXT:    store i64 [[SUB9]], i64* [[DOTCAPTURE_EXPR_4]], align 8
-// CHECK10-NEXT:    [[TMP49:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_4]], align 8
-// CHECK10-NEXT:    [[ADD:%.*]] = add nsw i64 [[TMP49]], 1
+// CHECK10-NEXT:    [[TMP46:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_4]], align 8
+// CHECK10-NEXT:    [[ADD:%.*]] = add nsw i64 [[TMP46]], 1
 // CHECK10-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 [[ADD]])
-// CHECK10-NEXT:    [[TMP50:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l82.region_id, i32 5, i8** [[TMP42]], i8** [[TMP43]], i64* [[TMP44]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK10-NEXT:    [[TMP51:%.*]] = icmp ne i32 [[TMP50]], 0
-// CHECK10-NEXT:    br i1 [[TMP51]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK10-NEXT:    [[TMP47:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l82.region_id, i32 5, i8** [[TMP39]], i8** [[TMP40]], i64* [[TMP41]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK10-NEXT:    [[TMP48:%.*]] = icmp ne i32 [[TMP47]], 0
+// CHECK10-NEXT:    br i1 [[TMP48]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK10:       omp_offload.failed:
 // CHECK10-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l82(i64 [[TMP7]], i64 [[TMP9]], i64 [[TMP1]], i64 [[TMP3]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK10-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK10:       omp_offload.cont:
-// CHECK10-NEXT:    [[TMP52:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK10-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef signext [[TMP52]])
+// CHECK10-NEXT:    [[TMP49:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK10-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef signext [[TMP49]])
 // CHECK10-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK10-NEXT:    [[TMP53:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK10-NEXT:    call void @llvm.stackrestore(i8* [[TMP53]])
-// CHECK10-NEXT:    [[TMP54:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK10-NEXT:    ret i32 [[TMP54]]
+// CHECK10-NEXT:    [[TMP50:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK10-NEXT:    call void @llvm.stackrestore(i8* [[TMP50]])
+// CHECK10-NEXT:    [[TMP51:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK10-NEXT:    ret i32 [[TMP51]]
 //
 //
 // CHECK10-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l82
@@ -1408,7 +1396,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK10-LABEL: define {{[^@]+}}@_Z5tmainIiLi10ELi2EEiT_
-// CHECK10-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK10-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK10-NEXT:  entry:
 // CHECK10-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK10-NEXT:    [[A:%.*]] = alloca [10 x [2 x i32]], align 4
@@ -1429,7 +1417,7 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK10-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK10-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 20)
-// CHECK10-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l68.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.2, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK10-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l68.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.2, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.3, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK10-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK10-NEXT:    br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK10:       omp_offload.failed:
@@ -1531,7 +1519,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK10-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK10-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK10-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK10-NEXT:  entry:
 // CHECK10-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK10-NEXT:    ret void
@@ -1581,14 +1569,14 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP8:%.*]] = mul nuw i32 [[TMP0]], [[TMP1]]
 // CHECK11-NEXT:    [[TMP9:%.*]] = mul nuw i32 [[TMP8]], 4
 // CHECK11-NEXT:    [[TMP10:%.*]] = sext i32 [[TMP9]] to i64
-// CHECK11-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP5]], i32* [[TMP12]], align 4
-// CHECK11-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP5]], i32* [[TMP14]], align 4
-// CHECK11-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK11-NEXT:    store i64 4, i64* [[TMP15]], align 4
+// CHECK11-NEXT:    [[TMP11:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK11-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP11]], i8* align 4 bitcast ([5 x i64]* @.offload_sizes to i8*), i32 40, i1 false)
+// CHECK11-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP13:%.*]] = bitcast i8** [[TMP12]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP5]], i32* [[TMP13]], align 4
+// CHECK11-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP5]], i32* [[TMP15]], align 4
 // CHECK11-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
 // CHECK11-NEXT:    store i8* null, i8** [[TMP16]], align 4
 // CHECK11-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -1597,75 +1585,69 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK11-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32*
 // CHECK11-NEXT:    store i32 [[TMP7]], i32* [[TMP20]], align 4
-// CHECK11-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK11-NEXT:    store i64 4, i64* [[TMP21]], align 4
-// CHECK11-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK11-NEXT:    store i8* null, i8** [[TMP22]], align 4
-// CHECK11-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP24]], align 4
-// CHECK11-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP26]], align 4
-// CHECK11-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK11-NEXT:    store i64 4, i64* [[TMP27]], align 4
-// CHECK11-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK11-NEXT:    store i8* null, i8** [[TMP28]], align 4
-// CHECK11-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK11-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK11-NEXT:    store i8* null, i8** [[TMP21]], align 4
+// CHECK11-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP23]], align 4
+// CHECK11-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP25]], align 4
+// CHECK11-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK11-NEXT:    store i8* null, i8** [[TMP26]], align 4
+// CHECK11-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK11-NEXT:    [[TMP28:%.*]] = bitcast i8** [[TMP27]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP1]], i32* [[TMP28]], align 4
+// CHECK11-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
 // CHECK11-NEXT:    [[TMP30:%.*]] = bitcast i8** [[TMP29]] to i32*
 // CHECK11-NEXT:    store i32 [[TMP1]], i32* [[TMP30]], align 4
-// CHECK11-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK11-NEXT:    [[TMP32:%.*]] = bitcast i8** [[TMP31]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP1]], i32* [[TMP32]], align 4
-// CHECK11-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK11-NEXT:    store i64 4, i64* [[TMP33]], align 4
-// CHECK11-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
-// CHECK11-NEXT:    store i8* null, i8** [[TMP34]], align 4
-// CHECK11-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK11-NEXT:    [[TMP36:%.*]] = bitcast i8** [[TMP35]] to i32**
-// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP36]], align 4
-// CHECK11-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
-// CHECK11-NEXT:    [[TMP38:%.*]] = bitcast i8** [[TMP37]] to i32**
-// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP38]], align 4
-// CHECK11-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK11-NEXT:    store i64 [[TMP10]], i64* [[TMP39]], align 4
-// CHECK11-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
-// CHECK11-NEXT:    store i8* null, i8** [[TMP40]], align 4
-// CHECK11-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP44:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP44]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK11-NEXT:    [[TMP45:%.*]] = load i32, i32* [[M]], align 4
-// CHECK11-NEXT:    store i32 [[TMP45]], i32* [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK11-NEXT:    [[TMP46:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK11-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP46]], 0
+// CHECK11-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
+// CHECK11-NEXT:    store i8* null, i8** [[TMP31]], align 4
+// CHECK11-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK11-NEXT:    [[TMP33:%.*]] = bitcast i8** [[TMP32]] to i32**
+// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP33]], align 4
+// CHECK11-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK11-NEXT:    [[TMP35:%.*]] = bitcast i8** [[TMP34]] to i32**
+// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP35]], align 4
+// CHECK11-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
+// CHECK11-NEXT:    store i64 [[TMP10]], i64* [[TMP36]], align 4
+// CHECK11-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
+// CHECK11-NEXT:    store i8* null, i8** [[TMP37]], align 4
+// CHECK11-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP41:%.*]] = load i32, i32* [[N]], align 4
+// CHECK11-NEXT:    store i32 [[TMP41]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK11-NEXT:    [[TMP42:%.*]] = load i32, i32* [[M]], align 4
+// CHECK11-NEXT:    store i32 [[TMP42]], i32* [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK11-NEXT:    [[TMP43:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK11-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP43]], 0
 // CHECK11-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK11-NEXT:    [[CONV:%.*]] = sext i32 [[DIV]] to i64
-// CHECK11-NEXT:    [[TMP47:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK11-NEXT:    [[SUB4:%.*]] = sub nsw i32 [[TMP47]], 0
+// CHECK11-NEXT:    [[TMP44:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK11-NEXT:    [[SUB4:%.*]] = sub nsw i32 [[TMP44]], 0
 // CHECK11-NEXT:    [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1
 // CHECK11-NEXT:    [[CONV6:%.*]] = sext i32 [[DIV5]] to i64
 // CHECK11-NEXT:    [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]]
 // CHECK11-NEXT:    [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1
 // CHECK11-NEXT:    store i64 [[SUB7]], i64* [[DOTCAPTURE_EXPR_3]], align 8
-// CHECK11-NEXT:    [[TMP48:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8
-// CHECK11-NEXT:    [[ADD:%.*]] = add nsw i64 [[TMP48]], 1
+// CHECK11-NEXT:    [[TMP45:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8
+// CHECK11-NEXT:    [[ADD:%.*]] = add nsw i64 [[TMP45]], 1
 // CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 [[ADD]])
-// CHECK11-NEXT:    [[TMP49:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l82.region_id, i32 5, i8** [[TMP41]], i8** [[TMP42]], i64* [[TMP43]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK11-NEXT:    [[TMP50:%.*]] = icmp ne i32 [[TMP49]], 0
-// CHECK11-NEXT:    br i1 [[TMP50]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK11-NEXT:    [[TMP46:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l82.region_id, i32 5, i8** [[TMP38]], i8** [[TMP39]], i64* [[TMP40]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK11-NEXT:    [[TMP47:%.*]] = icmp ne i32 [[TMP46]], 0
+// CHECK11-NEXT:    br i1 [[TMP47]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK11:       omp_offload.failed:
 // CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l82(i32 [[TMP5]], i32 [[TMP7]], i32 [[TMP0]], i32 [[TMP1]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK11:       omp_offload.cont:
-// CHECK11-NEXT:    [[TMP51:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK11-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef [[TMP51]])
+// CHECK11-NEXT:    [[TMP48:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK11-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef [[TMP48]])
 // CHECK11-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK11-NEXT:    [[TMP52:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK11-NEXT:    call void @llvm.stackrestore(i8* [[TMP52]])
-// CHECK11-NEXT:    [[TMP53:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK11-NEXT:    ret i32 [[TMP53]]
+// CHECK11-NEXT:    [[TMP49:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK11-NEXT:    call void @llvm.stackrestore(i8* [[TMP49]])
+// CHECK11-NEXT:    [[TMP50:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK11-NEXT:    ret i32 [[TMP50]]
 //
 //
 // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l82
@@ -1835,7 +1817,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK11-LABEL: define {{[^@]+}}@_Z5tmainIiLi10ELi2EEiT_
-// CHECK11-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK11-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK11-NEXT:  entry:
 // CHECK11-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[A:%.*]] = alloca [10 x [2 x i32]], align 4
@@ -1856,7 +1838,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 20)
-// CHECK11-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l68.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.2, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK11-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l68.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.2, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.3, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK11-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK11-NEXT:    br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK11:       omp_offload.failed:
@@ -1956,7 +1938,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK11-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK11-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK11-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK11-NEXT:  entry:
 // CHECK11-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK11-NEXT:    ret void
@@ -2006,14 +1988,14 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[TMP8:%.*]] = mul nuw i32 [[TMP0]], [[TMP1]]
 // CHECK12-NEXT:    [[TMP9:%.*]] = mul nuw i32 [[TMP8]], 4
 // CHECK12-NEXT:    [[TMP10:%.*]] = sext i32 [[TMP9]] to i64
-// CHECK12-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP5]], i32* [[TMP12]], align 4
-// CHECK12-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP5]], i32* [[TMP14]], align 4
-// CHECK12-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK12-NEXT:    store i64 4, i64* [[TMP15]], align 4
+// CHECK12-NEXT:    [[TMP11:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK12-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP11]], i8* align 4 bitcast ([5 x i64]* @.offload_sizes to i8*), i32 40, i1 false)
+// CHECK12-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP13:%.*]] = bitcast i8** [[TMP12]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP5]], i32* [[TMP13]], align 4
+// CHECK12-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP5]], i32* [[TMP15]], align 4
 // CHECK12-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
 // CHECK12-NEXT:    store i8* null, i8** [[TMP16]], align 4
 // CHECK12-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -2022,75 +2004,69 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK12-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32*
 // CHECK12-NEXT:    store i32 [[TMP7]], i32* [[TMP20]], align 4
-// CHECK12-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK12-NEXT:    store i64 4, i64* [[TMP21]], align 4
-// CHECK12-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK12-NEXT:    store i8* null, i8** [[TMP22]], align 4
-// CHECK12-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK12-NEXT:    [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP24]], align 4
-// CHECK12-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK12-NEXT:    [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP26]], align 4
-// CHECK12-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK12-NEXT:    store i64 4, i64* [[TMP27]], align 4
-// CHECK12-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK12-NEXT:    store i8* null, i8** [[TMP28]], align 4
-// CHECK12-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK12-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK12-NEXT:    store i8* null, i8** [[TMP21]], align 4
+// CHECK12-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK12-NEXT:    [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP23]], align 4
+// CHECK12-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK12-NEXT:    [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP25]], align 4
+// CHECK12-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK12-NEXT:    store i8* null, i8** [[TMP26]], align 4
+// CHECK12-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK12-NEXT:    [[TMP28:%.*]] = bitcast i8** [[TMP27]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP1]], i32* [[TMP28]], align 4
+// CHECK12-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
 // CHECK12-NEXT:    [[TMP30:%.*]] = bitcast i8** [[TMP29]] to i32*
 // CHECK12-NEXT:    store i32 [[TMP1]], i32* [[TMP30]], align 4
-// CHECK12-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK12-NEXT:    [[TMP32:%.*]] = bitcast i8** [[TMP31]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP1]], i32* [[TMP32]], align 4
-// CHECK12-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK12-NEXT:    store i64 4, i64* [[TMP33]], align 4
-// CHECK12-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
-// CHECK12-NEXT:    store i8* null, i8** [[TMP34]], align 4
-// CHECK12-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK12-NEXT:    [[TMP36:%.*]] = bitcast i8** [[TMP35]] to i32**
-// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP36]], align 4
-// CHECK12-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
-// CHECK12-NEXT:    [[TMP38:%.*]] = bitcast i8** [[TMP37]] to i32**
-// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP38]], align 4
-// CHECK12-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK12-NEXT:    store i64 [[TMP10]], i64* [[TMP39]], align 4
-// CHECK12-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
-// CHECK12-NEXT:    store i8* null, i8** [[TMP40]], align 4
-// CHECK12-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP44:%.*]] = load i32, i32* [[N]], align 4
-// CHECK12-NEXT:    store i32 [[TMP44]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK12-NEXT:    [[TMP45:%.*]] = load i32, i32* [[M]], align 4
-// CHECK12-NEXT:    store i32 [[TMP45]], i32* [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK12-NEXT:    [[TMP46:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK12-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP46]], 0
+// CHECK12-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
+// CHECK12-NEXT:    store i8* null, i8** [[TMP31]], align 4
+// CHECK12-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK12-NEXT:    [[TMP33:%.*]] = bitcast i8** [[TMP32]] to i32**
+// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP33]], align 4
+// CHECK12-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK12-NEXT:    [[TMP35:%.*]] = bitcast i8** [[TMP34]] to i32**
+// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP35]], align 4
+// CHECK12-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
+// CHECK12-NEXT:    store i64 [[TMP10]], i64* [[TMP36]], align 4
+// CHECK12-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
+// CHECK12-NEXT:    store i8* null, i8** [[TMP37]], align 4
+// CHECK12-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP41:%.*]] = load i32, i32* [[N]], align 4
+// CHECK12-NEXT:    store i32 [[TMP41]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK12-NEXT:    [[TMP42:%.*]] = load i32, i32* [[M]], align 4
+// CHECK12-NEXT:    store i32 [[TMP42]], i32* [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK12-NEXT:    [[TMP43:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK12-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP43]], 0
 // CHECK12-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK12-NEXT:    [[CONV:%.*]] = sext i32 [[DIV]] to i64
-// CHECK12-NEXT:    [[TMP47:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK12-NEXT:    [[SUB4:%.*]] = sub nsw i32 [[TMP47]], 0
+// CHECK12-NEXT:    [[TMP44:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK12-NEXT:    [[SUB4:%.*]] = sub nsw i32 [[TMP44]], 0
 // CHECK12-NEXT:    [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1
 // CHECK12-NEXT:    [[CONV6:%.*]] = sext i32 [[DIV5]] to i64
 // CHECK12-NEXT:    [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]]
 // CHECK12-NEXT:    [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1
 // CHECK12-NEXT:    store i64 [[SUB7]], i64* [[DOTCAPTURE_EXPR_3]], align 8
-// CHECK12-NEXT:    [[TMP48:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8
-// CHECK12-NEXT:    [[ADD:%.*]] = add nsw i64 [[TMP48]], 1
+// CHECK12-NEXT:    [[TMP45:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8
+// CHECK12-NEXT:    [[ADD:%.*]] = add nsw i64 [[TMP45]], 1
 // CHECK12-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 [[ADD]])
-// CHECK12-NEXT:    [[TMP49:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l82.region_id, i32 5, i8** [[TMP41]], i8** [[TMP42]], i64* [[TMP43]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK12-NEXT:    [[TMP50:%.*]] = icmp ne i32 [[TMP49]], 0
-// CHECK12-NEXT:    br i1 [[TMP50]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK12-NEXT:    [[TMP46:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l82.region_id, i32 5, i8** [[TMP38]], i8** [[TMP39]], i64* [[TMP40]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK12-NEXT:    [[TMP47:%.*]] = icmp ne i32 [[TMP46]], 0
+// CHECK12-NEXT:    br i1 [[TMP47]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK12:       omp_offload.failed:
 // CHECK12-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l82(i32 [[TMP5]], i32 [[TMP7]], i32 [[TMP0]], i32 [[TMP1]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK12-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK12:       omp_offload.cont:
-// CHECK12-NEXT:    [[TMP51:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK12-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef [[TMP51]])
+// CHECK12-NEXT:    [[TMP48:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK12-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef [[TMP48]])
 // CHECK12-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK12-NEXT:    [[TMP52:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK12-NEXT:    call void @llvm.stackrestore(i8* [[TMP52]])
-// CHECK12-NEXT:    [[TMP53:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK12-NEXT:    ret i32 [[TMP53]]
+// CHECK12-NEXT:    [[TMP49:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK12-NEXT:    call void @llvm.stackrestore(i8* [[TMP49]])
+// CHECK12-NEXT:    [[TMP50:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK12-NEXT:    ret i32 [[TMP50]]
 //
 //
 // CHECK12-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l82
@@ -2260,7 +2236,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK12-LABEL: define {{[^@]+}}@_Z5tmainIiLi10ELi2EEiT_
-// CHECK12-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK12-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK12-NEXT:  entry:
 // CHECK12-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK12-NEXT:    [[A:%.*]] = alloca [10 x [2 x i32]], align 4
@@ -2281,7 +2257,7 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK12-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK12-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 20)
-// CHECK12-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l68.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.2, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK12-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l68.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.2, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.3, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK12-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK12-NEXT:    br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK12:       omp_offload.failed:
@@ -2381,7 +2357,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK12-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK12-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK12-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK12-NEXT:  entry:
 // CHECK12-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK12-NEXT:    ret void

diff  --git a/clang/test/OpenMP/teams_distribute_dist_schedule_codegen.cpp b/clang/test/OpenMP/teams_distribute_dist_schedule_codegen.cpp
index 4d9165dee987e..edea2b7276d84 100644
--- a/clang/test/OpenMP/teams_distribute_dist_schedule_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_dist_schedule_codegen.cpp
@@ -1561,14 +1561,14 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    store i32 [[TMP3]], i32* [[CONV]], align 4
 // CHECK9-NEXT:    [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8
 // CHECK9-NEXT:    [[TMP5:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK9-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP4]], i64* [[TMP7]], align 8
-// CHECK9-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP4]], i64* [[TMP9]], align 8
-// CHECK9-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK9-NEXT:    store i64 4, i64* [[TMP10]], align 8
+// CHECK9-NEXT:    [[TMP6:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK9-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP6]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes to i8*), i64 24, i1 false)
+// CHECK9-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP4]], i64* [[TMP8]], align 8
+// CHECK9-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP4]], i64* [[TMP10]], align 8
 // CHECK9-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
 // CHECK9-NEXT:    store i8* null, i8** [[TMP11]], align 8
 // CHECK9-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -1577,160 +1577,154 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK9-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i64*
 // CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP15]], align 8
-// CHECK9-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK9-NEXT:    store i64 8, i64* [[TMP16]], align 8
-// CHECK9-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK9-NEXT:    store i8* null, i8** [[TMP17]], align 8
-// CHECK9-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
-// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP19]], align 8
-// CHECK9-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32**
-// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP21]], align 8
-// CHECK9-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK9-NEXT:    store i64 [[TMP5]], i64* [[TMP22]], align 8
-// CHECK9-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK9-NEXT:    store i8* null, i8** [[TMP23]], align 8
-// CHECK9-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP27:%.*]] = load i32, i32* [[N]], align 4
-// CHECK9-NEXT:    store i32 [[TMP27]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK9-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK9-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP28]], 0
+// CHECK9-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK9-NEXT:    store i8* null, i8** [[TMP16]], align 8
+// CHECK9-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32**
+// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP18]], align 8
+// CHECK9-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32**
+// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP20]], align 8
+// CHECK9-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
+// CHECK9-NEXT:    store i64 [[TMP5]], i64* [[TMP21]], align 8
+// CHECK9-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK9-NEXT:    store i8* null, i8** [[TMP22]], align 8
+// CHECK9-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP26:%.*]] = load i32, i32* [[N]], align 4
+// CHECK9-NEXT:    store i32 [[TMP26]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK9-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK9-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP27]], 0
 // CHECK9-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK9-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK9-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK9-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK9-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP29]], 1
-// CHECK9-NEXT:    [[TMP30:%.*]] = zext i32 [[ADD]] to i64
-// CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 [[TMP30]])
-// CHECK9-NEXT:    [[TMP31:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l100.region_id, i32 3, i8** [[TMP24]], i8** [[TMP25]], i64* [[TMP26]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK9-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
-// CHECK9-NEXT:    br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK9-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK9-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP28]], 1
+// CHECK9-NEXT:    [[TMP29:%.*]] = zext i32 [[ADD]] to i64
+// CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 [[TMP29]])
+// CHECK9-NEXT:    [[TMP30:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l100.region_id, i32 3, i8** [[TMP23]], i8** [[TMP24]], i64* [[TMP25]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK9-NEXT:    [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
+// CHECK9-NEXT:    br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK9:       omp_offload.failed:
 // CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l100(i64 [[TMP4]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK9:       omp_offload.cont:
-// CHECK9-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N]], align 4
+// CHECK9-NEXT:    [[TMP32:%.*]] = load i32, i32* [[N]], align 4
 // CHECK9-NEXT:    [[CONV4:%.*]] = bitcast i64* [[N_CASTED3]] to i32*
-// CHECK9-NEXT:    store i32 [[TMP33]], i32* [[CONV4]], align 4
-// CHECK9-NEXT:    [[TMP34:%.*]] = load i64, i64* [[N_CASTED3]], align 8
-// CHECK9-NEXT:    [[TMP35:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK9-NEXT:    store i32 [[TMP32]], i32* [[CONV4]], align 4
+// CHECK9-NEXT:    [[TMP33:%.*]] = load i64, i64* [[N_CASTED3]], align 8
+// CHECK9-NEXT:    [[TMP34:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK9-NEXT:    [[TMP35:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES8]] to i8*
+// CHECK9-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP35]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes.2 to i8*), i64 24, i1 false)
 // CHECK9-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP34]], i64* [[TMP37]], align 8
+// CHECK9-NEXT:    store i64 [[TMP33]], i64* [[TMP37]], align 8
 // CHECK9-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP39:%.*]] = bitcast i8** [[TMP38]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP34]], i64* [[TMP39]], align 8
-// CHECK9-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 0
-// CHECK9-NEXT:    store i64 4, i64* [[TMP40]], align 8
-// CHECK9-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 0
-// CHECK9-NEXT:    store i8* null, i8** [[TMP41]], align 8
-// CHECK9-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 1
-// CHECK9-NEXT:    [[TMP43:%.*]] = bitcast i8** [[TMP42]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP43]], align 8
-// CHECK9-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 1
-// CHECK9-NEXT:    [[TMP45:%.*]] = bitcast i8** [[TMP44]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP45]], align 8
-// CHECK9-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 1
-// CHECK9-NEXT:    store i64 8, i64* [[TMP46]], align 8
-// CHECK9-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 1
-// CHECK9-NEXT:    store i8* null, i8** [[TMP47]], align 8
-// CHECK9-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 2
+// CHECK9-NEXT:    store i64 [[TMP33]], i64* [[TMP39]], align 8
+// CHECK9-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 0
+// CHECK9-NEXT:    store i8* null, i8** [[TMP40]], align 8
+// CHECK9-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 1
+// CHECK9-NEXT:    [[TMP42:%.*]] = bitcast i8** [[TMP41]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP42]], align 8
+// CHECK9-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 1
+// CHECK9-NEXT:    [[TMP44:%.*]] = bitcast i8** [[TMP43]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP44]], align 8
+// CHECK9-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 1
+// CHECK9-NEXT:    store i8* null, i8** [[TMP45]], align 8
+// CHECK9-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP47:%.*]] = bitcast i8** [[TMP46]] to i32**
+// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP47]], align 8
+// CHECK9-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 2
 // CHECK9-NEXT:    [[TMP49:%.*]] = bitcast i8** [[TMP48]] to i32**
 // CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP49]], align 8
-// CHECK9-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP51:%.*]] = bitcast i8** [[TMP50]] to i32**
-// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP51]], align 8
-// CHECK9-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 2
-// CHECK9-NEXT:    store i64 [[TMP35]], i64* [[TMP52]], align 8
-// CHECK9-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 2
-// CHECK9-NEXT:    store i8* null, i8** [[TMP53]], align 8
-// CHECK9-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP57:%.*]] = load i32, i32* [[N]], align 4
-// CHECK9-NEXT:    store i32 [[TMP57]], i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK9-NEXT:    [[TMP58:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK9-NEXT:    [[SUB12:%.*]] = sub nsw i32 [[TMP58]], 0
+// CHECK9-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 2
+// CHECK9-NEXT:    store i64 [[TMP34]], i64* [[TMP50]], align 8
+// CHECK9-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 2
+// CHECK9-NEXT:    store i8* null, i8** [[TMP51]], align 8
+// CHECK9-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP55:%.*]] = load i32, i32* [[N]], align 4
+// CHECK9-NEXT:    store i32 [[TMP55]], i32* [[DOTCAPTURE_EXPR_10]], align 4
+// CHECK9-NEXT:    [[TMP56:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
+// CHECK9-NEXT:    [[SUB12:%.*]] = sub nsw i32 [[TMP56]], 0
 // CHECK9-NEXT:    [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1
 // CHECK9-NEXT:    [[SUB14:%.*]] = sub nsw i32 [[DIV13]], 1
 // CHECK9-NEXT:    store i32 [[SUB14]], i32* [[DOTCAPTURE_EXPR_11]], align 4
-// CHECK9-NEXT:    [[TMP59:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_11]], align 4
-// CHECK9-NEXT:    [[ADD15:%.*]] = add nsw i32 [[TMP59]], 1
-// CHECK9-NEXT:    [[TMP60:%.*]] = zext i32 [[ADD15]] to i64
-// CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 [[TMP60]])
-// CHECK9-NEXT:    [[TMP61:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l105.region_id, i32 3, i8** [[TMP54]], i8** [[TMP55]], i64* [[TMP56]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.2, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK9-NEXT:    [[TMP62:%.*]] = icmp ne i32 [[TMP61]], 0
-// CHECK9-NEXT:    br i1 [[TMP62]], label [[OMP_OFFLOAD_FAILED16:%.*]], label [[OMP_OFFLOAD_CONT17:%.*]]
+// CHECK9-NEXT:    [[TMP57:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_11]], align 4
+// CHECK9-NEXT:    [[ADD15:%.*]] = add nsw i32 [[TMP57]], 1
+// CHECK9-NEXT:    [[TMP58:%.*]] = zext i32 [[ADD15]] to i64
+// CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 [[TMP58]])
+// CHECK9-NEXT:    [[TMP59:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l105.region_id, i32 3, i8** [[TMP52]], i8** [[TMP53]], i64* [[TMP54]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.3, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK9-NEXT:    [[TMP60:%.*]] = icmp ne i32 [[TMP59]], 0
+// CHECK9-NEXT:    br i1 [[TMP60]], label [[OMP_OFFLOAD_FAILED16:%.*]], label [[OMP_OFFLOAD_CONT17:%.*]]
 // CHECK9:       omp_offload.failed16:
-// CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l105(i64 [[TMP34]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l105(i64 [[TMP33]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT17]]
 // CHECK9:       omp_offload.cont17:
-// CHECK9-NEXT:    [[TMP63:%.*]] = load i32, i32* [[N]], align 4
+// CHECK9-NEXT:    [[TMP61:%.*]] = load i32, i32* [[N]], align 4
 // CHECK9-NEXT:    [[CONV19:%.*]] = bitcast i64* [[N_CASTED18]] to i32*
-// CHECK9-NEXT:    store i32 [[TMP63]], i32* [[CONV19]], align 4
-// CHECK9-NEXT:    [[TMP64:%.*]] = load i64, i64* [[N_CASTED18]], align 8
-// CHECK9-NEXT:    [[TMP65:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK9-NEXT:    [[TMP66:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP67:%.*]] = bitcast i8** [[TMP66]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP64]], i64* [[TMP67]], align 8
-// CHECK9-NEXT:    [[TMP68:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP69:%.*]] = bitcast i8** [[TMP68]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP64]], i64* [[TMP69]], align 8
-// CHECK9-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES23]], i32 0, i32 0
-// CHECK9-NEXT:    store i64 4, i64* [[TMP70]], align 8
-// CHECK9-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 0
-// CHECK9-NEXT:    store i8* null, i8** [[TMP71]], align 8
-// CHECK9-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 1
+// CHECK9-NEXT:    store i32 [[TMP61]], i32* [[CONV19]], align 4
+// CHECK9-NEXT:    [[TMP62:%.*]] = load i64, i64* [[N_CASTED18]], align 8
+// CHECK9-NEXT:    [[TMP63:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK9-NEXT:    [[TMP64:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES23]] to i8*
+// CHECK9-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP64]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes.5 to i8*), i64 24, i1 false)
+// CHECK9-NEXT:    [[TMP65:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP66:%.*]] = bitcast i8** [[TMP65]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP62]], i64* [[TMP66]], align 8
+// CHECK9-NEXT:    [[TMP67:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP68:%.*]] = bitcast i8** [[TMP67]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP62]], i64* [[TMP68]], align 8
+// CHECK9-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 0
+// CHECK9-NEXT:    store i8* null, i8** [[TMP69]], align 8
+// CHECK9-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 1
+// CHECK9-NEXT:    [[TMP71:%.*]] = bitcast i8** [[TMP70]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP71]], align 8
+// CHECK9-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 1
 // CHECK9-NEXT:    [[TMP73:%.*]] = bitcast i8** [[TMP72]] to i64*
 // CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP73]], align 8
-// CHECK9-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 1
-// CHECK9-NEXT:    [[TMP75:%.*]] = bitcast i8** [[TMP74]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP75]], align 8
-// CHECK9-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES23]], i32 0, i32 1
-// CHECK9-NEXT:    store i64 8, i64* [[TMP76]], align 8
-// CHECK9-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 1
-// CHECK9-NEXT:    store i8* null, i8** [[TMP77]], align 8
-// CHECK9-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP79:%.*]] = bitcast i8** [[TMP78]] to i32**
-// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP79]], align 8
-// CHECK9-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP81:%.*]] = bitcast i8** [[TMP80]] to i32**
-// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP81]], align 8
-// CHECK9-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES23]], i32 0, i32 2
-// CHECK9-NEXT:    store i64 [[TMP65]], i64* [[TMP82]], align 8
-// CHECK9-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 2
-// CHECK9-NEXT:    store i8* null, i8** [[TMP83]], align 8
-// CHECK9-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES23]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP87:%.*]] = load i32, i32* [[N]], align 4
-// CHECK9-NEXT:    store i32 [[TMP87]], i32* [[DOTCAPTURE_EXPR_25]], align 4
-// CHECK9-NEXT:    [[TMP88:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_25]], align 4
-// CHECK9-NEXT:    [[SUB27:%.*]] = sub nsw i32 [[TMP88]], 0
+// CHECK9-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 1
+// CHECK9-NEXT:    store i8* null, i8** [[TMP74]], align 8
+// CHECK9-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP76:%.*]] = bitcast i8** [[TMP75]] to i32**
+// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP76]], align 8
+// CHECK9-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP78:%.*]] = bitcast i8** [[TMP77]] to i32**
+// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP78]], align 8
+// CHECK9-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES23]], i32 0, i32 2
+// CHECK9-NEXT:    store i64 [[TMP63]], i64* [[TMP79]], align 8
+// CHECK9-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 2
+// CHECK9-NEXT:    store i8* null, i8** [[TMP80]], align 8
+// CHECK9-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES23]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP84:%.*]] = load i32, i32* [[N]], align 4
+// CHECK9-NEXT:    store i32 [[TMP84]], i32* [[DOTCAPTURE_EXPR_25]], align 4
+// CHECK9-NEXT:    [[TMP85:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_25]], align 4
+// CHECK9-NEXT:    [[SUB27:%.*]] = sub nsw i32 [[TMP85]], 0
 // CHECK9-NEXT:    [[DIV28:%.*]] = sdiv i32 [[SUB27]], 1
 // CHECK9-NEXT:    [[SUB29:%.*]] = sub nsw i32 [[DIV28]], 1
 // CHECK9-NEXT:    store i32 [[SUB29]], i32* [[DOTCAPTURE_EXPR_26]], align 4
-// CHECK9-NEXT:    [[TMP89:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_26]], align 4
-// CHECK9-NEXT:    [[ADD30:%.*]] = add nsw i32 [[TMP89]], 1
-// CHECK9-NEXT:    [[TMP90:%.*]] = zext i32 [[ADD30]] to i64
-// CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 [[TMP90]])
-// CHECK9-NEXT:    [[TMP91:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l110.region_id, i32 3, i8** [[TMP84]], i8** [[TMP85]], i64* [[TMP86]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK9-NEXT:    [[TMP92:%.*]] = icmp ne i32 [[TMP91]], 0
-// CHECK9-NEXT:    br i1 [[TMP92]], label [[OMP_OFFLOAD_FAILED31:%.*]], label [[OMP_OFFLOAD_CONT32:%.*]]
+// CHECK9-NEXT:    [[TMP86:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_26]], align 4
+// CHECK9-NEXT:    [[ADD30:%.*]] = add nsw i32 [[TMP86]], 1
+// CHECK9-NEXT:    [[TMP87:%.*]] = zext i32 [[ADD30]] to i64
+// CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 [[TMP87]])
+// CHECK9-NEXT:    [[TMP88:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l110.region_id, i32 3, i8** [[TMP81]], i8** [[TMP82]], i64* [[TMP83]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.6, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK9-NEXT:    [[TMP89:%.*]] = icmp ne i32 [[TMP88]], 0
+// CHECK9-NEXT:    br i1 [[TMP89]], label [[OMP_OFFLOAD_FAILED31:%.*]], label [[OMP_OFFLOAD_CONT32:%.*]]
 // CHECK9:       omp_offload.failed31:
-// CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l110(i64 [[TMP64]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l110(i64 [[TMP62]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT32]]
 // CHECK9:       omp_offload.cont32:
-// CHECK9-NEXT:    [[TMP93:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK9-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10EEiT_(i32 noundef signext [[TMP93]])
+// CHECK9-NEXT:    [[TMP90:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK9-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10EEiT_(i32 noundef signext [[TMP90]])
 // CHECK9-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK9-NEXT:    [[TMP94:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK9-NEXT:    call void @llvm.stackrestore(i8* [[TMP94]])
-// CHECK9-NEXT:    [[TMP95:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK9-NEXT:    ret i32 [[TMP95]]
+// CHECK9-NEXT:    [[TMP91:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK9-NEXT:    call void @llvm.stackrestore(i8* [[TMP91]])
+// CHECK9-NEXT:    [[TMP92:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK9-NEXT:    ret i32 [[TMP92]]
 //
 //
 // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l100
@@ -1975,11 +1969,11 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK9-NEXT:    store i32 [[TMP3]], i32* [[CONV1]], align 4
 // CHECK9-NEXT:    [[TMP4:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32* [[CONV]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP4]])
+// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*, i64)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32* [[CONV]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP4]])
 // CHECK9-NEXT:    ret void
 //
 //
-// CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3
+// CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..4
 // CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK9-NEXT:  entry:
 // CHECK9-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -2096,7 +2090,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK9-LABEL: define {{[^@]+}}@_Z5tmainIiLi10EEiT_
-// CHECK9-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK9-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK9-NEXT:  entry:
 // CHECK9-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[A:%.*]] = alloca [10 x i32], align 4
@@ -2124,7 +2118,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK9-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l79.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.6, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK9-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l79.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.8, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK9-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK9-NEXT:    br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK9:       omp_offload.failed:
@@ -2142,7 +2136,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
 // CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK9-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l84.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.8, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK9-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l84.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.11, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.12, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK9-NEXT:    [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
 // CHECK9-NEXT:    br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]]
 // CHECK9:       omp_offload.failed5:
@@ -2160,7 +2154,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS8]], i32 0, i32 0
 // CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK9-NEXT:    [[TMP25:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l89.region_id, i32 1, i8** [[TMP23]], i8** [[TMP24]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.11, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.12, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK9-NEXT:    [[TMP25:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l89.region_id, i32 1, i8** [[TMP23]], i8** [[TMP24]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.14, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.15, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK9-NEXT:    [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0
 // CHECK9-NEXT:    br i1 [[TMP26]], label [[OMP_OFFLOAD_FAILED11:%.*]], label [[OMP_OFFLOAD_CONT12:%.*]]
 // CHECK9:       omp_offload.failed11:
@@ -2176,11 +2170,11 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8
 // CHECK9-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8
 // CHECK9-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8
-// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK9-NEXT:    ret void
 //
 //
-// CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..5
+// CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..7
 // CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK9-NEXT:  entry:
 // CHECK9-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -2253,11 +2247,11 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8
 // CHECK9-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8
 // CHECK9-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8
-// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK9-NEXT:    ret void
 //
 //
-// CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..7
+// CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..10
 // CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK9-NEXT:  entry:
 // CHECK9-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -2330,11 +2324,11 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8
 // CHECK9-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8
 // CHECK9-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8
-// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK9-NEXT:    ret void
 //
 //
-// CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..10
+// CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..13
 // CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK9-NEXT:  entry:
 // CHECK9-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -2419,7 +2413,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK9-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK9-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK9-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK9-NEXT:  entry:
 // CHECK9-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK9-NEXT:    ret void
@@ -2473,14 +2467,14 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    store i32 [[TMP3]], i32* [[CONV]], align 4
 // CHECK10-NEXT:    [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8
 // CHECK10-NEXT:    [[TMP5:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK10-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP4]], i64* [[TMP7]], align 8
-// CHECK10-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP4]], i64* [[TMP9]], align 8
-// CHECK10-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK10-NEXT:    store i64 4, i64* [[TMP10]], align 8
+// CHECK10-NEXT:    [[TMP6:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK10-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP6]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes to i8*), i64 24, i1 false)
+// CHECK10-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP4]], i64* [[TMP8]], align 8
+// CHECK10-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP4]], i64* [[TMP10]], align 8
 // CHECK10-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
 // CHECK10-NEXT:    store i8* null, i8** [[TMP11]], align 8
 // CHECK10-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -2489,160 +2483,154 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK10-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i64*
 // CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP15]], align 8
-// CHECK10-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK10-NEXT:    store i64 8, i64* [[TMP16]], align 8
-// CHECK10-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK10-NEXT:    store i8* null, i8** [[TMP17]], align 8
-// CHECK10-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK10-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
-// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP19]], align 8
-// CHECK10-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK10-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32**
-// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP21]], align 8
-// CHECK10-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK10-NEXT:    store i64 [[TMP5]], i64* [[TMP22]], align 8
-// CHECK10-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK10-NEXT:    store i8* null, i8** [[TMP23]], align 8
-// CHECK10-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP27:%.*]] = load i32, i32* [[N]], align 4
-// CHECK10-NEXT:    store i32 [[TMP27]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK10-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK10-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP28]], 0
+// CHECK10-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK10-NEXT:    store i8* null, i8** [[TMP16]], align 8
+// CHECK10-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK10-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32**
+// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP18]], align 8
+// CHECK10-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK10-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32**
+// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP20]], align 8
+// CHECK10-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
+// CHECK10-NEXT:    store i64 [[TMP5]], i64* [[TMP21]], align 8
+// CHECK10-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK10-NEXT:    store i8* null, i8** [[TMP22]], align 8
+// CHECK10-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP26:%.*]] = load i32, i32* [[N]], align 4
+// CHECK10-NEXT:    store i32 [[TMP26]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK10-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK10-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP27]], 0
 // CHECK10-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK10-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK10-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK10-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK10-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP29]], 1
-// CHECK10-NEXT:    [[TMP30:%.*]] = zext i32 [[ADD]] to i64
-// CHECK10-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 [[TMP30]])
-// CHECK10-NEXT:    [[TMP31:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l100.region_id, i32 3, i8** [[TMP24]], i8** [[TMP25]], i64* [[TMP26]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK10-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
-// CHECK10-NEXT:    br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK10-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK10-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP28]], 1
+// CHECK10-NEXT:    [[TMP29:%.*]] = zext i32 [[ADD]] to i64
+// CHECK10-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 [[TMP29]])
+// CHECK10-NEXT:    [[TMP30:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l100.region_id, i32 3, i8** [[TMP23]], i8** [[TMP24]], i64* [[TMP25]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK10-NEXT:    [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
+// CHECK10-NEXT:    br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK10:       omp_offload.failed:
 // CHECK10-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l100(i64 [[TMP4]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK10-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK10:       omp_offload.cont:
-// CHECK10-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N]], align 4
+// CHECK10-NEXT:    [[TMP32:%.*]] = load i32, i32* [[N]], align 4
 // CHECK10-NEXT:    [[CONV4:%.*]] = bitcast i64* [[N_CASTED3]] to i32*
-// CHECK10-NEXT:    store i32 [[TMP33]], i32* [[CONV4]], align 4
-// CHECK10-NEXT:    [[TMP34:%.*]] = load i64, i64* [[N_CASTED3]], align 8
-// CHECK10-NEXT:    [[TMP35:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK10-NEXT:    store i32 [[TMP32]], i32* [[CONV4]], align 4
+// CHECK10-NEXT:    [[TMP33:%.*]] = load i64, i64* [[N_CASTED3]], align 8
+// CHECK10-NEXT:    [[TMP34:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK10-NEXT:    [[TMP35:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES8]] to i8*
+// CHECK10-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP35]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes.2 to i8*), i64 24, i1 false)
 // CHECK10-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
 // CHECK10-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP34]], i64* [[TMP37]], align 8
+// CHECK10-NEXT:    store i64 [[TMP33]], i64* [[TMP37]], align 8
 // CHECK10-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
 // CHECK10-NEXT:    [[TMP39:%.*]] = bitcast i8** [[TMP38]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP34]], i64* [[TMP39]], align 8
-// CHECK10-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 0
-// CHECK10-NEXT:    store i64 4, i64* [[TMP40]], align 8
-// CHECK10-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 0
-// CHECK10-NEXT:    store i8* null, i8** [[TMP41]], align 8
-// CHECK10-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 1
-// CHECK10-NEXT:    [[TMP43:%.*]] = bitcast i8** [[TMP42]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP43]], align 8
-// CHECK10-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 1
-// CHECK10-NEXT:    [[TMP45:%.*]] = bitcast i8** [[TMP44]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP45]], align 8
-// CHECK10-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 1
-// CHECK10-NEXT:    store i64 8, i64* [[TMP46]], align 8
-// CHECK10-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 1
-// CHECK10-NEXT:    store i8* null, i8** [[TMP47]], align 8
-// CHECK10-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 2
+// CHECK10-NEXT:    store i64 [[TMP33]], i64* [[TMP39]], align 8
+// CHECK10-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 0
+// CHECK10-NEXT:    store i8* null, i8** [[TMP40]], align 8
+// CHECK10-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 1
+// CHECK10-NEXT:    [[TMP42:%.*]] = bitcast i8** [[TMP41]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP42]], align 8
+// CHECK10-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 1
+// CHECK10-NEXT:    [[TMP44:%.*]] = bitcast i8** [[TMP43]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP44]], align 8
+// CHECK10-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 1
+// CHECK10-NEXT:    store i8* null, i8** [[TMP45]], align 8
+// CHECK10-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 2
+// CHECK10-NEXT:    [[TMP47:%.*]] = bitcast i8** [[TMP46]] to i32**
+// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP47]], align 8
+// CHECK10-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 2
 // CHECK10-NEXT:    [[TMP49:%.*]] = bitcast i8** [[TMP48]] to i32**
 // CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP49]], align 8
-// CHECK10-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 2
-// CHECK10-NEXT:    [[TMP51:%.*]] = bitcast i8** [[TMP50]] to i32**
-// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP51]], align 8
-// CHECK10-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 2
-// CHECK10-NEXT:    store i64 [[TMP35]], i64* [[TMP52]], align 8
-// CHECK10-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 2
-// CHECK10-NEXT:    store i8* null, i8** [[TMP53]], align 8
-// CHECK10-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP57:%.*]] = load i32, i32* [[N]], align 4
-// CHECK10-NEXT:    store i32 [[TMP57]], i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK10-NEXT:    [[TMP58:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK10-NEXT:    [[SUB12:%.*]] = sub nsw i32 [[TMP58]], 0
+// CHECK10-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 2
+// CHECK10-NEXT:    store i64 [[TMP34]], i64* [[TMP50]], align 8
+// CHECK10-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 2
+// CHECK10-NEXT:    store i8* null, i8** [[TMP51]], align 8
+// CHECK10-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP55:%.*]] = load i32, i32* [[N]], align 4
+// CHECK10-NEXT:    store i32 [[TMP55]], i32* [[DOTCAPTURE_EXPR_10]], align 4
+// CHECK10-NEXT:    [[TMP56:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
+// CHECK10-NEXT:    [[SUB12:%.*]] = sub nsw i32 [[TMP56]], 0
 // CHECK10-NEXT:    [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1
 // CHECK10-NEXT:    [[SUB14:%.*]] = sub nsw i32 [[DIV13]], 1
 // CHECK10-NEXT:    store i32 [[SUB14]], i32* [[DOTCAPTURE_EXPR_11]], align 4
-// CHECK10-NEXT:    [[TMP59:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_11]], align 4
-// CHECK10-NEXT:    [[ADD15:%.*]] = add nsw i32 [[TMP59]], 1
-// CHECK10-NEXT:    [[TMP60:%.*]] = zext i32 [[ADD15]] to i64
-// CHECK10-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 [[TMP60]])
-// CHECK10-NEXT:    [[TMP61:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l105.region_id, i32 3, i8** [[TMP54]], i8** [[TMP55]], i64* [[TMP56]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.2, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK10-NEXT:    [[TMP62:%.*]] = icmp ne i32 [[TMP61]], 0
-// CHECK10-NEXT:    br i1 [[TMP62]], label [[OMP_OFFLOAD_FAILED16:%.*]], label [[OMP_OFFLOAD_CONT17:%.*]]
+// CHECK10-NEXT:    [[TMP57:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_11]], align 4
+// CHECK10-NEXT:    [[ADD15:%.*]] = add nsw i32 [[TMP57]], 1
+// CHECK10-NEXT:    [[TMP58:%.*]] = zext i32 [[ADD15]] to i64
+// CHECK10-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 [[TMP58]])
+// CHECK10-NEXT:    [[TMP59:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l105.region_id, i32 3, i8** [[TMP52]], i8** [[TMP53]], i64* [[TMP54]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.3, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK10-NEXT:    [[TMP60:%.*]] = icmp ne i32 [[TMP59]], 0
+// CHECK10-NEXT:    br i1 [[TMP60]], label [[OMP_OFFLOAD_FAILED16:%.*]], label [[OMP_OFFLOAD_CONT17:%.*]]
 // CHECK10:       omp_offload.failed16:
-// CHECK10-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l105(i64 [[TMP34]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK10-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l105(i64 [[TMP33]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK10-NEXT:    br label [[OMP_OFFLOAD_CONT17]]
 // CHECK10:       omp_offload.cont17:
-// CHECK10-NEXT:    [[TMP63:%.*]] = load i32, i32* [[N]], align 4
+// CHECK10-NEXT:    [[TMP61:%.*]] = load i32, i32* [[N]], align 4
 // CHECK10-NEXT:    [[CONV19:%.*]] = bitcast i64* [[N_CASTED18]] to i32*
-// CHECK10-NEXT:    store i32 [[TMP63]], i32* [[CONV19]], align 4
-// CHECK10-NEXT:    [[TMP64:%.*]] = load i64, i64* [[N_CASTED18]], align 8
-// CHECK10-NEXT:    [[TMP65:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK10-NEXT:    [[TMP66:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP67:%.*]] = bitcast i8** [[TMP66]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP64]], i64* [[TMP67]], align 8
-// CHECK10-NEXT:    [[TMP68:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP69:%.*]] = bitcast i8** [[TMP68]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP64]], i64* [[TMP69]], align 8
-// CHECK10-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES23]], i32 0, i32 0
-// CHECK10-NEXT:    store i64 4, i64* [[TMP70]], align 8
-// CHECK10-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 0
-// CHECK10-NEXT:    store i8* null, i8** [[TMP71]], align 8
-// CHECK10-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 1
+// CHECK10-NEXT:    store i32 [[TMP61]], i32* [[CONV19]], align 4
+// CHECK10-NEXT:    [[TMP62:%.*]] = load i64, i64* [[N_CASTED18]], align 8
+// CHECK10-NEXT:    [[TMP63:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK10-NEXT:    [[TMP64:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES23]] to i8*
+// CHECK10-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP64]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes.5 to i8*), i64 24, i1 false)
+// CHECK10-NEXT:    [[TMP65:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP66:%.*]] = bitcast i8** [[TMP65]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP62]], i64* [[TMP66]], align 8
+// CHECK10-NEXT:    [[TMP67:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP68:%.*]] = bitcast i8** [[TMP67]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP62]], i64* [[TMP68]], align 8
+// CHECK10-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 0
+// CHECK10-NEXT:    store i8* null, i8** [[TMP69]], align 8
+// CHECK10-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 1
+// CHECK10-NEXT:    [[TMP71:%.*]] = bitcast i8** [[TMP70]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP71]], align 8
+// CHECK10-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 1
 // CHECK10-NEXT:    [[TMP73:%.*]] = bitcast i8** [[TMP72]] to i64*
 // CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP73]], align 8
-// CHECK10-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 1
-// CHECK10-NEXT:    [[TMP75:%.*]] = bitcast i8** [[TMP74]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP75]], align 8
-// CHECK10-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES23]], i32 0, i32 1
-// CHECK10-NEXT:    store i64 8, i64* [[TMP76]], align 8
-// CHECK10-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 1
-// CHECK10-NEXT:    store i8* null, i8** [[TMP77]], align 8
-// CHECK10-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 2
-// CHECK10-NEXT:    [[TMP79:%.*]] = bitcast i8** [[TMP78]] to i32**
-// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP79]], align 8
-// CHECK10-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 2
-// CHECK10-NEXT:    [[TMP81:%.*]] = bitcast i8** [[TMP80]] to i32**
-// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP81]], align 8
-// CHECK10-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES23]], i32 0, i32 2
-// CHECK10-NEXT:    store i64 [[TMP65]], i64* [[TMP82]], align 8
-// CHECK10-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 2
-// CHECK10-NEXT:    store i8* null, i8** [[TMP83]], align 8
-// CHECK10-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES23]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP87:%.*]] = load i32, i32* [[N]], align 4
-// CHECK10-NEXT:    store i32 [[TMP87]], i32* [[DOTCAPTURE_EXPR_25]], align 4
-// CHECK10-NEXT:    [[TMP88:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_25]], align 4
-// CHECK10-NEXT:    [[SUB27:%.*]] = sub nsw i32 [[TMP88]], 0
+// CHECK10-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 1
+// CHECK10-NEXT:    store i8* null, i8** [[TMP74]], align 8
+// CHECK10-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 2
+// CHECK10-NEXT:    [[TMP76:%.*]] = bitcast i8** [[TMP75]] to i32**
+// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP76]], align 8
+// CHECK10-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 2
+// CHECK10-NEXT:    [[TMP78:%.*]] = bitcast i8** [[TMP77]] to i32**
+// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP78]], align 8
+// CHECK10-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES23]], i32 0, i32 2
+// CHECK10-NEXT:    store i64 [[TMP63]], i64* [[TMP79]], align 8
+// CHECK10-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 2
+// CHECK10-NEXT:    store i8* null, i8** [[TMP80]], align 8
+// CHECK10-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES23]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP84:%.*]] = load i32, i32* [[N]], align 4
+// CHECK10-NEXT:    store i32 [[TMP84]], i32* [[DOTCAPTURE_EXPR_25]], align 4
+// CHECK10-NEXT:    [[TMP85:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_25]], align 4
+// CHECK10-NEXT:    [[SUB27:%.*]] = sub nsw i32 [[TMP85]], 0
 // CHECK10-NEXT:    [[DIV28:%.*]] = sdiv i32 [[SUB27]], 1
 // CHECK10-NEXT:    [[SUB29:%.*]] = sub nsw i32 [[DIV28]], 1
 // CHECK10-NEXT:    store i32 [[SUB29]], i32* [[DOTCAPTURE_EXPR_26]], align 4
-// CHECK10-NEXT:    [[TMP89:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_26]], align 4
-// CHECK10-NEXT:    [[ADD30:%.*]] = add nsw i32 [[TMP89]], 1
-// CHECK10-NEXT:    [[TMP90:%.*]] = zext i32 [[ADD30]] to i64
-// CHECK10-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 [[TMP90]])
-// CHECK10-NEXT:    [[TMP91:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l110.region_id, i32 3, i8** [[TMP84]], i8** [[TMP85]], i64* [[TMP86]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK10-NEXT:    [[TMP92:%.*]] = icmp ne i32 [[TMP91]], 0
-// CHECK10-NEXT:    br i1 [[TMP92]], label [[OMP_OFFLOAD_FAILED31:%.*]], label [[OMP_OFFLOAD_CONT32:%.*]]
+// CHECK10-NEXT:    [[TMP86:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_26]], align 4
+// CHECK10-NEXT:    [[ADD30:%.*]] = add nsw i32 [[TMP86]], 1
+// CHECK10-NEXT:    [[TMP87:%.*]] = zext i32 [[ADD30]] to i64
+// CHECK10-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 [[TMP87]])
+// CHECK10-NEXT:    [[TMP88:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l110.region_id, i32 3, i8** [[TMP81]], i8** [[TMP82]], i64* [[TMP83]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.6, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK10-NEXT:    [[TMP89:%.*]] = icmp ne i32 [[TMP88]], 0
+// CHECK10-NEXT:    br i1 [[TMP89]], label [[OMP_OFFLOAD_FAILED31:%.*]], label [[OMP_OFFLOAD_CONT32:%.*]]
 // CHECK10:       omp_offload.failed31:
-// CHECK10-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l110(i64 [[TMP64]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK10-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l110(i64 [[TMP62]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK10-NEXT:    br label [[OMP_OFFLOAD_CONT32]]
 // CHECK10:       omp_offload.cont32:
-// CHECK10-NEXT:    [[TMP93:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK10-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10EEiT_(i32 noundef signext [[TMP93]])
+// CHECK10-NEXT:    [[TMP90:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK10-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10EEiT_(i32 noundef signext [[TMP90]])
 // CHECK10-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK10-NEXT:    [[TMP94:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK10-NEXT:    call void @llvm.stackrestore(i8* [[TMP94]])
-// CHECK10-NEXT:    [[TMP95:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK10-NEXT:    ret i32 [[TMP95]]
+// CHECK10-NEXT:    [[TMP91:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK10-NEXT:    call void @llvm.stackrestore(i8* [[TMP91]])
+// CHECK10-NEXT:    [[TMP92:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK10-NEXT:    ret i32 [[TMP92]]
 //
 //
 // CHECK10-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l100
@@ -2887,11 +2875,11 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK10-NEXT:    store i32 [[TMP3]], i32* [[CONV1]], align 4
 // CHECK10-NEXT:    [[TMP4:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32* [[CONV]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP4]])
+// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*, i64)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32* [[CONV]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP4]])
 // CHECK10-NEXT:    ret void
 //
 //
-// CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..3
+// CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..4
 // CHECK10-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK10-NEXT:  entry:
 // CHECK10-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -3008,7 +2996,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK10-LABEL: define {{[^@]+}}@_Z5tmainIiLi10EEiT_
-// CHECK10-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK10-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK10-NEXT:  entry:
 // CHECK10-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK10-NEXT:    [[A:%.*]] = alloca [10 x i32], align 4
@@ -3036,7 +3024,7 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK10-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK10-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK10-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l79.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.6, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK10-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l79.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.8, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK10-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK10-NEXT:    br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK10:       omp_offload.failed:
@@ -3054,7 +3042,7 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK10-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
 // CHECK10-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK10-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l84.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.8, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK10-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l84.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.11, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.12, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK10-NEXT:    [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
 // CHECK10-NEXT:    br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]]
 // CHECK10:       omp_offload.failed5:
@@ -3072,7 +3060,7 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0
 // CHECK10-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS8]], i32 0, i32 0
 // CHECK10-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK10-NEXT:    [[TMP25:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l89.region_id, i32 1, i8** [[TMP23]], i8** [[TMP24]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.11, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.12, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK10-NEXT:    [[TMP25:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l89.region_id, i32 1, i8** [[TMP23]], i8** [[TMP24]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.14, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.15, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK10-NEXT:    [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0
 // CHECK10-NEXT:    br i1 [[TMP26]], label [[OMP_OFFLOAD_FAILED11:%.*]], label [[OMP_OFFLOAD_CONT12:%.*]]
 // CHECK10:       omp_offload.failed11:
@@ -3088,11 +3076,11 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8
 // CHECK10-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8
 // CHECK10-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8
-// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK10-NEXT:    ret void
 //
 //
-// CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..5
+// CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..7
 // CHECK10-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK10-NEXT:  entry:
 // CHECK10-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -3165,11 +3153,11 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8
 // CHECK10-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8
 // CHECK10-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8
-// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK10-NEXT:    ret void
 //
 //
-// CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..7
+// CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..10
 // CHECK10-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK10-NEXT:  entry:
 // CHECK10-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -3242,11 +3230,11 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8
 // CHECK10-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8
 // CHECK10-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8
-// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK10-NEXT:    ret void
 //
 //
-// CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..10
+// CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..13
 // CHECK10-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK10-NEXT:  entry:
 // CHECK10-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -3331,7 +3319,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK10-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK10-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK10-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK10-NEXT:  entry:
 // CHECK10-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK10-NEXT:    ret void
@@ -3384,14 +3372,14 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4
 // CHECK11-NEXT:    [[TMP4:%.*]] = mul nuw i32 [[TMP0]], 4
 // CHECK11-NEXT:    [[TMP5:%.*]] = sext i32 [[TMP4]] to i64
-// CHECK11-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP3]], i32* [[TMP7]], align 4
-// CHECK11-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP3]], i32* [[TMP9]], align 4
-// CHECK11-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK11-NEXT:    store i64 4, i64* [[TMP10]], align 4
+// CHECK11-NEXT:    [[TMP6:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK11-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP6]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes to i8*), i32 24, i1 false)
+// CHECK11-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP3]], i32* [[TMP8]], align 4
+// CHECK11-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP3]], i32* [[TMP10]], align 4
 // CHECK11-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
 // CHECK11-NEXT:    store i8* null, i8** [[TMP11]], align 4
 // CHECK11-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -3400,160 +3388,154 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK11-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i32*
 // CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP15]], align 4
-// CHECK11-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK11-NEXT:    store i64 4, i64* [[TMP16]], align 4
-// CHECK11-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK11-NEXT:    store i8* null, i8** [[TMP17]], align 4
-// CHECK11-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
-// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP19]], align 4
-// CHECK11-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32**
-// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP21]], align 4
-// CHECK11-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK11-NEXT:    store i64 [[TMP5]], i64* [[TMP22]], align 4
-// CHECK11-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK11-NEXT:    store i8* null, i8** [[TMP23]], align 4
-// CHECK11-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP27:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP27]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK11-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK11-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP28]], 0
+// CHECK11-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK11-NEXT:    store i8* null, i8** [[TMP16]], align 4
+// CHECK11-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32**
+// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP18]], align 4
+// CHECK11-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32**
+// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP20]], align 4
+// CHECK11-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
+// CHECK11-NEXT:    store i64 [[TMP5]], i64* [[TMP21]], align 4
+// CHECK11-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK11-NEXT:    store i8* null, i8** [[TMP22]], align 4
+// CHECK11-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP26:%.*]] = load i32, i32* [[N]], align 4
+// CHECK11-NEXT:    store i32 [[TMP26]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK11-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK11-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP27]], 0
 // CHECK11-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK11-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK11-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK11-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK11-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP29]], 1
-// CHECK11-NEXT:    [[TMP30:%.*]] = zext i32 [[ADD]] to i64
-// CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 [[TMP30]])
-// CHECK11-NEXT:    [[TMP31:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l100.region_id, i32 3, i8** [[TMP24]], i8** [[TMP25]], i64* [[TMP26]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK11-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
-// CHECK11-NEXT:    br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK11-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK11-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP28]], 1
+// CHECK11-NEXT:    [[TMP29:%.*]] = zext i32 [[ADD]] to i64
+// CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 [[TMP29]])
+// CHECK11-NEXT:    [[TMP30:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l100.region_id, i32 3, i8** [[TMP23]], i8** [[TMP24]], i64* [[TMP25]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK11-NEXT:    [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
+// CHECK11-NEXT:    br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK11:       omp_offload.failed:
 // CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l100(i32 [[TMP3]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK11:       omp_offload.cont:
-// CHECK11-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP33]], i32* [[N_CASTED3]], align 4
-// CHECK11-NEXT:    [[TMP34:%.*]] = load i32, i32* [[N_CASTED3]], align 4
-// CHECK11-NEXT:    [[TMP35:%.*]] = mul nuw i32 [[TMP0]], 4
-// CHECK11-NEXT:    [[TMP36:%.*]] = sext i32 [[TMP35]] to i64
+// CHECK11-NEXT:    [[TMP32:%.*]] = load i32, i32* [[N]], align 4
+// CHECK11-NEXT:    store i32 [[TMP32]], i32* [[N_CASTED3]], align 4
+// CHECK11-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N_CASTED3]], align 4
+// CHECK11-NEXT:    [[TMP34:%.*]] = mul nuw i32 [[TMP0]], 4
+// CHECK11-NEXT:    [[TMP35:%.*]] = sext i32 [[TMP34]] to i64
+// CHECK11-NEXT:    [[TMP36:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES7]] to i8*
+// CHECK11-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP36]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes.2 to i8*), i32 24, i1 false)
 // CHECK11-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP38:%.*]] = bitcast i8** [[TMP37]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP34]], i32* [[TMP38]], align 4
+// CHECK11-NEXT:    store i32 [[TMP33]], i32* [[TMP38]], align 4
 // CHECK11-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP40:%.*]] = bitcast i8** [[TMP39]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP34]], i32* [[TMP40]], align 4
-// CHECK11-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 0
-// CHECK11-NEXT:    store i64 4, i64* [[TMP41]], align 4
-// CHECK11-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 0
-// CHECK11-NEXT:    store i8* null, i8** [[TMP42]], align 4
-// CHECK11-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 1
-// CHECK11-NEXT:    [[TMP44:%.*]] = bitcast i8** [[TMP43]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP44]], align 4
-// CHECK11-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 1
-// CHECK11-NEXT:    [[TMP46:%.*]] = bitcast i8** [[TMP45]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP46]], align 4
-// CHECK11-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 1
-// CHECK11-NEXT:    store i64 4, i64* [[TMP47]], align 4
-// CHECK11-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 1
-// CHECK11-NEXT:    store i8* null, i8** [[TMP48]], align 4
-// CHECK11-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 2
+// CHECK11-NEXT:    store i32 [[TMP33]], i32* [[TMP40]], align 4
+// CHECK11-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 0
+// CHECK11-NEXT:    store i8* null, i8** [[TMP41]], align 4
+// CHECK11-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 1
+// CHECK11-NEXT:    [[TMP43:%.*]] = bitcast i8** [[TMP42]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP43]], align 4
+// CHECK11-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 1
+// CHECK11-NEXT:    [[TMP45:%.*]] = bitcast i8** [[TMP44]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP45]], align 4
+// CHECK11-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 1
+// CHECK11-NEXT:    store i8* null, i8** [[TMP46]], align 4
+// CHECK11-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP48:%.*]] = bitcast i8** [[TMP47]] to i32**
+// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP48]], align 4
+// CHECK11-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 2
 // CHECK11-NEXT:    [[TMP50:%.*]] = bitcast i8** [[TMP49]] to i32**
 // CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP50]], align 4
-// CHECK11-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP52:%.*]] = bitcast i8** [[TMP51]] to i32**
-// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP52]], align 4
-// CHECK11-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 2
-// CHECK11-NEXT:    store i64 [[TMP36]], i64* [[TMP53]], align 4
-// CHECK11-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 2
-// CHECK11-NEXT:    store i8* null, i8** [[TMP54]], align 4
-// CHECK11-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP58:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP58]], i32* [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK11-NEXT:    [[TMP59:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK11-NEXT:    [[SUB11:%.*]] = sub nsw i32 [[TMP59]], 0
+// CHECK11-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 2
+// CHECK11-NEXT:    store i64 [[TMP35]], i64* [[TMP51]], align 4
+// CHECK11-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 2
+// CHECK11-NEXT:    store i8* null, i8** [[TMP52]], align 4
+// CHECK11-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP56:%.*]] = load i32, i32* [[N]], align 4
+// CHECK11-NEXT:    store i32 [[TMP56]], i32* [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK11-NEXT:    [[TMP57:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK11-NEXT:    [[SUB11:%.*]] = sub nsw i32 [[TMP57]], 0
 // CHECK11-NEXT:    [[DIV12:%.*]] = sdiv i32 [[SUB11]], 1
 // CHECK11-NEXT:    [[SUB13:%.*]] = sub nsw i32 [[DIV12]], 1
 // CHECK11-NEXT:    store i32 [[SUB13]], i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK11-NEXT:    [[TMP60:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK11-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP60]], 1
-// CHECK11-NEXT:    [[TMP61:%.*]] = zext i32 [[ADD14]] to i64
-// CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 [[TMP61]])
-// CHECK11-NEXT:    [[TMP62:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l105.region_id, i32 3, i8** [[TMP55]], i8** [[TMP56]], i64* [[TMP57]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.2, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK11-NEXT:    [[TMP63:%.*]] = icmp ne i32 [[TMP62]], 0
-// CHECK11-NEXT:    br i1 [[TMP63]], label [[OMP_OFFLOAD_FAILED15:%.*]], label [[OMP_OFFLOAD_CONT16:%.*]]
+// CHECK11-NEXT:    [[TMP58:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
+// CHECK11-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP58]], 1
+// CHECK11-NEXT:    [[TMP59:%.*]] = zext i32 [[ADD14]] to i64
+// CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 [[TMP59]])
+// CHECK11-NEXT:    [[TMP60:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l105.region_id, i32 3, i8** [[TMP53]], i8** [[TMP54]], i64* [[TMP55]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.3, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK11-NEXT:    [[TMP61:%.*]] = icmp ne i32 [[TMP60]], 0
+// CHECK11-NEXT:    br i1 [[TMP61]], label [[OMP_OFFLOAD_FAILED15:%.*]], label [[OMP_OFFLOAD_CONT16:%.*]]
 // CHECK11:       omp_offload.failed15:
-// CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l105(i32 [[TMP34]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l105(i32 [[TMP33]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT16]]
 // CHECK11:       omp_offload.cont16:
-// CHECK11-NEXT:    [[TMP64:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP64]], i32* [[N_CASTED17]], align 4
-// CHECK11-NEXT:    [[TMP65:%.*]] = load i32, i32* [[N_CASTED17]], align 4
-// CHECK11-NEXT:    [[TMP66:%.*]] = mul nuw i32 [[TMP0]], 4
-// CHECK11-NEXT:    [[TMP67:%.*]] = sext i32 [[TMP66]] to i64
-// CHECK11-NEXT:    [[TMP68:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP69:%.*]] = bitcast i8** [[TMP68]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP65]], i32* [[TMP69]], align 4
-// CHECK11-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP71:%.*]] = bitcast i8** [[TMP70]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP65]], i32* [[TMP71]], align 4
-// CHECK11-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 0
-// CHECK11-NEXT:    store i64 4, i64* [[TMP72]], align 4
-// CHECK11-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 0
-// CHECK11-NEXT:    store i8* null, i8** [[TMP73]], align 4
-// CHECK11-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 1
+// CHECK11-NEXT:    [[TMP62:%.*]] = load i32, i32* [[N]], align 4
+// CHECK11-NEXT:    store i32 [[TMP62]], i32* [[N_CASTED17]], align 4
+// CHECK11-NEXT:    [[TMP63:%.*]] = load i32, i32* [[N_CASTED17]], align 4
+// CHECK11-NEXT:    [[TMP64:%.*]] = mul nuw i32 [[TMP0]], 4
+// CHECK11-NEXT:    [[TMP65:%.*]] = sext i32 [[TMP64]] to i64
+// CHECK11-NEXT:    [[TMP66:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES21]] to i8*
+// CHECK11-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP66]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes.5 to i8*), i32 24, i1 false)
+// CHECK11-NEXT:    [[TMP67:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP68:%.*]] = bitcast i8** [[TMP67]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP63]], i32* [[TMP68]], align 4
+// CHECK11-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP70:%.*]] = bitcast i8** [[TMP69]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP63]], i32* [[TMP70]], align 4
+// CHECK11-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 0
+// CHECK11-NEXT:    store i8* null, i8** [[TMP71]], align 4
+// CHECK11-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 1
+// CHECK11-NEXT:    [[TMP73:%.*]] = bitcast i8** [[TMP72]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP73]], align 4
+// CHECK11-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 1
 // CHECK11-NEXT:    [[TMP75:%.*]] = bitcast i8** [[TMP74]] to i32*
 // CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP75]], align 4
-// CHECK11-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 1
-// CHECK11-NEXT:    [[TMP77:%.*]] = bitcast i8** [[TMP76]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP77]], align 4
-// CHECK11-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 1
-// CHECK11-NEXT:    store i64 4, i64* [[TMP78]], align 4
-// CHECK11-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 1
-// CHECK11-NEXT:    store i8* null, i8** [[TMP79]], align 4
-// CHECK11-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP81:%.*]] = bitcast i8** [[TMP80]] to i32**
-// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP81]], align 4
-// CHECK11-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i32**
-// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP83]], align 4
-// CHECK11-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 2
-// CHECK11-NEXT:    store i64 [[TMP67]], i64* [[TMP84]], align 4
-// CHECK11-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 2
-// CHECK11-NEXT:    store i8* null, i8** [[TMP85]], align 4
-// CHECK11-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP89:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP89]], i32* [[DOTCAPTURE_EXPR_23]], align 4
-// CHECK11-NEXT:    [[TMP90:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_23]], align 4
-// CHECK11-NEXT:    [[SUB25:%.*]] = sub nsw i32 [[TMP90]], 0
+// CHECK11-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 1
+// CHECK11-NEXT:    store i8* null, i8** [[TMP76]], align 4
+// CHECK11-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP78:%.*]] = bitcast i8** [[TMP77]] to i32**
+// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP78]], align 4
+// CHECK11-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i32**
+// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP80]], align 4
+// CHECK11-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 2
+// CHECK11-NEXT:    store i64 [[TMP65]], i64* [[TMP81]], align 4
+// CHECK11-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 2
+// CHECK11-NEXT:    store i8* null, i8** [[TMP82]], align 4
+// CHECK11-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP86:%.*]] = load i32, i32* [[N]], align 4
+// CHECK11-NEXT:    store i32 [[TMP86]], i32* [[DOTCAPTURE_EXPR_23]], align 4
+// CHECK11-NEXT:    [[TMP87:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_23]], align 4
+// CHECK11-NEXT:    [[SUB25:%.*]] = sub nsw i32 [[TMP87]], 0
 // CHECK11-NEXT:    [[DIV26:%.*]] = sdiv i32 [[SUB25]], 1
 // CHECK11-NEXT:    [[SUB27:%.*]] = sub nsw i32 [[DIV26]], 1
 // CHECK11-NEXT:    store i32 [[SUB27]], i32* [[DOTCAPTURE_EXPR_24]], align 4
-// CHECK11-NEXT:    [[TMP91:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_24]], align 4
-// CHECK11-NEXT:    [[ADD28:%.*]] = add nsw i32 [[TMP91]], 1
-// CHECK11-NEXT:    [[TMP92:%.*]] = zext i32 [[ADD28]] to i64
-// CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 [[TMP92]])
-// CHECK11-NEXT:    [[TMP93:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l110.region_id, i32 3, i8** [[TMP86]], i8** [[TMP87]], i64* [[TMP88]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK11-NEXT:    [[TMP94:%.*]] = icmp ne i32 [[TMP93]], 0
-// CHECK11-NEXT:    br i1 [[TMP94]], label [[OMP_OFFLOAD_FAILED29:%.*]], label [[OMP_OFFLOAD_CONT30:%.*]]
+// CHECK11-NEXT:    [[TMP88:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_24]], align 4
+// CHECK11-NEXT:    [[ADD28:%.*]] = add nsw i32 [[TMP88]], 1
+// CHECK11-NEXT:    [[TMP89:%.*]] = zext i32 [[ADD28]] to i64
+// CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 [[TMP89]])
+// CHECK11-NEXT:    [[TMP90:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l110.region_id, i32 3, i8** [[TMP83]], i8** [[TMP84]], i64* [[TMP85]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.6, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK11-NEXT:    [[TMP91:%.*]] = icmp ne i32 [[TMP90]], 0
+// CHECK11-NEXT:    br i1 [[TMP91]], label [[OMP_OFFLOAD_FAILED29:%.*]], label [[OMP_OFFLOAD_CONT30:%.*]]
 // CHECK11:       omp_offload.failed29:
-// CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l110(i32 [[TMP65]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l110(i32 [[TMP63]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT30]]
 // CHECK11:       omp_offload.cont30:
-// CHECK11-NEXT:    [[TMP95:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK11-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10EEiT_(i32 noundef [[TMP95]])
+// CHECK11-NEXT:    [[TMP92:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK11-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10EEiT_(i32 noundef [[TMP92]])
 // CHECK11-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK11-NEXT:    [[TMP96:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK11-NEXT:    call void @llvm.stackrestore(i8* [[TMP96]])
-// CHECK11-NEXT:    [[TMP97:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK11-NEXT:    ret i32 [[TMP97]]
+// CHECK11-NEXT:    [[TMP93:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK11-NEXT:    call void @llvm.stackrestore(i8* [[TMP93]])
+// CHECK11-NEXT:    [[TMP94:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK11-NEXT:    ret i32 [[TMP94]]
 //
 //
 // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l100
@@ -3792,11 +3774,11 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
 // CHECK11-NEXT:    store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK11-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK11-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*, i32)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP4]])
+// CHECK11-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*, i32)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP4]])
 // CHECK11-NEXT:    ret void
 //
 //
-// CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..3
+// CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..4
 // CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK11-NEXT:  entry:
 // CHECK11-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -3911,7 +3893,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK11-LABEL: define {{[^@]+}}@_Z5tmainIiLi10EEiT_
-// CHECK11-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK11-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK11-NEXT:  entry:
 // CHECK11-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[A:%.*]] = alloca [10 x i32], align 4
@@ -3939,7 +3921,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK11-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l79.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.6, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK11-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l79.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.8, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK11-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK11-NEXT:    br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK11:       omp_offload.failed:
@@ -3957,7 +3939,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
 // CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK11-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l84.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.8, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK11-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l84.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.11, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.12, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK11-NEXT:    [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
 // CHECK11-NEXT:    br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]]
 // CHECK11:       omp_offload.failed5:
@@ -3975,7 +3957,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS8]], i32 0, i32 0
 // CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK11-NEXT:    [[TMP25:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l89.region_id, i32 1, i8** [[TMP23]], i8** [[TMP24]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.11, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.12, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK11-NEXT:    [[TMP25:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l89.region_id, i32 1, i8** [[TMP23]], i8** [[TMP24]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.14, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.15, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK11-NEXT:    [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0
 // CHECK11-NEXT:    br i1 [[TMP26]], label [[OMP_OFFLOAD_FAILED11:%.*]], label [[OMP_OFFLOAD_CONT12:%.*]]
 // CHECK11:       omp_offload.failed11:
@@ -3991,11 +3973,11 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4
 // CHECK11-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4
 // CHECK11-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4
-// CHECK11-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK11-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK11-NEXT:    ret void
 //
 //
-// CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..5
+// CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..7
 // CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK11-NEXT:  entry:
 // CHECK11-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -4067,11 +4049,11 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4
 // CHECK11-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4
 // CHECK11-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4
-// CHECK11-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK11-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK11-NEXT:    ret void
 //
 //
-// CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..7
+// CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..10
 // CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK11-NEXT:  entry:
 // CHECK11-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -4143,11 +4125,11 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4
 // CHECK11-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4
 // CHECK11-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4
-// CHECK11-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK11-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK11-NEXT:    ret void
 //
 //
-// CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..10
+// CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..13
 // CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK11-NEXT:  entry:
 // CHECK11-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -4231,7 +4213,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK11-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK11-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK11-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK11-NEXT:  entry:
 // CHECK11-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK11-NEXT:    ret void
@@ -4284,14 +4266,14 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4
 // CHECK12-NEXT:    [[TMP4:%.*]] = mul nuw i32 [[TMP0]], 4
 // CHECK12-NEXT:    [[TMP5:%.*]] = sext i32 [[TMP4]] to i64
-// CHECK12-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP3]], i32* [[TMP7]], align 4
-// CHECK12-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP3]], i32* [[TMP9]], align 4
-// CHECK12-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK12-NEXT:    store i64 4, i64* [[TMP10]], align 4
+// CHECK12-NEXT:    [[TMP6:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK12-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP6]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes to i8*), i32 24, i1 false)
+// CHECK12-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP3]], i32* [[TMP8]], align 4
+// CHECK12-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP3]], i32* [[TMP10]], align 4
 // CHECK12-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
 // CHECK12-NEXT:    store i8* null, i8** [[TMP11]], align 4
 // CHECK12-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -4300,160 +4282,154 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK12-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i32*
 // CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP15]], align 4
-// CHECK12-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK12-NEXT:    store i64 4, i64* [[TMP16]], align 4
-// CHECK12-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK12-NEXT:    store i8* null, i8** [[TMP17]], align 4
-// CHECK12-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK12-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
-// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP19]], align 4
-// CHECK12-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK12-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32**
-// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP21]], align 4
-// CHECK12-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK12-NEXT:    store i64 [[TMP5]], i64* [[TMP22]], align 4
-// CHECK12-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK12-NEXT:    store i8* null, i8** [[TMP23]], align 4
-// CHECK12-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP27:%.*]] = load i32, i32* [[N]], align 4
-// CHECK12-NEXT:    store i32 [[TMP27]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK12-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK12-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP28]], 0
+// CHECK12-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK12-NEXT:    store i8* null, i8** [[TMP16]], align 4
+// CHECK12-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK12-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32**
+// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP18]], align 4
+// CHECK12-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK12-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32**
+// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP20]], align 4
+// CHECK12-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
+// CHECK12-NEXT:    store i64 [[TMP5]], i64* [[TMP21]], align 4
+// CHECK12-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK12-NEXT:    store i8* null, i8** [[TMP22]], align 4
+// CHECK12-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP26:%.*]] = load i32, i32* [[N]], align 4
+// CHECK12-NEXT:    store i32 [[TMP26]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK12-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK12-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP27]], 0
 // CHECK12-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK12-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK12-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK12-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK12-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP29]], 1
-// CHECK12-NEXT:    [[TMP30:%.*]] = zext i32 [[ADD]] to i64
-// CHECK12-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 [[TMP30]])
-// CHECK12-NEXT:    [[TMP31:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l100.region_id, i32 3, i8** [[TMP24]], i8** [[TMP25]], i64* [[TMP26]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK12-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
-// CHECK12-NEXT:    br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK12-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK12-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP28]], 1
+// CHECK12-NEXT:    [[TMP29:%.*]] = zext i32 [[ADD]] to i64
+// CHECK12-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 [[TMP29]])
+// CHECK12-NEXT:    [[TMP30:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l100.region_id, i32 3, i8** [[TMP23]], i8** [[TMP24]], i64* [[TMP25]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK12-NEXT:    [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
+// CHECK12-NEXT:    br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK12:       omp_offload.failed:
 // CHECK12-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l100(i32 [[TMP3]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK12-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK12:       omp_offload.cont:
-// CHECK12-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N]], align 4
-// CHECK12-NEXT:    store i32 [[TMP33]], i32* [[N_CASTED3]], align 4
-// CHECK12-NEXT:    [[TMP34:%.*]] = load i32, i32* [[N_CASTED3]], align 4
-// CHECK12-NEXT:    [[TMP35:%.*]] = mul nuw i32 [[TMP0]], 4
-// CHECK12-NEXT:    [[TMP36:%.*]] = sext i32 [[TMP35]] to i64
+// CHECK12-NEXT:    [[TMP32:%.*]] = load i32, i32* [[N]], align 4
+// CHECK12-NEXT:    store i32 [[TMP32]], i32* [[N_CASTED3]], align 4
+// CHECK12-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N_CASTED3]], align 4
+// CHECK12-NEXT:    [[TMP34:%.*]] = mul nuw i32 [[TMP0]], 4
+// CHECK12-NEXT:    [[TMP35:%.*]] = sext i32 [[TMP34]] to i64
+// CHECK12-NEXT:    [[TMP36:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES7]] to i8*
+// CHECK12-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP36]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes.2 to i8*), i32 24, i1 false)
 // CHECK12-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
 // CHECK12-NEXT:    [[TMP38:%.*]] = bitcast i8** [[TMP37]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP34]], i32* [[TMP38]], align 4
+// CHECK12-NEXT:    store i32 [[TMP33]], i32* [[TMP38]], align 4
 // CHECK12-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
 // CHECK12-NEXT:    [[TMP40:%.*]] = bitcast i8** [[TMP39]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP34]], i32* [[TMP40]], align 4
-// CHECK12-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 0
-// CHECK12-NEXT:    store i64 4, i64* [[TMP41]], align 4
-// CHECK12-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 0
-// CHECK12-NEXT:    store i8* null, i8** [[TMP42]], align 4
-// CHECK12-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 1
-// CHECK12-NEXT:    [[TMP44:%.*]] = bitcast i8** [[TMP43]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP44]], align 4
-// CHECK12-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 1
-// CHECK12-NEXT:    [[TMP46:%.*]] = bitcast i8** [[TMP45]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP46]], align 4
-// CHECK12-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 1
-// CHECK12-NEXT:    store i64 4, i64* [[TMP47]], align 4
-// CHECK12-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 1
-// CHECK12-NEXT:    store i8* null, i8** [[TMP48]], align 4
-// CHECK12-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 2
+// CHECK12-NEXT:    store i32 [[TMP33]], i32* [[TMP40]], align 4
+// CHECK12-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 0
+// CHECK12-NEXT:    store i8* null, i8** [[TMP41]], align 4
+// CHECK12-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 1
+// CHECK12-NEXT:    [[TMP43:%.*]] = bitcast i8** [[TMP42]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP43]], align 4
+// CHECK12-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 1
+// CHECK12-NEXT:    [[TMP45:%.*]] = bitcast i8** [[TMP44]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP45]], align 4
+// CHECK12-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 1
+// CHECK12-NEXT:    store i8* null, i8** [[TMP46]], align 4
+// CHECK12-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 2
+// CHECK12-NEXT:    [[TMP48:%.*]] = bitcast i8** [[TMP47]] to i32**
+// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP48]], align 4
+// CHECK12-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 2
 // CHECK12-NEXT:    [[TMP50:%.*]] = bitcast i8** [[TMP49]] to i32**
 // CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP50]], align 4
-// CHECK12-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 2
-// CHECK12-NEXT:    [[TMP52:%.*]] = bitcast i8** [[TMP51]] to i32**
-// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP52]], align 4
-// CHECK12-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 2
-// CHECK12-NEXT:    store i64 [[TMP36]], i64* [[TMP53]], align 4
-// CHECK12-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 2
-// CHECK12-NEXT:    store i8* null, i8** [[TMP54]], align 4
-// CHECK12-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP58:%.*]] = load i32, i32* [[N]], align 4
-// CHECK12-NEXT:    store i32 [[TMP58]], i32* [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK12-NEXT:    [[TMP59:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK12-NEXT:    [[SUB11:%.*]] = sub nsw i32 [[TMP59]], 0
+// CHECK12-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 2
+// CHECK12-NEXT:    store i64 [[TMP35]], i64* [[TMP51]], align 4
+// CHECK12-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 2
+// CHECK12-NEXT:    store i8* null, i8** [[TMP52]], align 4
+// CHECK12-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP56:%.*]] = load i32, i32* [[N]], align 4
+// CHECK12-NEXT:    store i32 [[TMP56]], i32* [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK12-NEXT:    [[TMP57:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK12-NEXT:    [[SUB11:%.*]] = sub nsw i32 [[TMP57]], 0
 // CHECK12-NEXT:    [[DIV12:%.*]] = sdiv i32 [[SUB11]], 1
 // CHECK12-NEXT:    [[SUB13:%.*]] = sub nsw i32 [[DIV12]], 1
 // CHECK12-NEXT:    store i32 [[SUB13]], i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK12-NEXT:    [[TMP60:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK12-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP60]], 1
-// CHECK12-NEXT:    [[TMP61:%.*]] = zext i32 [[ADD14]] to i64
-// CHECK12-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 [[TMP61]])
-// CHECK12-NEXT:    [[TMP62:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l105.region_id, i32 3, i8** [[TMP55]], i8** [[TMP56]], i64* [[TMP57]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.2, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK12-NEXT:    [[TMP63:%.*]] = icmp ne i32 [[TMP62]], 0
-// CHECK12-NEXT:    br i1 [[TMP63]], label [[OMP_OFFLOAD_FAILED15:%.*]], label [[OMP_OFFLOAD_CONT16:%.*]]
+// CHECK12-NEXT:    [[TMP58:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
+// CHECK12-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP58]], 1
+// CHECK12-NEXT:    [[TMP59:%.*]] = zext i32 [[ADD14]] to i64
+// CHECK12-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 [[TMP59]])
+// CHECK12-NEXT:    [[TMP60:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l105.region_id, i32 3, i8** [[TMP53]], i8** [[TMP54]], i64* [[TMP55]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.3, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK12-NEXT:    [[TMP61:%.*]] = icmp ne i32 [[TMP60]], 0
+// CHECK12-NEXT:    br i1 [[TMP61]], label [[OMP_OFFLOAD_FAILED15:%.*]], label [[OMP_OFFLOAD_CONT16:%.*]]
 // CHECK12:       omp_offload.failed15:
-// CHECK12-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l105(i32 [[TMP34]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK12-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l105(i32 [[TMP33]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK12-NEXT:    br label [[OMP_OFFLOAD_CONT16]]
 // CHECK12:       omp_offload.cont16:
-// CHECK12-NEXT:    [[TMP64:%.*]] = load i32, i32* [[N]], align 4
-// CHECK12-NEXT:    store i32 [[TMP64]], i32* [[N_CASTED17]], align 4
-// CHECK12-NEXT:    [[TMP65:%.*]] = load i32, i32* [[N_CASTED17]], align 4
-// CHECK12-NEXT:    [[TMP66:%.*]] = mul nuw i32 [[TMP0]], 4
-// CHECK12-NEXT:    [[TMP67:%.*]] = sext i32 [[TMP66]] to i64
-// CHECK12-NEXT:    [[TMP68:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP69:%.*]] = bitcast i8** [[TMP68]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP65]], i32* [[TMP69]], align 4
-// CHECK12-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP71:%.*]] = bitcast i8** [[TMP70]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP65]], i32* [[TMP71]], align 4
-// CHECK12-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 0
-// CHECK12-NEXT:    store i64 4, i64* [[TMP72]], align 4
-// CHECK12-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 0
-// CHECK12-NEXT:    store i8* null, i8** [[TMP73]], align 4
-// CHECK12-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 1
+// CHECK12-NEXT:    [[TMP62:%.*]] = load i32, i32* [[N]], align 4
+// CHECK12-NEXT:    store i32 [[TMP62]], i32* [[N_CASTED17]], align 4
+// CHECK12-NEXT:    [[TMP63:%.*]] = load i32, i32* [[N_CASTED17]], align 4
+// CHECK12-NEXT:    [[TMP64:%.*]] = mul nuw i32 [[TMP0]], 4
+// CHECK12-NEXT:    [[TMP65:%.*]] = sext i32 [[TMP64]] to i64
+// CHECK12-NEXT:    [[TMP66:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES21]] to i8*
+// CHECK12-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP66]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes.5 to i8*), i32 24, i1 false)
+// CHECK12-NEXT:    [[TMP67:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP68:%.*]] = bitcast i8** [[TMP67]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP63]], i32* [[TMP68]], align 4
+// CHECK12-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP70:%.*]] = bitcast i8** [[TMP69]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP63]], i32* [[TMP70]], align 4
+// CHECK12-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 0
+// CHECK12-NEXT:    store i8* null, i8** [[TMP71]], align 4
+// CHECK12-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 1
+// CHECK12-NEXT:    [[TMP73:%.*]] = bitcast i8** [[TMP72]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP73]], align 4
+// CHECK12-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 1
 // CHECK12-NEXT:    [[TMP75:%.*]] = bitcast i8** [[TMP74]] to i32*
 // CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP75]], align 4
-// CHECK12-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 1
-// CHECK12-NEXT:    [[TMP77:%.*]] = bitcast i8** [[TMP76]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP77]], align 4
-// CHECK12-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 1
-// CHECK12-NEXT:    store i64 4, i64* [[TMP78]], align 4
-// CHECK12-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 1
-// CHECK12-NEXT:    store i8* null, i8** [[TMP79]], align 4
-// CHECK12-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 2
-// CHECK12-NEXT:    [[TMP81:%.*]] = bitcast i8** [[TMP80]] to i32**
-// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP81]], align 4
-// CHECK12-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 2
-// CHECK12-NEXT:    [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i32**
-// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP83]], align 4
-// CHECK12-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 2
-// CHECK12-NEXT:    store i64 [[TMP67]], i64* [[TMP84]], align 4
-// CHECK12-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 2
-// CHECK12-NEXT:    store i8* null, i8** [[TMP85]], align 4
-// CHECK12-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP89:%.*]] = load i32, i32* [[N]], align 4
-// CHECK12-NEXT:    store i32 [[TMP89]], i32* [[DOTCAPTURE_EXPR_23]], align 4
-// CHECK12-NEXT:    [[TMP90:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_23]], align 4
-// CHECK12-NEXT:    [[SUB25:%.*]] = sub nsw i32 [[TMP90]], 0
+// CHECK12-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 1
+// CHECK12-NEXT:    store i8* null, i8** [[TMP76]], align 4
+// CHECK12-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 2
+// CHECK12-NEXT:    [[TMP78:%.*]] = bitcast i8** [[TMP77]] to i32**
+// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP78]], align 4
+// CHECK12-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 2
+// CHECK12-NEXT:    [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i32**
+// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP80]], align 4
+// CHECK12-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 2
+// CHECK12-NEXT:    store i64 [[TMP65]], i64* [[TMP81]], align 4
+// CHECK12-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 2
+// CHECK12-NEXT:    store i8* null, i8** [[TMP82]], align 4
+// CHECK12-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP86:%.*]] = load i32, i32* [[N]], align 4
+// CHECK12-NEXT:    store i32 [[TMP86]], i32* [[DOTCAPTURE_EXPR_23]], align 4
+// CHECK12-NEXT:    [[TMP87:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_23]], align 4
+// CHECK12-NEXT:    [[SUB25:%.*]] = sub nsw i32 [[TMP87]], 0
 // CHECK12-NEXT:    [[DIV26:%.*]] = sdiv i32 [[SUB25]], 1
 // CHECK12-NEXT:    [[SUB27:%.*]] = sub nsw i32 [[DIV26]], 1
 // CHECK12-NEXT:    store i32 [[SUB27]], i32* [[DOTCAPTURE_EXPR_24]], align 4
-// CHECK12-NEXT:    [[TMP91:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_24]], align 4
-// CHECK12-NEXT:    [[ADD28:%.*]] = add nsw i32 [[TMP91]], 1
-// CHECK12-NEXT:    [[TMP92:%.*]] = zext i32 [[ADD28]] to i64
-// CHECK12-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 [[TMP92]])
-// CHECK12-NEXT:    [[TMP93:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l110.region_id, i32 3, i8** [[TMP86]], i8** [[TMP87]], i64* [[TMP88]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK12-NEXT:    [[TMP94:%.*]] = icmp ne i32 [[TMP93]], 0
-// CHECK12-NEXT:    br i1 [[TMP94]], label [[OMP_OFFLOAD_FAILED29:%.*]], label [[OMP_OFFLOAD_CONT30:%.*]]
+// CHECK12-NEXT:    [[TMP88:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_24]], align 4
+// CHECK12-NEXT:    [[ADD28:%.*]] = add nsw i32 [[TMP88]], 1
+// CHECK12-NEXT:    [[TMP89:%.*]] = zext i32 [[ADD28]] to i64
+// CHECK12-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 [[TMP89]])
+// CHECK12-NEXT:    [[TMP90:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l110.region_id, i32 3, i8** [[TMP83]], i8** [[TMP84]], i64* [[TMP85]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.6, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK12-NEXT:    [[TMP91:%.*]] = icmp ne i32 [[TMP90]], 0
+// CHECK12-NEXT:    br i1 [[TMP91]], label [[OMP_OFFLOAD_FAILED29:%.*]], label [[OMP_OFFLOAD_CONT30:%.*]]
 // CHECK12:       omp_offload.failed29:
-// CHECK12-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l110(i32 [[TMP65]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK12-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l110(i32 [[TMP63]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK12-NEXT:    br label [[OMP_OFFLOAD_CONT30]]
 // CHECK12:       omp_offload.cont30:
-// CHECK12-NEXT:    [[TMP95:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK12-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10EEiT_(i32 noundef [[TMP95]])
+// CHECK12-NEXT:    [[TMP92:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK12-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10EEiT_(i32 noundef [[TMP92]])
 // CHECK12-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK12-NEXT:    [[TMP96:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK12-NEXT:    call void @llvm.stackrestore(i8* [[TMP96]])
-// CHECK12-NEXT:    [[TMP97:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK12-NEXT:    ret i32 [[TMP97]]
+// CHECK12-NEXT:    [[TMP93:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK12-NEXT:    call void @llvm.stackrestore(i8* [[TMP93]])
+// CHECK12-NEXT:    [[TMP94:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK12-NEXT:    ret i32 [[TMP94]]
 //
 //
 // CHECK12-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l100
@@ -4692,11 +4668,11 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
 // CHECK12-NEXT:    store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK12-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK12-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*, i32)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP4]])
+// CHECK12-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*, i32)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP4]])
 // CHECK12-NEXT:    ret void
 //
 //
-// CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..3
+// CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..4
 // CHECK12-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK12-NEXT:  entry:
 // CHECK12-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -4811,7 +4787,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK12-LABEL: define {{[^@]+}}@_Z5tmainIiLi10EEiT_
-// CHECK12-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK12-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK12-NEXT:  entry:
 // CHECK12-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK12-NEXT:    [[A:%.*]] = alloca [10 x i32], align 4
@@ -4839,7 +4815,7 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK12-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK12-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK12-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l79.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.6, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK12-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l79.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.8, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK12-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK12-NEXT:    br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK12:       omp_offload.failed:
@@ -4857,7 +4833,7 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK12-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
 // CHECK12-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK12-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l84.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.8, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK12-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l84.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.11, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.12, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK12-NEXT:    [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
 // CHECK12-NEXT:    br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]]
 // CHECK12:       omp_offload.failed5:
@@ -4875,7 +4851,7 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0
 // CHECK12-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS8]], i32 0, i32 0
 // CHECK12-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK12-NEXT:    [[TMP25:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l89.region_id, i32 1, i8** [[TMP23]], i8** [[TMP24]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.11, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.12, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK12-NEXT:    [[TMP25:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l89.region_id, i32 1, i8** [[TMP23]], i8** [[TMP24]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.14, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.15, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK12-NEXT:    [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0
 // CHECK12-NEXT:    br i1 [[TMP26]], label [[OMP_OFFLOAD_FAILED11:%.*]], label [[OMP_OFFLOAD_CONT12:%.*]]
 // CHECK12:       omp_offload.failed11:
@@ -4891,11 +4867,11 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4
 // CHECK12-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4
 // CHECK12-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4
-// CHECK12-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK12-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK12-NEXT:    ret void
 //
 //
-// CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..5
+// CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..7
 // CHECK12-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK12-NEXT:  entry:
 // CHECK12-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -4967,11 +4943,11 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4
 // CHECK12-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4
 // CHECK12-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4
-// CHECK12-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK12-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK12-NEXT:    ret void
 //
 //
-// CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..7
+// CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..10
 // CHECK12-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK12-NEXT:  entry:
 // CHECK12-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -5043,11 +5019,11 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4
 // CHECK12-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4
 // CHECK12-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4
-// CHECK12-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK12-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK12-NEXT:    ret void
 //
 //
-// CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..10
+// CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..13
 // CHECK12-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK12-NEXT:  entry:
 // CHECK12-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -5131,7 +5107,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK12-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK12-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK12-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK12-NEXT:  entry:
 // CHECK12-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK12-NEXT:    ret void

diff  --git a/clang/test/OpenMP/teams_distribute_parallel_for_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_codegen.cpp
index 3afe7678dbd9a..a9c1962442204 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_codegen.cpp
@@ -2461,14 +2461,14 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    store i32 [[TMP3]], i32* [[CONV]], align 4
 // CHECK9-NEXT:    [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8
 // CHECK9-NEXT:    [[TMP5:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK9-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP4]], i64* [[TMP7]], align 8
-// CHECK9-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP4]], i64* [[TMP9]], align 8
-// CHECK9-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK9-NEXT:    store i64 4, i64* [[TMP10]], align 8
+// CHECK9-NEXT:    [[TMP6:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK9-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP6]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes to i8*), i64 24, i1 false)
+// CHECK9-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP4]], i64* [[TMP8]], align 8
+// CHECK9-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP4]], i64* [[TMP10]], align 8
 // CHECK9-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
 // CHECK9-NEXT:    store i8* null, i8** [[TMP11]], align 8
 // CHECK9-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -2477,46 +2477,44 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK9-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i64*
 // CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP15]], align 8
-// CHECK9-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK9-NEXT:    store i64 8, i64* [[TMP16]], align 8
-// CHECK9-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK9-NEXT:    store i8* null, i8** [[TMP17]], align 8
-// CHECK9-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
-// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP19]], align 8
-// CHECK9-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32**
-// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP21]], align 8
-// CHECK9-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK9-NEXT:    store i64 [[TMP5]], i64* [[TMP22]], align 8
-// CHECK9-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK9-NEXT:    store i8* null, i8** [[TMP23]], align 8
-// CHECK9-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP27:%.*]] = load i32, i32* [[N]], align 4
-// CHECK9-NEXT:    store i32 [[TMP27]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK9-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK9-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP28]], 0
+// CHECK9-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK9-NEXT:    store i8* null, i8** [[TMP16]], align 8
+// CHECK9-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32**
+// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP18]], align 8
+// CHECK9-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32**
+// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP20]], align 8
+// CHECK9-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
+// CHECK9-NEXT:    store i64 [[TMP5]], i64* [[TMP21]], align 8
+// CHECK9-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK9-NEXT:    store i8* null, i8** [[TMP22]], align 8
+// CHECK9-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP26:%.*]] = load i32, i32* [[N]], align 4
+// CHECK9-NEXT:    store i32 [[TMP26]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK9-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK9-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP27]], 0
 // CHECK9-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK9-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK9-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK9-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK9-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP29]], 1
-// CHECK9-NEXT:    [[TMP30:%.*]] = zext i32 [[ADD]] to i64
-// CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP30]])
-// CHECK9-NEXT:    [[TMP31:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l73.region_id, i32 3, i8** [[TMP24]], i8** [[TMP25]], i64* [[TMP26]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK9-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
-// CHECK9-NEXT:    br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK9-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK9-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP28]], 1
+// CHECK9-NEXT:    [[TMP29:%.*]] = zext i32 [[ADD]] to i64
+// CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP29]])
+// CHECK9-NEXT:    [[TMP30:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l73.region_id, i32 3, i8** [[TMP23]], i8** [[TMP24]], i64* [[TMP25]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK9-NEXT:    [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
+// CHECK9-NEXT:    br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK9:       omp_offload.failed:
 // CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l73(i64 [[TMP4]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK9:       omp_offload.cont:
 // CHECK9-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[VLA]], i64 0
-// CHECK9-NEXT:    [[TMP33:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
-// CHECK9-NEXT:    [[TMP34:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK9-NEXT:    call void @llvm.stackrestore(i8* [[TMP34]])
-// CHECK9-NEXT:    ret i32 [[TMP33]]
+// CHECK9-NEXT:    [[TMP32:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+// CHECK9-NEXT:    [[TMP33:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK9-NEXT:    call void @llvm.stackrestore(i8* [[TMP33]])
+// CHECK9-NEXT:    ret i32 [[TMP32]]
 //
 //
 // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l73
@@ -2732,7 +2730,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK9-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK9-SAME: () #[[ATTR4:[0-9]+]] {
+// CHECK9-SAME: () #[[ATTR5:[0-9]+]] {
 // CHECK9-NEXT:  entry:
 // CHECK9-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK9-NEXT:    ret void
@@ -2764,14 +2762,14 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    store i32 [[TMP3]], i32* [[CONV]], align 4
 // CHECK10-NEXT:    [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8
 // CHECK10-NEXT:    [[TMP5:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK10-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP4]], i64* [[TMP7]], align 8
-// CHECK10-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP4]], i64* [[TMP9]], align 8
-// CHECK10-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK10-NEXT:    store i64 4, i64* [[TMP10]], align 8
+// CHECK10-NEXT:    [[TMP6:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK10-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP6]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes to i8*), i64 24, i1 false)
+// CHECK10-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP4]], i64* [[TMP8]], align 8
+// CHECK10-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP4]], i64* [[TMP10]], align 8
 // CHECK10-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
 // CHECK10-NEXT:    store i8* null, i8** [[TMP11]], align 8
 // CHECK10-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -2780,46 +2778,44 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK10-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i64*
 // CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP15]], align 8
-// CHECK10-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK10-NEXT:    store i64 8, i64* [[TMP16]], align 8
-// CHECK10-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK10-NEXT:    store i8* null, i8** [[TMP17]], align 8
-// CHECK10-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK10-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
-// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP19]], align 8
-// CHECK10-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK10-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32**
-// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP21]], align 8
-// CHECK10-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK10-NEXT:    store i64 [[TMP5]], i64* [[TMP22]], align 8
-// CHECK10-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK10-NEXT:    store i8* null, i8** [[TMP23]], align 8
-// CHECK10-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP27:%.*]] = load i32, i32* [[N]], align 4
-// CHECK10-NEXT:    store i32 [[TMP27]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK10-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK10-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP28]], 0
+// CHECK10-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK10-NEXT:    store i8* null, i8** [[TMP16]], align 8
+// CHECK10-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK10-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32**
+// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP18]], align 8
+// CHECK10-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK10-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32**
+// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP20]], align 8
+// CHECK10-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
+// CHECK10-NEXT:    store i64 [[TMP5]], i64* [[TMP21]], align 8
+// CHECK10-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK10-NEXT:    store i8* null, i8** [[TMP22]], align 8
+// CHECK10-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP26:%.*]] = load i32, i32* [[N]], align 4
+// CHECK10-NEXT:    store i32 [[TMP26]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK10-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK10-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP27]], 0
 // CHECK10-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK10-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK10-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK10-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK10-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP29]], 1
-// CHECK10-NEXT:    [[TMP30:%.*]] = zext i32 [[ADD]] to i64
-// CHECK10-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP30]])
-// CHECK10-NEXT:    [[TMP31:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l73.region_id, i32 3, i8** [[TMP24]], i8** [[TMP25]], i64* [[TMP26]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK10-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
-// CHECK10-NEXT:    br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK10-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK10-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP28]], 1
+// CHECK10-NEXT:    [[TMP29:%.*]] = zext i32 [[ADD]] to i64
+// CHECK10-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP29]])
+// CHECK10-NEXT:    [[TMP30:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l73.region_id, i32 3, i8** [[TMP23]], i8** [[TMP24]], i64* [[TMP25]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK10-NEXT:    [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
+// CHECK10-NEXT:    br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK10:       omp_offload.failed:
 // CHECK10-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l73(i64 [[TMP4]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK10-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK10:       omp_offload.cont:
 // CHECK10-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[VLA]], i64 0
-// CHECK10-NEXT:    [[TMP33:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
-// CHECK10-NEXT:    [[TMP34:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK10-NEXT:    call void @llvm.stackrestore(i8* [[TMP34]])
-// CHECK10-NEXT:    ret i32 [[TMP33]]
+// CHECK10-NEXT:    [[TMP32:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+// CHECK10-NEXT:    [[TMP33:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK10-NEXT:    call void @llvm.stackrestore(i8* [[TMP33]])
+// CHECK10-NEXT:    ret i32 [[TMP32]]
 //
 //
 // CHECK10-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l73
@@ -3035,7 +3031,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK10-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK10-SAME: () #[[ATTR4:[0-9]+]] {
+// CHECK10-SAME: () #[[ATTR5:[0-9]+]] {
 // CHECK10-NEXT:  entry:
 // CHECK10-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK10-NEXT:    ret void
@@ -3066,14 +3062,14 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4
 // CHECK11-NEXT:    [[TMP4:%.*]] = mul nuw i32 [[TMP0]], 4
 // CHECK11-NEXT:    [[TMP5:%.*]] = sext i32 [[TMP4]] to i64
-// CHECK11-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP3]], i32* [[TMP7]], align 4
-// CHECK11-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP3]], i32* [[TMP9]], align 4
-// CHECK11-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK11-NEXT:    store i64 4, i64* [[TMP10]], align 4
+// CHECK11-NEXT:    [[TMP6:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK11-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP6]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes to i8*), i32 24, i1 false)
+// CHECK11-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP3]], i32* [[TMP8]], align 4
+// CHECK11-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP3]], i32* [[TMP10]], align 4
 // CHECK11-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
 // CHECK11-NEXT:    store i8* null, i8** [[TMP11]], align 4
 // CHECK11-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -3082,46 +3078,44 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK11-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i32*
 // CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP15]], align 4
-// CHECK11-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK11-NEXT:    store i64 4, i64* [[TMP16]], align 4
-// CHECK11-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK11-NEXT:    store i8* null, i8** [[TMP17]], align 4
-// CHECK11-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
-// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP19]], align 4
-// CHECK11-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32**
-// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP21]], align 4
-// CHECK11-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK11-NEXT:    store i64 [[TMP5]], i64* [[TMP22]], align 4
-// CHECK11-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK11-NEXT:    store i8* null, i8** [[TMP23]], align 4
-// CHECK11-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP27:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP27]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK11-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK11-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP28]], 0
+// CHECK11-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK11-NEXT:    store i8* null, i8** [[TMP16]], align 4
+// CHECK11-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32**
+// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP18]], align 4
+// CHECK11-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32**
+// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP20]], align 4
+// CHECK11-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
+// CHECK11-NEXT:    store i64 [[TMP5]], i64* [[TMP21]], align 4
+// CHECK11-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK11-NEXT:    store i8* null, i8** [[TMP22]], align 4
+// CHECK11-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP26:%.*]] = load i32, i32* [[N]], align 4
+// CHECK11-NEXT:    store i32 [[TMP26]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK11-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK11-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP27]], 0
 // CHECK11-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK11-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK11-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK11-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK11-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP29]], 1
-// CHECK11-NEXT:    [[TMP30:%.*]] = zext i32 [[ADD]] to i64
-// CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP30]])
-// CHECK11-NEXT:    [[TMP31:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l73.region_id, i32 3, i8** [[TMP24]], i8** [[TMP25]], i64* [[TMP26]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK11-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
-// CHECK11-NEXT:    br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK11-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK11-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP28]], 1
+// CHECK11-NEXT:    [[TMP29:%.*]] = zext i32 [[ADD]] to i64
+// CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP29]])
+// CHECK11-NEXT:    [[TMP30:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l73.region_id, i32 3, i8** [[TMP23]], i8** [[TMP24]], i64* [[TMP25]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK11-NEXT:    [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
+// CHECK11-NEXT:    br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK11:       omp_offload.failed:
 // CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l73(i32 [[TMP3]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK11:       omp_offload.cont:
 // CHECK11-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[VLA]], i32 0
-// CHECK11-NEXT:    [[TMP33:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
-// CHECK11-NEXT:    [[TMP34:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK11-NEXT:    call void @llvm.stackrestore(i8* [[TMP34]])
-// CHECK11-NEXT:    ret i32 [[TMP33]]
+// CHECK11-NEXT:    [[TMP32:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+// CHECK11-NEXT:    [[TMP33:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK11-NEXT:    call void @llvm.stackrestore(i8* [[TMP33]])
+// CHECK11-NEXT:    ret i32 [[TMP32]]
 //
 //
 // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l73
@@ -3331,7 +3325,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK11-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK11-SAME: () #[[ATTR4:[0-9]+]] {
+// CHECK11-SAME: () #[[ATTR5:[0-9]+]] {
 // CHECK11-NEXT:  entry:
 // CHECK11-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK11-NEXT:    ret void
@@ -3362,14 +3356,14 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4
 // CHECK12-NEXT:    [[TMP4:%.*]] = mul nuw i32 [[TMP0]], 4
 // CHECK12-NEXT:    [[TMP5:%.*]] = sext i32 [[TMP4]] to i64
-// CHECK12-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP3]], i32* [[TMP7]], align 4
-// CHECK12-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP3]], i32* [[TMP9]], align 4
-// CHECK12-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK12-NEXT:    store i64 4, i64* [[TMP10]], align 4
+// CHECK12-NEXT:    [[TMP6:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK12-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP6]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes to i8*), i32 24, i1 false)
+// CHECK12-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP3]], i32* [[TMP8]], align 4
+// CHECK12-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP3]], i32* [[TMP10]], align 4
 // CHECK12-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
 // CHECK12-NEXT:    store i8* null, i8** [[TMP11]], align 4
 // CHECK12-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -3378,46 +3372,44 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK12-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i32*
 // CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP15]], align 4
-// CHECK12-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK12-NEXT:    store i64 4, i64* [[TMP16]], align 4
-// CHECK12-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK12-NEXT:    store i8* null, i8** [[TMP17]], align 4
-// CHECK12-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK12-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
-// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP19]], align 4
-// CHECK12-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK12-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32**
-// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP21]], align 4
-// CHECK12-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK12-NEXT:    store i64 [[TMP5]], i64* [[TMP22]], align 4
-// CHECK12-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK12-NEXT:    store i8* null, i8** [[TMP23]], align 4
-// CHECK12-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP27:%.*]] = load i32, i32* [[N]], align 4
-// CHECK12-NEXT:    store i32 [[TMP27]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK12-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK12-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP28]], 0
+// CHECK12-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK12-NEXT:    store i8* null, i8** [[TMP16]], align 4
+// CHECK12-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK12-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32**
+// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP18]], align 4
+// CHECK12-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK12-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32**
+// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP20]], align 4
+// CHECK12-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
+// CHECK12-NEXT:    store i64 [[TMP5]], i64* [[TMP21]], align 4
+// CHECK12-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK12-NEXT:    store i8* null, i8** [[TMP22]], align 4
+// CHECK12-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP26:%.*]] = load i32, i32* [[N]], align 4
+// CHECK12-NEXT:    store i32 [[TMP26]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK12-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK12-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP27]], 0
 // CHECK12-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK12-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK12-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK12-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK12-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP29]], 1
-// CHECK12-NEXT:    [[TMP30:%.*]] = zext i32 [[ADD]] to i64
-// CHECK12-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP30]])
-// CHECK12-NEXT:    [[TMP31:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l73.region_id, i32 3, i8** [[TMP24]], i8** [[TMP25]], i64* [[TMP26]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK12-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
-// CHECK12-NEXT:    br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK12-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK12-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP28]], 1
+// CHECK12-NEXT:    [[TMP29:%.*]] = zext i32 [[ADD]] to i64
+// CHECK12-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP29]])
+// CHECK12-NEXT:    [[TMP30:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l73.region_id, i32 3, i8** [[TMP23]], i8** [[TMP24]], i64* [[TMP25]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK12-NEXT:    [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
+// CHECK12-NEXT:    br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK12:       omp_offload.failed:
 // CHECK12-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l73(i32 [[TMP3]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK12-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK12:       omp_offload.cont:
 // CHECK12-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[VLA]], i32 0
-// CHECK12-NEXT:    [[TMP33:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
-// CHECK12-NEXT:    [[TMP34:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK12-NEXT:    call void @llvm.stackrestore(i8* [[TMP34]])
-// CHECK12-NEXT:    ret i32 [[TMP33]]
+// CHECK12-NEXT:    [[TMP32:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+// CHECK12-NEXT:    [[TMP33:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK12-NEXT:    call void @llvm.stackrestore(i8* [[TMP33]])
+// CHECK12-NEXT:    ret i32 [[TMP32]]
 //
 //
 // CHECK12-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l73
@@ -3627,7 +3619,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK12-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK12-SAME: () #[[ATTR4:[0-9]+]] {
+// CHECK12-SAME: () #[[ATTR5:[0-9]+]] {
 // CHECK12-NEXT:  entry:
 // CHECK12-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK12-NEXT:    ret void
@@ -4459,14 +4451,14 @@ int main (int argc, char **argv) {
 // CHECK25-NEXT:    store i32 [[TMP3]], i32* [[CONV]], align 4
 // CHECK25-NEXT:    [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8
 // CHECK25-NEXT:    [[TMP5:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK25-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK25-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i64*
-// CHECK25-NEXT:    store i64 [[TMP4]], i64* [[TMP7]], align 8
-// CHECK25-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK25-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i64*
-// CHECK25-NEXT:    store i64 [[TMP4]], i64* [[TMP9]], align 8
-// CHECK25-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK25-NEXT:    store i64 4, i64* [[TMP10]], align 8
+// CHECK25-NEXT:    [[TMP6:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK25-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP6]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes to i8*), i64 24, i1 false)
+// CHECK25-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK25-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i64*
+// CHECK25-NEXT:    store i64 [[TMP4]], i64* [[TMP8]], align 8
+// CHECK25-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK25-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i64*
+// CHECK25-NEXT:    store i64 [[TMP4]], i64* [[TMP10]], align 8
 // CHECK25-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
 // CHECK25-NEXT:    store i8* null, i8** [[TMP11]], align 8
 // CHECK25-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -4475,48 +4467,46 @@ int main (int argc, char **argv) {
 // CHECK25-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK25-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i64*
 // CHECK25-NEXT:    store i64 [[TMP1]], i64* [[TMP15]], align 8
-// CHECK25-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK25-NEXT:    store i64 8, i64* [[TMP16]], align 8
-// CHECK25-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK25-NEXT:    store i8* null, i8** [[TMP17]], align 8
-// CHECK25-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK25-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
-// CHECK25-NEXT:    store i32* [[VLA]], i32** [[TMP19]], align 8
-// CHECK25-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK25-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32**
-// CHECK25-NEXT:    store i32* [[VLA]], i32** [[TMP21]], align 8
-// CHECK25-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK25-NEXT:    store i64 [[TMP5]], i64* [[TMP22]], align 8
-// CHECK25-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK25-NEXT:    store i8* null, i8** [[TMP23]], align 8
-// CHECK25-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK25-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK25-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK25-NEXT:    [[TMP27:%.*]] = load i32, i32* [[N]], align 4
-// CHECK25-NEXT:    store i32 [[TMP27]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK25-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK25-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP28]], 0
+// CHECK25-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK25-NEXT:    store i8* null, i8** [[TMP16]], align 8
+// CHECK25-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK25-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32**
+// CHECK25-NEXT:    store i32* [[VLA]], i32** [[TMP18]], align 8
+// CHECK25-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK25-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32**
+// CHECK25-NEXT:    store i32* [[VLA]], i32** [[TMP20]], align 8
+// CHECK25-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
+// CHECK25-NEXT:    store i64 [[TMP5]], i64* [[TMP21]], align 8
+// CHECK25-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK25-NEXT:    store i8* null, i8** [[TMP22]], align 8
+// CHECK25-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK25-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK25-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK25-NEXT:    [[TMP26:%.*]] = load i32, i32* [[N]], align 4
+// CHECK25-NEXT:    store i32 [[TMP26]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK25-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK25-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP27]], 0
 // CHECK25-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK25-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK25-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK25-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK25-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP29]], 1
-// CHECK25-NEXT:    [[TMP30:%.*]] = zext i32 [[ADD]] to i64
-// CHECK25-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP30]])
-// CHECK25-NEXT:    [[TMP31:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l162.region_id, i32 3, i8** [[TMP24]], i8** [[TMP25]], i64* [[TMP26]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK25-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
-// CHECK25-NEXT:    br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK25-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK25-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP28]], 1
+// CHECK25-NEXT:    [[TMP29:%.*]] = zext i32 [[ADD]] to i64
+// CHECK25-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP29]])
+// CHECK25-NEXT:    [[TMP30:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l162.region_id, i32 3, i8** [[TMP23]], i8** [[TMP24]], i64* [[TMP25]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK25-NEXT:    [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
+// CHECK25-NEXT:    br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK25:       omp_offload.failed:
 // CHECK25-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l162(i64 [[TMP4]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK25-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK25:       omp_offload.cont:
-// CHECK25-NEXT:    [[TMP33:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK25-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10EEiT_(i32 noundef signext [[TMP33]])
+// CHECK25-NEXT:    [[TMP32:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK25-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10EEiT_(i32 noundef signext [[TMP32]])
 // CHECK25-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK25-NEXT:    [[TMP34:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK25-NEXT:    call void @llvm.stackrestore(i8* [[TMP34]])
-// CHECK25-NEXT:    [[TMP35:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK25-NEXT:    ret i32 [[TMP35]]
+// CHECK25-NEXT:    [[TMP33:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK25-NEXT:    call void @llvm.stackrestore(i8* [[TMP33]])
+// CHECK25-NEXT:    [[TMP34:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK25-NEXT:    ret i32 [[TMP34]]
 //
 //
 // CHECK25-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l162
@@ -4732,7 +4722,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK25-LABEL: define {{[^@]+}}@_Z5tmainIiLi10EEiT_
-// CHECK25-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK25-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK25-NEXT:  entry:
 // CHECK25-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK25-NEXT:    [[A:%.*]] = alloca [10 x i32], align 4
@@ -4783,7 +4773,7 @@ int main (int argc, char **argv) {
 // CHECK25-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK25-NEXT:    [[TMP21:%.*]] = load i32, i32* [[TE]], align 4
 // CHECK25-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK25-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l151.region_id, i32 3, i8** [[TMP19]], i8** [[TMP20]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null, i32 [[TMP21]], i32 0)
+// CHECK25-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l151.region_id, i32 3, i8** [[TMP19]], i8** [[TMP20]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.4, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.5, i32 0, i32 0), i8** null, i8** null, i32 [[TMP21]], i32 0)
 // CHECK25-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK25-NEXT:    br i1 [[TMP23]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK25:       omp_offload.failed:
@@ -4954,7 +4944,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK25-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK25-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK25-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK25-NEXT:  entry:
 // CHECK25-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK25-NEXT:    ret void
@@ -4992,14 +4982,14 @@ int main (int argc, char **argv) {
 // CHECK26-NEXT:    store i32 [[TMP3]], i32* [[CONV]], align 4
 // CHECK26-NEXT:    [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8
 // CHECK26-NEXT:    [[TMP5:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK26-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK26-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i64*
-// CHECK26-NEXT:    store i64 [[TMP4]], i64* [[TMP7]], align 8
-// CHECK26-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK26-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i64*
-// CHECK26-NEXT:    store i64 [[TMP4]], i64* [[TMP9]], align 8
-// CHECK26-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK26-NEXT:    store i64 4, i64* [[TMP10]], align 8
+// CHECK26-NEXT:    [[TMP6:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK26-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP6]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes to i8*), i64 24, i1 false)
+// CHECK26-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK26-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i64*
+// CHECK26-NEXT:    store i64 [[TMP4]], i64* [[TMP8]], align 8
+// CHECK26-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK26-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i64*
+// CHECK26-NEXT:    store i64 [[TMP4]], i64* [[TMP10]], align 8
 // CHECK26-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
 // CHECK26-NEXT:    store i8* null, i8** [[TMP11]], align 8
 // CHECK26-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -5008,48 +4998,46 @@ int main (int argc, char **argv) {
 // CHECK26-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK26-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i64*
 // CHECK26-NEXT:    store i64 [[TMP1]], i64* [[TMP15]], align 8
-// CHECK26-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK26-NEXT:    store i64 8, i64* [[TMP16]], align 8
-// CHECK26-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK26-NEXT:    store i8* null, i8** [[TMP17]], align 8
-// CHECK26-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK26-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
-// CHECK26-NEXT:    store i32* [[VLA]], i32** [[TMP19]], align 8
-// CHECK26-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK26-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32**
-// CHECK26-NEXT:    store i32* [[VLA]], i32** [[TMP21]], align 8
-// CHECK26-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK26-NEXT:    store i64 [[TMP5]], i64* [[TMP22]], align 8
-// CHECK26-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK26-NEXT:    store i8* null, i8** [[TMP23]], align 8
-// CHECK26-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK26-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK26-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK26-NEXT:    [[TMP27:%.*]] = load i32, i32* [[N]], align 4
-// CHECK26-NEXT:    store i32 [[TMP27]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK26-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK26-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP28]], 0
+// CHECK26-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK26-NEXT:    store i8* null, i8** [[TMP16]], align 8
+// CHECK26-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK26-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32**
+// CHECK26-NEXT:    store i32* [[VLA]], i32** [[TMP18]], align 8
+// CHECK26-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK26-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32**
+// CHECK26-NEXT:    store i32* [[VLA]], i32** [[TMP20]], align 8
+// CHECK26-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
+// CHECK26-NEXT:    store i64 [[TMP5]], i64* [[TMP21]], align 8
+// CHECK26-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK26-NEXT:    store i8* null, i8** [[TMP22]], align 8
+// CHECK26-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK26-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK26-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK26-NEXT:    [[TMP26:%.*]] = load i32, i32* [[N]], align 4
+// CHECK26-NEXT:    store i32 [[TMP26]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK26-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK26-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP27]], 0
 // CHECK26-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK26-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK26-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK26-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK26-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP29]], 1
-// CHECK26-NEXT:    [[TMP30:%.*]] = zext i32 [[ADD]] to i64
-// CHECK26-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP30]])
-// CHECK26-NEXT:    [[TMP31:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l162.region_id, i32 3, i8** [[TMP24]], i8** [[TMP25]], i64* [[TMP26]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK26-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
-// CHECK26-NEXT:    br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK26-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK26-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP28]], 1
+// CHECK26-NEXT:    [[TMP29:%.*]] = zext i32 [[ADD]] to i64
+// CHECK26-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP29]])
+// CHECK26-NEXT:    [[TMP30:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l162.region_id, i32 3, i8** [[TMP23]], i8** [[TMP24]], i64* [[TMP25]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK26-NEXT:    [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
+// CHECK26-NEXT:    br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK26:       omp_offload.failed:
 // CHECK26-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l162(i64 [[TMP4]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK26-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK26:       omp_offload.cont:
-// CHECK26-NEXT:    [[TMP33:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK26-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10EEiT_(i32 noundef signext [[TMP33]])
+// CHECK26-NEXT:    [[TMP32:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK26-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10EEiT_(i32 noundef signext [[TMP32]])
 // CHECK26-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK26-NEXT:    [[TMP34:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK26-NEXT:    call void @llvm.stackrestore(i8* [[TMP34]])
-// CHECK26-NEXT:    [[TMP35:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK26-NEXT:    ret i32 [[TMP35]]
+// CHECK26-NEXT:    [[TMP33:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK26-NEXT:    call void @llvm.stackrestore(i8* [[TMP33]])
+// CHECK26-NEXT:    [[TMP34:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK26-NEXT:    ret i32 [[TMP34]]
 //
 //
 // CHECK26-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l162
@@ -5265,7 +5253,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK26-LABEL: define {{[^@]+}}@_Z5tmainIiLi10EEiT_
-// CHECK26-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK26-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK26-NEXT:  entry:
 // CHECK26-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK26-NEXT:    [[A:%.*]] = alloca [10 x i32], align 4
@@ -5316,7 +5304,7 @@ int main (int argc, char **argv) {
 // CHECK26-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK26-NEXT:    [[TMP21:%.*]] = load i32, i32* [[TE]], align 4
 // CHECK26-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK26-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l151.region_id, i32 3, i8** [[TMP19]], i8** [[TMP20]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null, i32 [[TMP21]], i32 0)
+// CHECK26-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l151.region_id, i32 3, i8** [[TMP19]], i8** [[TMP20]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.4, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.5, i32 0, i32 0), i8** null, i8** null, i32 [[TMP21]], i32 0)
 // CHECK26-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK26-NEXT:    br i1 [[TMP23]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK26:       omp_offload.failed:
@@ -5487,7 +5475,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK26-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK26-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK26-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK26-NEXT:  entry:
 // CHECK26-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK26-NEXT:    ret void
@@ -5524,14 +5512,14 @@ int main (int argc, char **argv) {
 // CHECK27-NEXT:    [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4
 // CHECK27-NEXT:    [[TMP4:%.*]] = mul nuw i32 [[TMP0]], 4
 // CHECK27-NEXT:    [[TMP5:%.*]] = sext i32 [[TMP4]] to i64
-// CHECK27-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK27-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32*
-// CHECK27-NEXT:    store i32 [[TMP3]], i32* [[TMP7]], align 4
-// CHECK27-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK27-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i32*
-// CHECK27-NEXT:    store i32 [[TMP3]], i32* [[TMP9]], align 4
-// CHECK27-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK27-NEXT:    store i64 4, i64* [[TMP10]], align 4
+// CHECK27-NEXT:    [[TMP6:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK27-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP6]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes to i8*), i32 24, i1 false)
+// CHECK27-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK27-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i32*
+// CHECK27-NEXT:    store i32 [[TMP3]], i32* [[TMP8]], align 4
+// CHECK27-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK27-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i32*
+// CHECK27-NEXT:    store i32 [[TMP3]], i32* [[TMP10]], align 4
 // CHECK27-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
 // CHECK27-NEXT:    store i8* null, i8** [[TMP11]], align 4
 // CHECK27-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -5540,48 +5528,46 @@ int main (int argc, char **argv) {
 // CHECK27-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK27-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i32*
 // CHECK27-NEXT:    store i32 [[TMP0]], i32* [[TMP15]], align 4
-// CHECK27-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK27-NEXT:    store i64 4, i64* [[TMP16]], align 4
-// CHECK27-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK27-NEXT:    store i8* null, i8** [[TMP17]], align 4
-// CHECK27-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK27-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
-// CHECK27-NEXT:    store i32* [[VLA]], i32** [[TMP19]], align 4
-// CHECK27-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK27-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32**
-// CHECK27-NEXT:    store i32* [[VLA]], i32** [[TMP21]], align 4
-// CHECK27-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK27-NEXT:    store i64 [[TMP5]], i64* [[TMP22]], align 4
-// CHECK27-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK27-NEXT:    store i8* null, i8** [[TMP23]], align 4
-// CHECK27-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK27-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK27-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK27-NEXT:    [[TMP27:%.*]] = load i32, i32* [[N]], align 4
-// CHECK27-NEXT:    store i32 [[TMP27]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK27-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK27-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP28]], 0
+// CHECK27-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK27-NEXT:    store i8* null, i8** [[TMP16]], align 4
+// CHECK27-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK27-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32**
+// CHECK27-NEXT:    store i32* [[VLA]], i32** [[TMP18]], align 4
+// CHECK27-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK27-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32**
+// CHECK27-NEXT:    store i32* [[VLA]], i32** [[TMP20]], align 4
+// CHECK27-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
+// CHECK27-NEXT:    store i64 [[TMP5]], i64* [[TMP21]], align 4
+// CHECK27-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK27-NEXT:    store i8* null, i8** [[TMP22]], align 4
+// CHECK27-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK27-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK27-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK27-NEXT:    [[TMP26:%.*]] = load i32, i32* [[N]], align 4
+// CHECK27-NEXT:    store i32 [[TMP26]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK27-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK27-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP27]], 0
 // CHECK27-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK27-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK27-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK27-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK27-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP29]], 1
-// CHECK27-NEXT:    [[TMP30:%.*]] = zext i32 [[ADD]] to i64
-// CHECK27-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP30]])
-// CHECK27-NEXT:    [[TMP31:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l162.region_id, i32 3, i8** [[TMP24]], i8** [[TMP25]], i64* [[TMP26]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK27-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
-// CHECK27-NEXT:    br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK27-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK27-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP28]], 1
+// CHECK27-NEXT:    [[TMP29:%.*]] = zext i32 [[ADD]] to i64
+// CHECK27-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP29]])
+// CHECK27-NEXT:    [[TMP30:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l162.region_id, i32 3, i8** [[TMP23]], i8** [[TMP24]], i64* [[TMP25]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK27-NEXT:    [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
+// CHECK27-NEXT:    br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK27:       omp_offload.failed:
 // CHECK27-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l162(i32 [[TMP3]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK27-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK27:       omp_offload.cont:
-// CHECK27-NEXT:    [[TMP33:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK27-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10EEiT_(i32 noundef [[TMP33]])
+// CHECK27-NEXT:    [[TMP32:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK27-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10EEiT_(i32 noundef [[TMP32]])
 // CHECK27-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK27-NEXT:    [[TMP34:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK27-NEXT:    call void @llvm.stackrestore(i8* [[TMP34]])
-// CHECK27-NEXT:    [[TMP35:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK27-NEXT:    ret i32 [[TMP35]]
+// CHECK27-NEXT:    [[TMP33:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK27-NEXT:    call void @llvm.stackrestore(i8* [[TMP33]])
+// CHECK27-NEXT:    [[TMP34:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK27-NEXT:    ret i32 [[TMP34]]
 //
 //
 // CHECK27-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l162
@@ -5791,7 +5777,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK27-LABEL: define {{[^@]+}}@_Z5tmainIiLi10EEiT_
-// CHECK27-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK27-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK27-NEXT:  entry:
 // CHECK27-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK27-NEXT:    [[A:%.*]] = alloca [10 x i32], align 4
@@ -5840,7 +5826,7 @@ int main (int argc, char **argv) {
 // CHECK27-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK27-NEXT:    [[TMP21:%.*]] = load i32, i32* [[TE]], align 4
 // CHECK27-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK27-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l151.region_id, i32 3, i8** [[TMP19]], i8** [[TMP20]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null, i32 [[TMP21]], i32 0)
+// CHECK27-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l151.region_id, i32 3, i8** [[TMP19]], i8** [[TMP20]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.4, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.5, i32 0, i32 0), i8** null, i8** null, i32 [[TMP21]], i32 0)
 // CHECK27-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK27-NEXT:    br i1 [[TMP23]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK27:       omp_offload.failed:
@@ -6004,7 +5990,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK27-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK27-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK27-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK27-NEXT:  entry:
 // CHECK27-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK27-NEXT:    ret void
@@ -6041,14 +6027,14 @@ int main (int argc, char **argv) {
 // CHECK28-NEXT:    [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4
 // CHECK28-NEXT:    [[TMP4:%.*]] = mul nuw i32 [[TMP0]], 4
 // CHECK28-NEXT:    [[TMP5:%.*]] = sext i32 [[TMP4]] to i64
-// CHECK28-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK28-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32*
-// CHECK28-NEXT:    store i32 [[TMP3]], i32* [[TMP7]], align 4
-// CHECK28-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK28-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i32*
-// CHECK28-NEXT:    store i32 [[TMP3]], i32* [[TMP9]], align 4
-// CHECK28-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK28-NEXT:    store i64 4, i64* [[TMP10]], align 4
+// CHECK28-NEXT:    [[TMP6:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK28-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP6]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes to i8*), i32 24, i1 false)
+// CHECK28-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK28-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i32*
+// CHECK28-NEXT:    store i32 [[TMP3]], i32* [[TMP8]], align 4
+// CHECK28-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK28-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i32*
+// CHECK28-NEXT:    store i32 [[TMP3]], i32* [[TMP10]], align 4
 // CHECK28-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
 // CHECK28-NEXT:    store i8* null, i8** [[TMP11]], align 4
 // CHECK28-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -6057,48 +6043,46 @@ int main (int argc, char **argv) {
 // CHECK28-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK28-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i32*
 // CHECK28-NEXT:    store i32 [[TMP0]], i32* [[TMP15]], align 4
-// CHECK28-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK28-NEXT:    store i64 4, i64* [[TMP16]], align 4
-// CHECK28-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK28-NEXT:    store i8* null, i8** [[TMP17]], align 4
-// CHECK28-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK28-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
-// CHECK28-NEXT:    store i32* [[VLA]], i32** [[TMP19]], align 4
-// CHECK28-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK28-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32**
-// CHECK28-NEXT:    store i32* [[VLA]], i32** [[TMP21]], align 4
-// CHECK28-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK28-NEXT:    store i64 [[TMP5]], i64* [[TMP22]], align 4
-// CHECK28-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK28-NEXT:    store i8* null, i8** [[TMP23]], align 4
-// CHECK28-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK28-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK28-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK28-NEXT:    [[TMP27:%.*]] = load i32, i32* [[N]], align 4
-// CHECK28-NEXT:    store i32 [[TMP27]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK28-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK28-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP28]], 0
+// CHECK28-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK28-NEXT:    store i8* null, i8** [[TMP16]], align 4
+// CHECK28-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK28-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32**
+// CHECK28-NEXT:    store i32* [[VLA]], i32** [[TMP18]], align 4
+// CHECK28-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK28-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32**
+// CHECK28-NEXT:    store i32* [[VLA]], i32** [[TMP20]], align 4
+// CHECK28-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
+// CHECK28-NEXT:    store i64 [[TMP5]], i64* [[TMP21]], align 4
+// CHECK28-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK28-NEXT:    store i8* null, i8** [[TMP22]], align 4
+// CHECK28-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK28-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK28-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK28-NEXT:    [[TMP26:%.*]] = load i32, i32* [[N]], align 4
+// CHECK28-NEXT:    store i32 [[TMP26]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK28-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK28-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP27]], 0
 // CHECK28-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK28-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK28-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK28-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK28-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP29]], 1
-// CHECK28-NEXT:    [[TMP30:%.*]] = zext i32 [[ADD]] to i64
-// CHECK28-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP30]])
-// CHECK28-NEXT:    [[TMP31:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l162.region_id, i32 3, i8** [[TMP24]], i8** [[TMP25]], i64* [[TMP26]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK28-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
-// CHECK28-NEXT:    br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK28-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK28-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP28]], 1
+// CHECK28-NEXT:    [[TMP29:%.*]] = zext i32 [[ADD]] to i64
+// CHECK28-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP29]])
+// CHECK28-NEXT:    [[TMP30:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l162.region_id, i32 3, i8** [[TMP23]], i8** [[TMP24]], i64* [[TMP25]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK28-NEXT:    [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
+// CHECK28-NEXT:    br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK28:       omp_offload.failed:
 // CHECK28-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l162(i32 [[TMP3]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK28-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK28:       omp_offload.cont:
-// CHECK28-NEXT:    [[TMP33:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK28-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10EEiT_(i32 noundef [[TMP33]])
+// CHECK28-NEXT:    [[TMP32:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK28-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10EEiT_(i32 noundef [[TMP32]])
 // CHECK28-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK28-NEXT:    [[TMP34:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK28-NEXT:    call void @llvm.stackrestore(i8* [[TMP34]])
-// CHECK28-NEXT:    [[TMP35:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK28-NEXT:    ret i32 [[TMP35]]
+// CHECK28-NEXT:    [[TMP33:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK28-NEXT:    call void @llvm.stackrestore(i8* [[TMP33]])
+// CHECK28-NEXT:    [[TMP34:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK28-NEXT:    ret i32 [[TMP34]]
 //
 //
 // CHECK28-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l162
@@ -6308,7 +6292,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK28-LABEL: define {{[^@]+}}@_Z5tmainIiLi10EEiT_
-// CHECK28-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK28-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK28-NEXT:  entry:
 // CHECK28-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK28-NEXT:    [[A:%.*]] = alloca [10 x i32], align 4
@@ -6357,7 +6341,7 @@ int main (int argc, char **argv) {
 // CHECK28-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK28-NEXT:    [[TMP21:%.*]] = load i32, i32* [[TE]], align 4
 // CHECK28-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK28-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l151.region_id, i32 3, i8** [[TMP19]], i8** [[TMP20]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null, i32 [[TMP21]], i32 0)
+// CHECK28-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l151.region_id, i32 3, i8** [[TMP19]], i8** [[TMP20]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.4, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.5, i32 0, i32 0), i8** null, i8** null, i32 [[TMP21]], i32 0)
 // CHECK28-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK28-NEXT:    br i1 [[TMP23]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK28:       omp_offload.failed:
@@ -6521,7 +6505,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK28-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK28-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK28-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK28-NEXT:  entry:
 // CHECK28-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK28-NEXT:    ret void

diff  --git a/clang/test/OpenMP/teams_distribute_parallel_for_collapse_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_collapse_codegen.cpp
index 6411cda627db3..f3a05078239e9 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_collapse_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_collapse_codegen.cpp
@@ -1013,14 +1013,14 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP9:%.*]] = load i64, i64* [[M_CASTED]], align 8
 // CHECK9-NEXT:    [[TMP10:%.*]] = mul nuw i64 [[TMP1]], [[TMP3]]
 // CHECK9-NEXT:    [[TMP11:%.*]] = mul nuw i64 [[TMP10]], 4
-// CHECK9-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP13:%.*]] = bitcast i8** [[TMP12]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP7]], i64* [[TMP13]], align 8
-// CHECK9-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP7]], i64* [[TMP15]], align 8
-// CHECK9-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK9-NEXT:    store i64 4, i64* [[TMP16]], align 8
+// CHECK9-NEXT:    [[TMP12:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK9-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP12]], i8* align 8 bitcast ([5 x i64]* @.offload_sizes to i8*), i64 40, i1 false)
+// CHECK9-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP7]], i64* [[TMP14]], align 8
+// CHECK9-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP16:%.*]] = bitcast i8** [[TMP15]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP7]], i64* [[TMP16]], align 8
 // CHECK9-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
 // CHECK9-NEXT:    store i8* null, i8** [[TMP17]], align 8
 // CHECK9-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -1029,75 +1029,69 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK9-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i64*
 // CHECK9-NEXT:    store i64 [[TMP9]], i64* [[TMP21]], align 8
-// CHECK9-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK9-NEXT:    store i64 4, i64* [[TMP22]], align 8
-// CHECK9-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK9-NEXT:    store i8* null, i8** [[TMP23]], align 8
-// CHECK9-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP25]], align 8
-// CHECK9-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP27:%.*]] = bitcast i8** [[TMP26]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP27]], align 8
-// CHECK9-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK9-NEXT:    store i64 8, i64* [[TMP28]], align 8
-// CHECK9-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK9-NEXT:    store i8* null, i8** [[TMP29]], align 8
-// CHECK9-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK9-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK9-NEXT:    store i8* null, i8** [[TMP22]], align 8
+// CHECK9-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP24]], align 8
+// CHECK9-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP26]], align 8
+// CHECK9-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK9-NEXT:    store i8* null, i8** [[TMP27]], align 8
+// CHECK9-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK9-NEXT:    [[TMP29:%.*]] = bitcast i8** [[TMP28]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP3]], i64* [[TMP29]], align 8
+// CHECK9-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
 // CHECK9-NEXT:    [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i64*
 // CHECK9-NEXT:    store i64 [[TMP3]], i64* [[TMP31]], align 8
-// CHECK9-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK9-NEXT:    [[TMP33:%.*]] = bitcast i8** [[TMP32]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP3]], i64* [[TMP33]], align 8
-// CHECK9-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK9-NEXT:    store i64 8, i64* [[TMP34]], align 8
-// CHECK9-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
-// CHECK9-NEXT:    store i8* null, i8** [[TMP35]], align 8
-// CHECK9-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK9-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i32**
-// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP37]], align 8
-// CHECK9-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
-// CHECK9-NEXT:    [[TMP39:%.*]] = bitcast i8** [[TMP38]] to i32**
-// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP39]], align 8
-// CHECK9-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK9-NEXT:    store i64 [[TMP11]], i64* [[TMP40]], align 8
-// CHECK9-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
-// CHECK9-NEXT:    store i8* null, i8** [[TMP41]], align 8
-// CHECK9-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP45:%.*]] = load i32, i32* [[N]], align 4
-// CHECK9-NEXT:    store i32 [[TMP45]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK9-NEXT:    [[TMP46:%.*]] = load i32, i32* [[M]], align 4
-// CHECK9-NEXT:    store i32 [[TMP46]], i32* [[DOTCAPTURE_EXPR_3]], align 4
-// CHECK9-NEXT:    [[TMP47:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK9-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP47]], 0
+// CHECK9-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
+// CHECK9-NEXT:    store i8* null, i8** [[TMP32]], align 8
+// CHECK9-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK9-NEXT:    [[TMP34:%.*]] = bitcast i8** [[TMP33]] to i32**
+// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP34]], align 8
+// CHECK9-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK9-NEXT:    [[TMP36:%.*]] = bitcast i8** [[TMP35]] to i32**
+// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP36]], align 8
+// CHECK9-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
+// CHECK9-NEXT:    store i64 [[TMP11]], i64* [[TMP37]], align 8
+// CHECK9-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
+// CHECK9-NEXT:    store i8* null, i8** [[TMP38]], align 8
+// CHECK9-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP42:%.*]] = load i32, i32* [[N]], align 4
+// CHECK9-NEXT:    store i32 [[TMP42]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK9-NEXT:    [[TMP43:%.*]] = load i32, i32* [[M]], align 4
+// CHECK9-NEXT:    store i32 [[TMP43]], i32* [[DOTCAPTURE_EXPR_3]], align 4
+// CHECK9-NEXT:    [[TMP44:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK9-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP44]], 0
 // CHECK9-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK9-NEXT:    [[CONV5:%.*]] = sext i32 [[DIV]] to i64
-// CHECK9-NEXT:    [[TMP48:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4
-// CHECK9-NEXT:    [[SUB6:%.*]] = sub nsw i32 [[TMP48]], 0
+// CHECK9-NEXT:    [[TMP45:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4
+// CHECK9-NEXT:    [[SUB6:%.*]] = sub nsw i32 [[TMP45]], 0
 // CHECK9-NEXT:    [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1
 // CHECK9-NEXT:    [[CONV8:%.*]] = sext i32 [[DIV7]] to i64
 // CHECK9-NEXT:    [[MUL:%.*]] = mul nsw i64 [[CONV5]], [[CONV8]]
 // CHECK9-NEXT:    [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1
 // CHECK9-NEXT:    store i64 [[SUB9]], i64* [[DOTCAPTURE_EXPR_4]], align 8
-// CHECK9-NEXT:    [[TMP49:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_4]], align 8
-// CHECK9-NEXT:    [[ADD:%.*]] = add nsw i64 [[TMP49]], 1
+// CHECK9-NEXT:    [[TMP46:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_4]], align 8
+// CHECK9-NEXT:    [[ADD:%.*]] = add nsw i64 [[TMP46]], 1
 // CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[ADD]])
-// CHECK9-NEXT:    [[TMP50:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83.region_id, i32 5, i8** [[TMP42]], i8** [[TMP43]], i64* [[TMP44]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK9-NEXT:    [[TMP51:%.*]] = icmp ne i32 [[TMP50]], 0
-// CHECK9-NEXT:    br i1 [[TMP51]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK9-NEXT:    [[TMP47:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83.region_id, i32 5, i8** [[TMP39]], i8** [[TMP40]], i64* [[TMP41]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK9-NEXT:    [[TMP48:%.*]] = icmp ne i32 [[TMP47]], 0
+// CHECK9-NEXT:    br i1 [[TMP48]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK9:       omp_offload.failed:
 // CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83(i64 [[TMP7]], i64 [[TMP9]], i64 [[TMP1]], i64 [[TMP3]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK9:       omp_offload.cont:
-// CHECK9-NEXT:    [[TMP52:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK9-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef signext [[TMP52]])
+// CHECK9-NEXT:    [[TMP49:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK9-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef signext [[TMP49]])
 // CHECK9-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK9-NEXT:    [[TMP53:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK9-NEXT:    call void @llvm.stackrestore(i8* [[TMP53]])
-// CHECK9-NEXT:    [[TMP54:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK9-NEXT:    ret i32 [[TMP54]]
+// CHECK9-NEXT:    [[TMP50:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK9-NEXT:    call void @llvm.stackrestore(i8* [[TMP50]])
+// CHECK9-NEXT:    [[TMP51:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK9-NEXT:    ret i32 [[TMP51]]
 //
 //
 // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83
@@ -1391,7 +1385,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK9-LABEL: define {{[^@]+}}@_Z5tmainIiLi10ELi2EEiT_
-// CHECK9-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK9-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK9-NEXT:  entry:
 // CHECK9-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[A:%.*]] = alloca [10 x [2 x i32]], align 4
@@ -1412,7 +1406,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 20)
-// CHECK9-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l69.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK9-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l69.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.4, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.5, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK9-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK9-NEXT:    br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK9:       omp_offload.failed:
@@ -1589,7 +1583,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK9-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK9-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK9-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK9-NEXT:  entry:
 // CHECK9-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK9-NEXT:    ret void
@@ -1642,14 +1636,14 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[TMP9:%.*]] = load i64, i64* [[M_CASTED]], align 8
 // CHECK10-NEXT:    [[TMP10:%.*]] = mul nuw i64 [[TMP1]], [[TMP3]]
 // CHECK10-NEXT:    [[TMP11:%.*]] = mul nuw i64 [[TMP10]], 4
-// CHECK10-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP13:%.*]] = bitcast i8** [[TMP12]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP7]], i64* [[TMP13]], align 8
-// CHECK10-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP7]], i64* [[TMP15]], align 8
-// CHECK10-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK10-NEXT:    store i64 4, i64* [[TMP16]], align 8
+// CHECK10-NEXT:    [[TMP12:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK10-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP12]], i8* align 8 bitcast ([5 x i64]* @.offload_sizes to i8*), i64 40, i1 false)
+// CHECK10-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP7]], i64* [[TMP14]], align 8
+// CHECK10-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP16:%.*]] = bitcast i8** [[TMP15]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP7]], i64* [[TMP16]], align 8
 // CHECK10-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
 // CHECK10-NEXT:    store i8* null, i8** [[TMP17]], align 8
 // CHECK10-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -1658,75 +1652,69 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK10-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i64*
 // CHECK10-NEXT:    store i64 [[TMP9]], i64* [[TMP21]], align 8
-// CHECK10-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK10-NEXT:    store i64 4, i64* [[TMP22]], align 8
-// CHECK10-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK10-NEXT:    store i8* null, i8** [[TMP23]], align 8
-// CHECK10-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK10-NEXT:    [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP25]], align 8
-// CHECK10-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK10-NEXT:    [[TMP27:%.*]] = bitcast i8** [[TMP26]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP27]], align 8
-// CHECK10-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK10-NEXT:    store i64 8, i64* [[TMP28]], align 8
-// CHECK10-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK10-NEXT:    store i8* null, i8** [[TMP29]], align 8
-// CHECK10-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK10-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK10-NEXT:    store i8* null, i8** [[TMP22]], align 8
+// CHECK10-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK10-NEXT:    [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP24]], align 8
+// CHECK10-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK10-NEXT:    [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP26]], align 8
+// CHECK10-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK10-NEXT:    store i8* null, i8** [[TMP27]], align 8
+// CHECK10-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK10-NEXT:    [[TMP29:%.*]] = bitcast i8** [[TMP28]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP3]], i64* [[TMP29]], align 8
+// CHECK10-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
 // CHECK10-NEXT:    [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i64*
 // CHECK10-NEXT:    store i64 [[TMP3]], i64* [[TMP31]], align 8
-// CHECK10-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK10-NEXT:    [[TMP33:%.*]] = bitcast i8** [[TMP32]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP3]], i64* [[TMP33]], align 8
-// CHECK10-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK10-NEXT:    store i64 8, i64* [[TMP34]], align 8
-// CHECK10-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
-// CHECK10-NEXT:    store i8* null, i8** [[TMP35]], align 8
-// CHECK10-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK10-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i32**
-// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP37]], align 8
-// CHECK10-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
-// CHECK10-NEXT:    [[TMP39:%.*]] = bitcast i8** [[TMP38]] to i32**
-// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP39]], align 8
-// CHECK10-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK10-NEXT:    store i64 [[TMP11]], i64* [[TMP40]], align 8
-// CHECK10-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
-// CHECK10-NEXT:    store i8* null, i8** [[TMP41]], align 8
-// CHECK10-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP45:%.*]] = load i32, i32* [[N]], align 4
-// CHECK10-NEXT:    store i32 [[TMP45]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK10-NEXT:    [[TMP46:%.*]] = load i32, i32* [[M]], align 4
-// CHECK10-NEXT:    store i32 [[TMP46]], i32* [[DOTCAPTURE_EXPR_3]], align 4
-// CHECK10-NEXT:    [[TMP47:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK10-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP47]], 0
+// CHECK10-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
+// CHECK10-NEXT:    store i8* null, i8** [[TMP32]], align 8
+// CHECK10-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK10-NEXT:    [[TMP34:%.*]] = bitcast i8** [[TMP33]] to i32**
+// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP34]], align 8
+// CHECK10-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK10-NEXT:    [[TMP36:%.*]] = bitcast i8** [[TMP35]] to i32**
+// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP36]], align 8
+// CHECK10-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
+// CHECK10-NEXT:    store i64 [[TMP11]], i64* [[TMP37]], align 8
+// CHECK10-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
+// CHECK10-NEXT:    store i8* null, i8** [[TMP38]], align 8
+// CHECK10-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP42:%.*]] = load i32, i32* [[N]], align 4
+// CHECK10-NEXT:    store i32 [[TMP42]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK10-NEXT:    [[TMP43:%.*]] = load i32, i32* [[M]], align 4
+// CHECK10-NEXT:    store i32 [[TMP43]], i32* [[DOTCAPTURE_EXPR_3]], align 4
+// CHECK10-NEXT:    [[TMP44:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK10-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP44]], 0
 // CHECK10-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK10-NEXT:    [[CONV5:%.*]] = sext i32 [[DIV]] to i64
-// CHECK10-NEXT:    [[TMP48:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4
-// CHECK10-NEXT:    [[SUB6:%.*]] = sub nsw i32 [[TMP48]], 0
+// CHECK10-NEXT:    [[TMP45:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4
+// CHECK10-NEXT:    [[SUB6:%.*]] = sub nsw i32 [[TMP45]], 0
 // CHECK10-NEXT:    [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1
 // CHECK10-NEXT:    [[CONV8:%.*]] = sext i32 [[DIV7]] to i64
 // CHECK10-NEXT:    [[MUL:%.*]] = mul nsw i64 [[CONV5]], [[CONV8]]
 // CHECK10-NEXT:    [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1
 // CHECK10-NEXT:    store i64 [[SUB9]], i64* [[DOTCAPTURE_EXPR_4]], align 8
-// CHECK10-NEXT:    [[TMP49:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_4]], align 8
-// CHECK10-NEXT:    [[ADD:%.*]] = add nsw i64 [[TMP49]], 1
+// CHECK10-NEXT:    [[TMP46:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_4]], align 8
+// CHECK10-NEXT:    [[ADD:%.*]] = add nsw i64 [[TMP46]], 1
 // CHECK10-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[ADD]])
-// CHECK10-NEXT:    [[TMP50:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83.region_id, i32 5, i8** [[TMP42]], i8** [[TMP43]], i64* [[TMP44]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK10-NEXT:    [[TMP51:%.*]] = icmp ne i32 [[TMP50]], 0
-// CHECK10-NEXT:    br i1 [[TMP51]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK10-NEXT:    [[TMP47:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83.region_id, i32 5, i8** [[TMP39]], i8** [[TMP40]], i64* [[TMP41]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK10-NEXT:    [[TMP48:%.*]] = icmp ne i32 [[TMP47]], 0
+// CHECK10-NEXT:    br i1 [[TMP48]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK10:       omp_offload.failed:
 // CHECK10-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83(i64 [[TMP7]], i64 [[TMP9]], i64 [[TMP1]], i64 [[TMP3]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK10-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK10:       omp_offload.cont:
-// CHECK10-NEXT:    [[TMP52:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK10-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef signext [[TMP52]])
+// CHECK10-NEXT:    [[TMP49:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK10-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef signext [[TMP49]])
 // CHECK10-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK10-NEXT:    [[TMP53:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK10-NEXT:    call void @llvm.stackrestore(i8* [[TMP53]])
-// CHECK10-NEXT:    [[TMP54:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK10-NEXT:    ret i32 [[TMP54]]
+// CHECK10-NEXT:    [[TMP50:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK10-NEXT:    call void @llvm.stackrestore(i8* [[TMP50]])
+// CHECK10-NEXT:    [[TMP51:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK10-NEXT:    ret i32 [[TMP51]]
 //
 //
 // CHECK10-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83
@@ -2020,7 +2008,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK10-LABEL: define {{[^@]+}}@_Z5tmainIiLi10ELi2EEiT_
-// CHECK10-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK10-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK10-NEXT:  entry:
 // CHECK10-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK10-NEXT:    [[A:%.*]] = alloca [10 x [2 x i32]], align 4
@@ -2041,7 +2029,7 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK10-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK10-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 20)
-// CHECK10-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l69.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK10-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l69.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.4, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.5, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK10-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK10-NEXT:    br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK10:       omp_offload.failed:
@@ -2218,7 +2206,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK10-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK10-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK10-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK10-NEXT:  entry:
 // CHECK10-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK10-NEXT:    ret void
@@ -2268,14 +2256,14 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP8:%.*]] = mul nuw i32 [[TMP0]], [[TMP1]]
 // CHECK11-NEXT:    [[TMP9:%.*]] = mul nuw i32 [[TMP8]], 4
 // CHECK11-NEXT:    [[TMP10:%.*]] = sext i32 [[TMP9]] to i64
-// CHECK11-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP5]], i32* [[TMP12]], align 4
-// CHECK11-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP5]], i32* [[TMP14]], align 4
-// CHECK11-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK11-NEXT:    store i64 4, i64* [[TMP15]], align 4
+// CHECK11-NEXT:    [[TMP11:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK11-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP11]], i8* align 4 bitcast ([5 x i64]* @.offload_sizes to i8*), i32 40, i1 false)
+// CHECK11-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP13:%.*]] = bitcast i8** [[TMP12]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP5]], i32* [[TMP13]], align 4
+// CHECK11-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP5]], i32* [[TMP15]], align 4
 // CHECK11-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
 // CHECK11-NEXT:    store i8* null, i8** [[TMP16]], align 4
 // CHECK11-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -2284,75 +2272,69 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK11-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32*
 // CHECK11-NEXT:    store i32 [[TMP7]], i32* [[TMP20]], align 4
-// CHECK11-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK11-NEXT:    store i64 4, i64* [[TMP21]], align 4
-// CHECK11-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK11-NEXT:    store i8* null, i8** [[TMP22]], align 4
-// CHECK11-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP24]], align 4
-// CHECK11-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP26]], align 4
-// CHECK11-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK11-NEXT:    store i64 4, i64* [[TMP27]], align 4
-// CHECK11-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK11-NEXT:    store i8* null, i8** [[TMP28]], align 4
-// CHECK11-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK11-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK11-NEXT:    store i8* null, i8** [[TMP21]], align 4
+// CHECK11-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP23]], align 4
+// CHECK11-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP25]], align 4
+// CHECK11-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK11-NEXT:    store i8* null, i8** [[TMP26]], align 4
+// CHECK11-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK11-NEXT:    [[TMP28:%.*]] = bitcast i8** [[TMP27]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP1]], i32* [[TMP28]], align 4
+// CHECK11-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
 // CHECK11-NEXT:    [[TMP30:%.*]] = bitcast i8** [[TMP29]] to i32*
 // CHECK11-NEXT:    store i32 [[TMP1]], i32* [[TMP30]], align 4
-// CHECK11-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK11-NEXT:    [[TMP32:%.*]] = bitcast i8** [[TMP31]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP1]], i32* [[TMP32]], align 4
-// CHECK11-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK11-NEXT:    store i64 4, i64* [[TMP33]], align 4
-// CHECK11-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
-// CHECK11-NEXT:    store i8* null, i8** [[TMP34]], align 4
-// CHECK11-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK11-NEXT:    [[TMP36:%.*]] = bitcast i8** [[TMP35]] to i32**
-// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP36]], align 4
-// CHECK11-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
-// CHECK11-NEXT:    [[TMP38:%.*]] = bitcast i8** [[TMP37]] to i32**
-// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP38]], align 4
-// CHECK11-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK11-NEXT:    store i64 [[TMP10]], i64* [[TMP39]], align 4
-// CHECK11-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
-// CHECK11-NEXT:    store i8* null, i8** [[TMP40]], align 4
-// CHECK11-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP44:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP44]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK11-NEXT:    [[TMP45:%.*]] = load i32, i32* [[M]], align 4
-// CHECK11-NEXT:    store i32 [[TMP45]], i32* [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK11-NEXT:    [[TMP46:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK11-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP46]], 0
+// CHECK11-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
+// CHECK11-NEXT:    store i8* null, i8** [[TMP31]], align 4
+// CHECK11-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK11-NEXT:    [[TMP33:%.*]] = bitcast i8** [[TMP32]] to i32**
+// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP33]], align 4
+// CHECK11-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK11-NEXT:    [[TMP35:%.*]] = bitcast i8** [[TMP34]] to i32**
+// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP35]], align 4
+// CHECK11-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
+// CHECK11-NEXT:    store i64 [[TMP10]], i64* [[TMP36]], align 4
+// CHECK11-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
+// CHECK11-NEXT:    store i8* null, i8** [[TMP37]], align 4
+// CHECK11-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP41:%.*]] = load i32, i32* [[N]], align 4
+// CHECK11-NEXT:    store i32 [[TMP41]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK11-NEXT:    [[TMP42:%.*]] = load i32, i32* [[M]], align 4
+// CHECK11-NEXT:    store i32 [[TMP42]], i32* [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK11-NEXT:    [[TMP43:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK11-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP43]], 0
 // CHECK11-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK11-NEXT:    [[CONV:%.*]] = sext i32 [[DIV]] to i64
-// CHECK11-NEXT:    [[TMP47:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK11-NEXT:    [[SUB4:%.*]] = sub nsw i32 [[TMP47]], 0
+// CHECK11-NEXT:    [[TMP44:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK11-NEXT:    [[SUB4:%.*]] = sub nsw i32 [[TMP44]], 0
 // CHECK11-NEXT:    [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1
 // CHECK11-NEXT:    [[CONV6:%.*]] = sext i32 [[DIV5]] to i64
 // CHECK11-NEXT:    [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]]
 // CHECK11-NEXT:    [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1
 // CHECK11-NEXT:    store i64 [[SUB7]], i64* [[DOTCAPTURE_EXPR_3]], align 8
-// CHECK11-NEXT:    [[TMP48:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8
-// CHECK11-NEXT:    [[ADD:%.*]] = add nsw i64 [[TMP48]], 1
+// CHECK11-NEXT:    [[TMP45:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8
+// CHECK11-NEXT:    [[ADD:%.*]] = add nsw i64 [[TMP45]], 1
 // CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[ADD]])
-// CHECK11-NEXT:    [[TMP49:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83.region_id, i32 5, i8** [[TMP41]], i8** [[TMP42]], i64* [[TMP43]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK11-NEXT:    [[TMP50:%.*]] = icmp ne i32 [[TMP49]], 0
-// CHECK11-NEXT:    br i1 [[TMP50]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK11-NEXT:    [[TMP46:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83.region_id, i32 5, i8** [[TMP38]], i8** [[TMP39]], i64* [[TMP40]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK11-NEXT:    [[TMP47:%.*]] = icmp ne i32 [[TMP46]], 0
+// CHECK11-NEXT:    br i1 [[TMP47]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK11:       omp_offload.failed:
 // CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83(i32 [[TMP5]], i32 [[TMP7]], i32 [[TMP0]], i32 [[TMP1]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK11:       omp_offload.cont:
-// CHECK11-NEXT:    [[TMP51:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK11-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef [[TMP51]])
+// CHECK11-NEXT:    [[TMP48:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK11-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef [[TMP48]])
 // CHECK11-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK11-NEXT:    [[TMP52:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK11-NEXT:    call void @llvm.stackrestore(i8* [[TMP52]])
-// CHECK11-NEXT:    [[TMP53:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK11-NEXT:    ret i32 [[TMP53]]
+// CHECK11-NEXT:    [[TMP49:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK11-NEXT:    call void @llvm.stackrestore(i8* [[TMP49]])
+// CHECK11-NEXT:    [[TMP50:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK11-NEXT:    ret i32 [[TMP50]]
 //
 //
 // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83
@@ -2646,7 +2628,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK11-LABEL: define {{[^@]+}}@_Z5tmainIiLi10ELi2EEiT_
-// CHECK11-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK11-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK11-NEXT:  entry:
 // CHECK11-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[A:%.*]] = alloca [10 x [2 x i32]], align 4
@@ -2667,7 +2649,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 20)
-// CHECK11-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l69.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK11-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l69.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.4, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.5, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK11-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK11-NEXT:    br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK11:       omp_offload.failed:
@@ -2838,7 +2820,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK11-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK11-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK11-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK11-NEXT:  entry:
 // CHECK11-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK11-NEXT:    ret void
@@ -2888,14 +2870,14 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[TMP8:%.*]] = mul nuw i32 [[TMP0]], [[TMP1]]
 // CHECK12-NEXT:    [[TMP9:%.*]] = mul nuw i32 [[TMP8]], 4
 // CHECK12-NEXT:    [[TMP10:%.*]] = sext i32 [[TMP9]] to i64
-// CHECK12-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP5]], i32* [[TMP12]], align 4
-// CHECK12-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP5]], i32* [[TMP14]], align 4
-// CHECK12-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK12-NEXT:    store i64 4, i64* [[TMP15]], align 4
+// CHECK12-NEXT:    [[TMP11:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK12-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP11]], i8* align 4 bitcast ([5 x i64]* @.offload_sizes to i8*), i32 40, i1 false)
+// CHECK12-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP13:%.*]] = bitcast i8** [[TMP12]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP5]], i32* [[TMP13]], align 4
+// CHECK12-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP5]], i32* [[TMP15]], align 4
 // CHECK12-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
 // CHECK12-NEXT:    store i8* null, i8** [[TMP16]], align 4
 // CHECK12-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -2904,75 +2886,69 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK12-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32*
 // CHECK12-NEXT:    store i32 [[TMP7]], i32* [[TMP20]], align 4
-// CHECK12-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK12-NEXT:    store i64 4, i64* [[TMP21]], align 4
-// CHECK12-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK12-NEXT:    store i8* null, i8** [[TMP22]], align 4
-// CHECK12-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK12-NEXT:    [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP24]], align 4
-// CHECK12-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK12-NEXT:    [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP26]], align 4
-// CHECK12-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK12-NEXT:    store i64 4, i64* [[TMP27]], align 4
-// CHECK12-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK12-NEXT:    store i8* null, i8** [[TMP28]], align 4
-// CHECK12-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK12-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK12-NEXT:    store i8* null, i8** [[TMP21]], align 4
+// CHECK12-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK12-NEXT:    [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP23]], align 4
+// CHECK12-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK12-NEXT:    [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP25]], align 4
+// CHECK12-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK12-NEXT:    store i8* null, i8** [[TMP26]], align 4
+// CHECK12-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK12-NEXT:    [[TMP28:%.*]] = bitcast i8** [[TMP27]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP1]], i32* [[TMP28]], align 4
+// CHECK12-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
 // CHECK12-NEXT:    [[TMP30:%.*]] = bitcast i8** [[TMP29]] to i32*
 // CHECK12-NEXT:    store i32 [[TMP1]], i32* [[TMP30]], align 4
-// CHECK12-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK12-NEXT:    [[TMP32:%.*]] = bitcast i8** [[TMP31]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP1]], i32* [[TMP32]], align 4
-// CHECK12-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK12-NEXT:    store i64 4, i64* [[TMP33]], align 4
-// CHECK12-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
-// CHECK12-NEXT:    store i8* null, i8** [[TMP34]], align 4
-// CHECK12-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK12-NEXT:    [[TMP36:%.*]] = bitcast i8** [[TMP35]] to i32**
-// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP36]], align 4
-// CHECK12-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
-// CHECK12-NEXT:    [[TMP38:%.*]] = bitcast i8** [[TMP37]] to i32**
-// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP38]], align 4
-// CHECK12-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK12-NEXT:    store i64 [[TMP10]], i64* [[TMP39]], align 4
-// CHECK12-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
-// CHECK12-NEXT:    store i8* null, i8** [[TMP40]], align 4
-// CHECK12-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP44:%.*]] = load i32, i32* [[N]], align 4
-// CHECK12-NEXT:    store i32 [[TMP44]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK12-NEXT:    [[TMP45:%.*]] = load i32, i32* [[M]], align 4
-// CHECK12-NEXT:    store i32 [[TMP45]], i32* [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK12-NEXT:    [[TMP46:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK12-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP46]], 0
+// CHECK12-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
+// CHECK12-NEXT:    store i8* null, i8** [[TMP31]], align 4
+// CHECK12-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK12-NEXT:    [[TMP33:%.*]] = bitcast i8** [[TMP32]] to i32**
+// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP33]], align 4
+// CHECK12-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK12-NEXT:    [[TMP35:%.*]] = bitcast i8** [[TMP34]] to i32**
+// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP35]], align 4
+// CHECK12-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
+// CHECK12-NEXT:    store i64 [[TMP10]], i64* [[TMP36]], align 4
+// CHECK12-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
+// CHECK12-NEXT:    store i8* null, i8** [[TMP37]], align 4
+// CHECK12-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP41:%.*]] = load i32, i32* [[N]], align 4
+// CHECK12-NEXT:    store i32 [[TMP41]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK12-NEXT:    [[TMP42:%.*]] = load i32, i32* [[M]], align 4
+// CHECK12-NEXT:    store i32 [[TMP42]], i32* [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK12-NEXT:    [[TMP43:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK12-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP43]], 0
 // CHECK12-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK12-NEXT:    [[CONV:%.*]] = sext i32 [[DIV]] to i64
-// CHECK12-NEXT:    [[TMP47:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK12-NEXT:    [[SUB4:%.*]] = sub nsw i32 [[TMP47]], 0
+// CHECK12-NEXT:    [[TMP44:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK12-NEXT:    [[SUB4:%.*]] = sub nsw i32 [[TMP44]], 0
 // CHECK12-NEXT:    [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1
 // CHECK12-NEXT:    [[CONV6:%.*]] = sext i32 [[DIV5]] to i64
 // CHECK12-NEXT:    [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]]
 // CHECK12-NEXT:    [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1
 // CHECK12-NEXT:    store i64 [[SUB7]], i64* [[DOTCAPTURE_EXPR_3]], align 8
-// CHECK12-NEXT:    [[TMP48:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8
-// CHECK12-NEXT:    [[ADD:%.*]] = add nsw i64 [[TMP48]], 1
+// CHECK12-NEXT:    [[TMP45:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8
+// CHECK12-NEXT:    [[ADD:%.*]] = add nsw i64 [[TMP45]], 1
 // CHECK12-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[ADD]])
-// CHECK12-NEXT:    [[TMP49:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83.region_id, i32 5, i8** [[TMP41]], i8** [[TMP42]], i64* [[TMP43]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK12-NEXT:    [[TMP50:%.*]] = icmp ne i32 [[TMP49]], 0
-// CHECK12-NEXT:    br i1 [[TMP50]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK12-NEXT:    [[TMP46:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83.region_id, i32 5, i8** [[TMP38]], i8** [[TMP39]], i64* [[TMP40]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK12-NEXT:    [[TMP47:%.*]] = icmp ne i32 [[TMP46]], 0
+// CHECK12-NEXT:    br i1 [[TMP47]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK12:       omp_offload.failed:
 // CHECK12-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83(i32 [[TMP5]], i32 [[TMP7]], i32 [[TMP0]], i32 [[TMP1]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK12-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK12:       omp_offload.cont:
-// CHECK12-NEXT:    [[TMP51:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK12-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef [[TMP51]])
+// CHECK12-NEXT:    [[TMP48:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK12-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef [[TMP48]])
 // CHECK12-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK12-NEXT:    [[TMP52:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK12-NEXT:    call void @llvm.stackrestore(i8* [[TMP52]])
-// CHECK12-NEXT:    [[TMP53:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK12-NEXT:    ret i32 [[TMP53]]
+// CHECK12-NEXT:    [[TMP49:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK12-NEXT:    call void @llvm.stackrestore(i8* [[TMP49]])
+// CHECK12-NEXT:    [[TMP50:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK12-NEXT:    ret i32 [[TMP50]]
 //
 //
 // CHECK12-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83
@@ -3266,7 +3242,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK12-LABEL: define {{[^@]+}}@_Z5tmainIiLi10ELi2EEiT_
-// CHECK12-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK12-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK12-NEXT:  entry:
 // CHECK12-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK12-NEXT:    [[A:%.*]] = alloca [10 x [2 x i32]], align 4
@@ -3287,7 +3263,7 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK12-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK12-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 20)
-// CHECK12-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l69.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK12-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l69.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.4, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.5, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK12-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK12-NEXT:    br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK12:       omp_offload.failed:
@@ -3458,7 +3434,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK12-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK12-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK12-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK12-NEXT:  entry:
 // CHECK12-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK12-NEXT:    ret void

diff  --git a/clang/test/OpenMP/teams_distribute_parallel_for_dist_schedule_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_dist_schedule_codegen.cpp
index ae20d2ea8a474..41205005c97fc 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_dist_schedule_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_dist_schedule_codegen.cpp
@@ -2440,14 +2440,14 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    store i32 [[TMP3]], i32* [[CONV]], align 4
 // CHECK9-NEXT:    [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8
 // CHECK9-NEXT:    [[TMP5:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK9-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP4]], i64* [[TMP7]], align 8
-// CHECK9-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP4]], i64* [[TMP9]], align 8
-// CHECK9-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK9-NEXT:    store i64 4, i64* [[TMP10]], align 8
+// CHECK9-NEXT:    [[TMP6:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK9-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP6]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes to i8*), i64 24, i1 false)
+// CHECK9-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP4]], i64* [[TMP8]], align 8
+// CHECK9-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP4]], i64* [[TMP10]], align 8
 // CHECK9-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
 // CHECK9-NEXT:    store i8* null, i8** [[TMP11]], align 8
 // CHECK9-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -2456,174 +2456,166 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK9-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i64*
 // CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP15]], align 8
-// CHECK9-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK9-NEXT:    store i64 8, i64* [[TMP16]], align 8
-// CHECK9-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK9-NEXT:    store i8* null, i8** [[TMP17]], align 8
-// CHECK9-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
-// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP19]], align 8
-// CHECK9-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32**
-// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP21]], align 8
-// CHECK9-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK9-NEXT:    store i64 [[TMP5]], i64* [[TMP22]], align 8
-// CHECK9-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK9-NEXT:    store i8* null, i8** [[TMP23]], align 8
-// CHECK9-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP27:%.*]] = load i32, i32* [[N]], align 4
-// CHECK9-NEXT:    store i32 [[TMP27]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK9-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK9-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP28]], 0
+// CHECK9-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK9-NEXT:    store i8* null, i8** [[TMP16]], align 8
+// CHECK9-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32**
+// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP18]], align 8
+// CHECK9-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32**
+// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP20]], align 8
+// CHECK9-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
+// CHECK9-NEXT:    store i64 [[TMP5]], i64* [[TMP21]], align 8
+// CHECK9-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK9-NEXT:    store i8* null, i8** [[TMP22]], align 8
+// CHECK9-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP26:%.*]] = load i32, i32* [[N]], align 4
+// CHECK9-NEXT:    store i32 [[TMP26]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK9-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK9-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP27]], 0
 // CHECK9-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK9-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK9-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK9-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK9-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP29]], 1
-// CHECK9-NEXT:    [[TMP30:%.*]] = zext i32 [[ADD]] to i64
-// CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP30]])
-// CHECK9-NEXT:    [[TMP31:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l106.region_id, i32 3, i8** [[TMP24]], i8** [[TMP25]], i64* [[TMP26]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK9-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
-// CHECK9-NEXT:    br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK9-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK9-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP28]], 1
+// CHECK9-NEXT:    [[TMP29:%.*]] = zext i32 [[ADD]] to i64
+// CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP29]])
+// CHECK9-NEXT:    [[TMP30:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l106.region_id, i32 3, i8** [[TMP23]], i8** [[TMP24]], i64* [[TMP25]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK9-NEXT:    [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
+// CHECK9-NEXT:    br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK9:       omp_offload.failed:
 // CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l106(i64 [[TMP4]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK9:       omp_offload.cont:
-// CHECK9-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N]], align 4
+// CHECK9-NEXT:    [[TMP32:%.*]] = load i32, i32* [[N]], align 4
 // CHECK9-NEXT:    [[CONV4:%.*]] = bitcast i64* [[N_CASTED3]] to i32*
-// CHECK9-NEXT:    store i32 [[TMP33]], i32* [[CONV4]], align 4
-// CHECK9-NEXT:    [[TMP34:%.*]] = load i64, i64* [[N_CASTED3]], align 8
-// CHECK9-NEXT:    [[TMP35:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK9-NEXT:    store i32 [[TMP32]], i32* [[CONV4]], align 4
+// CHECK9-NEXT:    [[TMP33:%.*]] = load i64, i64* [[N_CASTED3]], align 8
+// CHECK9-NEXT:    [[TMP34:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK9-NEXT:    [[TMP35:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES8]] to i8*
+// CHECK9-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP35]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes.4 to i8*), i64 24, i1 false)
 // CHECK9-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP34]], i64* [[TMP37]], align 8
+// CHECK9-NEXT:    store i64 [[TMP33]], i64* [[TMP37]], align 8
 // CHECK9-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP39:%.*]] = bitcast i8** [[TMP38]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP34]], i64* [[TMP39]], align 8
-// CHECK9-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 0
-// CHECK9-NEXT:    store i64 4, i64* [[TMP40]], align 8
-// CHECK9-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 0
-// CHECK9-NEXT:    store i8* null, i8** [[TMP41]], align 8
-// CHECK9-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 1
-// CHECK9-NEXT:    [[TMP43:%.*]] = bitcast i8** [[TMP42]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP43]], align 8
-// CHECK9-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 1
-// CHECK9-NEXT:    [[TMP45:%.*]] = bitcast i8** [[TMP44]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP45]], align 8
-// CHECK9-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 1
-// CHECK9-NEXT:    store i64 8, i64* [[TMP46]], align 8
-// CHECK9-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 1
-// CHECK9-NEXT:    store i8* null, i8** [[TMP47]], align 8
-// CHECK9-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 2
+// CHECK9-NEXT:    store i64 [[TMP33]], i64* [[TMP39]], align 8
+// CHECK9-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 0
+// CHECK9-NEXT:    store i8* null, i8** [[TMP40]], align 8
+// CHECK9-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 1
+// CHECK9-NEXT:    [[TMP42:%.*]] = bitcast i8** [[TMP41]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP42]], align 8
+// CHECK9-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 1
+// CHECK9-NEXT:    [[TMP44:%.*]] = bitcast i8** [[TMP43]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP44]], align 8
+// CHECK9-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 1
+// CHECK9-NEXT:    store i8* null, i8** [[TMP45]], align 8
+// CHECK9-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP47:%.*]] = bitcast i8** [[TMP46]] to i32**
+// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP47]], align 8
+// CHECK9-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 2
 // CHECK9-NEXT:    [[TMP49:%.*]] = bitcast i8** [[TMP48]] to i32**
 // CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP49]], align 8
-// CHECK9-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP51:%.*]] = bitcast i8** [[TMP50]] to i32**
-// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP51]], align 8
-// CHECK9-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 2
-// CHECK9-NEXT:    store i64 [[TMP35]], i64* [[TMP52]], align 8
-// CHECK9-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 2
-// CHECK9-NEXT:    store i8* null, i8** [[TMP53]], align 8
-// CHECK9-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP57:%.*]] = load i32, i32* [[N]], align 4
-// CHECK9-NEXT:    store i32 [[TMP57]], i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK9-NEXT:    [[TMP58:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK9-NEXT:    [[SUB12:%.*]] = sub nsw i32 [[TMP58]], 0
+// CHECK9-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 2
+// CHECK9-NEXT:    store i64 [[TMP34]], i64* [[TMP50]], align 8
+// CHECK9-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 2
+// CHECK9-NEXT:    store i8* null, i8** [[TMP51]], align 8
+// CHECK9-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP55:%.*]] = load i32, i32* [[N]], align 4
+// CHECK9-NEXT:    store i32 [[TMP55]], i32* [[DOTCAPTURE_EXPR_10]], align 4
+// CHECK9-NEXT:    [[TMP56:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
+// CHECK9-NEXT:    [[SUB12:%.*]] = sub nsw i32 [[TMP56]], 0
 // CHECK9-NEXT:    [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1
 // CHECK9-NEXT:    [[SUB14:%.*]] = sub nsw i32 [[DIV13]], 1
 // CHECK9-NEXT:    store i32 [[SUB14]], i32* [[DOTCAPTURE_EXPR_11]], align 4
-// CHECK9-NEXT:    [[TMP59:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_11]], align 4
-// CHECK9-NEXT:    [[ADD15:%.*]] = add nsw i32 [[TMP59]], 1
-// CHECK9-NEXT:    [[TMP60:%.*]] = zext i32 [[ADD15]] to i64
-// CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP60]])
-// CHECK9-NEXT:    [[TMP61:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l111.region_id, i32 3, i8** [[TMP54]], i8** [[TMP55]], i64* [[TMP56]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK9-NEXT:    [[TMP62:%.*]] = icmp ne i32 [[TMP61]], 0
-// CHECK9-NEXT:    br i1 [[TMP62]], label [[OMP_OFFLOAD_FAILED16:%.*]], label [[OMP_OFFLOAD_CONT17:%.*]]
+// CHECK9-NEXT:    [[TMP57:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_11]], align 4
+// CHECK9-NEXT:    [[ADD15:%.*]] = add nsw i32 [[TMP57]], 1
+// CHECK9-NEXT:    [[TMP58:%.*]] = zext i32 [[ADD15]] to i64
+// CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP58]])
+// CHECK9-NEXT:    [[TMP59:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l111.region_id, i32 3, i8** [[TMP52]], i8** [[TMP53]], i64* [[TMP54]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.5, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK9-NEXT:    [[TMP60:%.*]] = icmp ne i32 [[TMP59]], 0
+// CHECK9-NEXT:    br i1 [[TMP60]], label [[OMP_OFFLOAD_FAILED16:%.*]], label [[OMP_OFFLOAD_CONT17:%.*]]
 // CHECK9:       omp_offload.failed16:
-// CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l111(i64 [[TMP34]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l111(i64 [[TMP33]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT17]]
 // CHECK9:       omp_offload.cont17:
-// CHECK9-NEXT:    [[TMP63:%.*]] = load i32, i32* [[M]], align 4
+// CHECK9-NEXT:    [[TMP61:%.*]] = load i32, i32* [[M]], align 4
 // CHECK9-NEXT:    [[CONV18:%.*]] = bitcast i64* [[M_CASTED]] to i32*
-// CHECK9-NEXT:    store i32 [[TMP63]], i32* [[CONV18]], align 4
-// CHECK9-NEXT:    [[TMP64:%.*]] = load i64, i64* [[M_CASTED]], align 8
-// CHECK9-NEXT:    [[TMP65:%.*]] = load i32, i32* [[N]], align 4
+// CHECK9-NEXT:    store i32 [[TMP61]], i32* [[CONV18]], align 4
+// CHECK9-NEXT:    [[TMP62:%.*]] = load i64, i64* [[M_CASTED]], align 8
+// CHECK9-NEXT:    [[TMP63:%.*]] = load i32, i32* [[N]], align 4
 // CHECK9-NEXT:    [[CONV20:%.*]] = bitcast i64* [[N_CASTED19]] to i32*
-// CHECK9-NEXT:    store i32 [[TMP65]], i32* [[CONV20]], align 4
-// CHECK9-NEXT:    [[TMP66:%.*]] = load i64, i64* [[N_CASTED19]], align 8
-// CHECK9-NEXT:    [[TMP67:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK9-NEXT:    [[TMP68:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP69:%.*]] = bitcast i8** [[TMP68]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP64]], i64* [[TMP69]], align 8
-// CHECK9-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP71:%.*]] = bitcast i8** [[TMP70]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP64]], i64* [[TMP71]], align 8
-// CHECK9-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES24]], i32 0, i32 0
-// CHECK9-NEXT:    store i64 4, i64* [[TMP72]], align 8
-// CHECK9-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 0
-// CHECK9-NEXT:    store i8* null, i8** [[TMP73]], align 8
-// CHECK9-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 1
+// CHECK9-NEXT:    store i32 [[TMP63]], i32* [[CONV20]], align 4
+// CHECK9-NEXT:    [[TMP64:%.*]] = load i64, i64* [[N_CASTED19]], align 8
+// CHECK9-NEXT:    [[TMP65:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK9-NEXT:    [[TMP66:%.*]] = bitcast [4 x i64]* [[DOTOFFLOAD_SIZES24]] to i8*
+// CHECK9-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP66]], i8* align 8 bitcast ([4 x i64]* @.offload_sizes.8 to i8*), i64 32, i1 false)
+// CHECK9-NEXT:    [[TMP67:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP68:%.*]] = bitcast i8** [[TMP67]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP62]], i64* [[TMP68]], align 8
+// CHECK9-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP70:%.*]] = bitcast i8** [[TMP69]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP62]], i64* [[TMP70]], align 8
+// CHECK9-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 0
+// CHECK9-NEXT:    store i8* null, i8** [[TMP71]], align 8
+// CHECK9-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 1
+// CHECK9-NEXT:    [[TMP73:%.*]] = bitcast i8** [[TMP72]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP64]], i64* [[TMP73]], align 8
+// CHECK9-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 1
 // CHECK9-NEXT:    [[TMP75:%.*]] = bitcast i8** [[TMP74]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP66]], i64* [[TMP75]], align 8
-// CHECK9-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 1
-// CHECK9-NEXT:    [[TMP77:%.*]] = bitcast i8** [[TMP76]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP66]], i64* [[TMP77]], align 8
-// CHECK9-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES24]], i32 0, i32 1
-// CHECK9-NEXT:    store i64 4, i64* [[TMP78]], align 8
-// CHECK9-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 1
-// CHECK9-NEXT:    store i8* null, i8** [[TMP79]], align 8
-// CHECK9-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP81:%.*]] = bitcast i8** [[TMP80]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP81]], align 8
-// CHECK9-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP83]], align 8
-// CHECK9-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES24]], i32 0, i32 2
-// CHECK9-NEXT:    store i64 8, i64* [[TMP84]], align 8
-// CHECK9-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 2
-// CHECK9-NEXT:    store i8* null, i8** [[TMP85]], align 8
-// CHECK9-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 3
-// CHECK9-NEXT:    [[TMP87:%.*]] = bitcast i8** [[TMP86]] to i32**
-// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP87]], align 8
-// CHECK9-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 3
-// CHECK9-NEXT:    [[TMP89:%.*]] = bitcast i8** [[TMP88]] to i32**
-// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP89]], align 8
-// CHECK9-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES24]], i32 0, i32 3
-// CHECK9-NEXT:    store i64 [[TMP67]], i64* [[TMP90]], align 8
-// CHECK9-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 3
-// CHECK9-NEXT:    store i8* null, i8** [[TMP91]], align 8
-// CHECK9-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES24]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP95:%.*]] = load i32, i32* [[N]], align 4
-// CHECK9-NEXT:    store i32 [[TMP95]], i32* [[DOTCAPTURE_EXPR_26]], align 4
-// CHECK9-NEXT:    [[TMP96:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_26]], align 4
-// CHECK9-NEXT:    [[SUB28:%.*]] = sub nsw i32 [[TMP96]], 0
+// CHECK9-NEXT:    store i64 [[TMP64]], i64* [[TMP75]], align 8
+// CHECK9-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 1
+// CHECK9-NEXT:    store i8* null, i8** [[TMP76]], align 8
+// CHECK9-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP78:%.*]] = bitcast i8** [[TMP77]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP78]], align 8
+// CHECK9-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP80]], align 8
+// CHECK9-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 2
+// CHECK9-NEXT:    store i8* null, i8** [[TMP81]], align 8
+// CHECK9-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 3
+// CHECK9-NEXT:    [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i32**
+// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP83]], align 8
+// CHECK9-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 3
+// CHECK9-NEXT:    [[TMP85:%.*]] = bitcast i8** [[TMP84]] to i32**
+// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP85]], align 8
+// CHECK9-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES24]], i32 0, i32 3
+// CHECK9-NEXT:    store i64 [[TMP65]], i64* [[TMP86]], align 8
+// CHECK9-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 3
+// CHECK9-NEXT:    store i8* null, i8** [[TMP87]], align 8
+// CHECK9-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES24]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP91:%.*]] = load i32, i32* [[N]], align 4
+// CHECK9-NEXT:    store i32 [[TMP91]], i32* [[DOTCAPTURE_EXPR_26]], align 4
+// CHECK9-NEXT:    [[TMP92:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_26]], align 4
+// CHECK9-NEXT:    [[SUB28:%.*]] = sub nsw i32 [[TMP92]], 0
 // CHECK9-NEXT:    [[DIV29:%.*]] = sdiv i32 [[SUB28]], 1
 // CHECK9-NEXT:    [[SUB30:%.*]] = sub nsw i32 [[DIV29]], 1
 // CHECK9-NEXT:    store i32 [[SUB30]], i32* [[DOTCAPTURE_EXPR_27]], align 4
-// CHECK9-NEXT:    [[TMP97:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_27]], align 4
-// CHECK9-NEXT:    [[ADD31:%.*]] = add nsw i32 [[TMP97]], 1
-// CHECK9-NEXT:    [[TMP98:%.*]] = zext i32 [[ADD31]] to i64
-// CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP98]])
-// CHECK9-NEXT:    [[TMP99:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l116.region_id, i32 4, i8** [[TMP92]], i8** [[TMP93]], i64* [[TMP94]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.7, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK9-NEXT:    [[TMP100:%.*]] = icmp ne i32 [[TMP99]], 0
-// CHECK9-NEXT:    br i1 [[TMP100]], label [[OMP_OFFLOAD_FAILED32:%.*]], label [[OMP_OFFLOAD_CONT33:%.*]]
+// CHECK9-NEXT:    [[TMP93:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_27]], align 4
+// CHECK9-NEXT:    [[ADD31:%.*]] = add nsw i32 [[TMP93]], 1
+// CHECK9-NEXT:    [[TMP94:%.*]] = zext i32 [[ADD31]] to i64
+// CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP94]])
+// CHECK9-NEXT:    [[TMP95:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l116.region_id, i32 4, i8** [[TMP88]], i8** [[TMP89]], i64* [[TMP90]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK9-NEXT:    [[TMP96:%.*]] = icmp ne i32 [[TMP95]], 0
+// CHECK9-NEXT:    br i1 [[TMP96]], label [[OMP_OFFLOAD_FAILED32:%.*]], label [[OMP_OFFLOAD_CONT33:%.*]]
 // CHECK9:       omp_offload.failed32:
-// CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l116(i64 [[TMP64]], i64 [[TMP66]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l116(i64 [[TMP62]], i64 [[TMP64]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT33]]
 // CHECK9:       omp_offload.cont33:
-// CHECK9-NEXT:    [[TMP101:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK9-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10EEiT_(i32 noundef signext [[TMP101]])
+// CHECK9-NEXT:    [[TMP97:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK9-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10EEiT_(i32 noundef signext [[TMP97]])
 // CHECK9-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK9-NEXT:    [[TMP102:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK9-NEXT:    call void @llvm.stackrestore(i8* [[TMP102]])
-// CHECK9-NEXT:    [[TMP103:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK9-NEXT:    ret i32 [[TMP103]]
+// CHECK9-NEXT:    [[TMP98:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK9-NEXT:    call void @llvm.stackrestore(i8* [[TMP98]])
+// CHECK9-NEXT:    [[TMP99:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK9-NEXT:    ret i32 [[TMP99]]
 //
 //
 // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l106
@@ -3073,11 +3065,11 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK9-NEXT:    store i32 [[TMP3]], i32* [[CONV2]], align 4
 // CHECK9-NEXT:    [[TMP4:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32* [[CONV1]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP4]])
+// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* [[CONV1]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP4]])
 // CHECK9-NEXT:    ret void
 //
 //
-// CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..5
+// CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..6
 // CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK9-NEXT:  entry:
 // CHECK9-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -3159,7 +3151,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK9-NEXT:    store i32 [[TMP21]], i32* [[CONV7]], align 4
 // CHECK9-NEXT:    [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]], i64 [[TMP22]])
+// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*, i64)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]], i64 [[TMP22]])
 // CHECK9-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK9:       omp.inner.for.inc:
 // CHECK9-NEXT:    [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -3201,7 +3193,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    ret void
 //
 //
-// CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..6
+// CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..7
 // CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK9-NEXT:  entry:
 // CHECK9-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -3310,7 +3302,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK9-LABEL: define {{[^@]+}}@_Z5tmainIiLi10EEiT_
-// CHECK9-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK9-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK9-NEXT:  entry:
 // CHECK9-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[A:%.*]] = alloca [10 x i32], align 4
@@ -3341,7 +3333,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK9-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l84.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.10, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK9-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l84.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.12, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK9-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK9-NEXT:    br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK9:       omp_offload.failed:
@@ -3359,7 +3351,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
 // CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK9-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l89.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.13, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.14, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK9-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l89.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.16, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.17, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK9-NEXT:    [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
 // CHECK9-NEXT:    br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]]
 // CHECK9:       omp_offload.failed5:
@@ -3389,7 +3381,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS8]], i32 0, i32 0
 // CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK9-NEXT:    [[TMP32:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l94.region_id, i32 2, i8** [[TMP30]], i8** [[TMP31]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.17, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.18, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK9-NEXT:    [[TMP32:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l94.region_id, i32 2, i8** [[TMP30]], i8** [[TMP31]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.20, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.21, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK9-NEXT:    [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0
 // CHECK9-NEXT:    br i1 [[TMP33]], label [[OMP_OFFLOAD_FAILED11:%.*]], label [[OMP_OFFLOAD_CONT12:%.*]]
 // CHECK9:       omp_offload.failed11:
@@ -3405,11 +3397,11 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8
 // CHECK9-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8
 // CHECK9-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8
-// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK9-NEXT:    ret void
 //
 //
-// CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..8
+// CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..10
 // CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK9-NEXT:  entry:
 // CHECK9-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -3457,7 +3449,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
 // CHECK9-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
 // CHECK9-NEXT:    [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
-// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]])
+// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]])
 // CHECK9-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK9:       omp.inner.for.inc:
 // CHECK9-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -3472,7 +3464,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    ret void
 //
 //
-// CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..9
+// CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..11
 // CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK9-NEXT:  entry:
 // CHECK9-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -3555,11 +3547,11 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8
 // CHECK9-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8
 // CHECK9-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8
-// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK9-NEXT:    ret void
 //
 //
-// CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..11
+// CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..14
 // CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK9-NEXT:  entry:
 // CHECK9-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -3607,7 +3599,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
 // CHECK9-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
 // CHECK9-NEXT:    [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
-// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]])
+// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]])
 // CHECK9-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK9:       omp.inner.for.inc:
 // CHECK9-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -3622,7 +3614,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    ret void
 //
 //
-// CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..12
+// CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..15
 // CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK9-NEXT:  entry:
 // CHECK9-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -3716,11 +3708,11 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK9-NEXT:    store i32 [[TMP2]], i32* [[CONV1]], align 4
 // CHECK9-NEXT:    [[TMP3:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i64 [[TMP3]])
+// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i64)* @.omp_outlined..18 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i64 [[TMP3]])
 // CHECK9-NEXT:    ret void
 //
 //
-// CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..15
+// CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..18
 // CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK9-NEXT:  entry:
 // CHECK9-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -3776,7 +3768,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK9-NEXT:    store i32 [[TMP12]], i32* [[CONV2]], align 4
 // CHECK9-NEXT:    [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]])
+// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]])
 // CHECK9-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK9:       omp.inner.for.inc:
 // CHECK9-NEXT:    [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -3812,7 +3804,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    ret void
 //
 //
-// CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..16
+// CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..19
 // CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK9-NEXT:  entry:
 // CHECK9-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -3893,7 +3885,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK9-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK9-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK9-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK9-NEXT:  entry:
 // CHECK9-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK9-NEXT:    ret void
@@ -3950,14 +3942,14 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    store i32 [[TMP3]], i32* [[CONV]], align 4
 // CHECK10-NEXT:    [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8
 // CHECK10-NEXT:    [[TMP5:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK10-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP4]], i64* [[TMP7]], align 8
-// CHECK10-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP4]], i64* [[TMP9]], align 8
-// CHECK10-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK10-NEXT:    store i64 4, i64* [[TMP10]], align 8
+// CHECK10-NEXT:    [[TMP6:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK10-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP6]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes to i8*), i64 24, i1 false)
+// CHECK10-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP4]], i64* [[TMP8]], align 8
+// CHECK10-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP4]], i64* [[TMP10]], align 8
 // CHECK10-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
 // CHECK10-NEXT:    store i8* null, i8** [[TMP11]], align 8
 // CHECK10-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -3966,174 +3958,166 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK10-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i64*
 // CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP15]], align 8
-// CHECK10-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK10-NEXT:    store i64 8, i64* [[TMP16]], align 8
-// CHECK10-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK10-NEXT:    store i8* null, i8** [[TMP17]], align 8
-// CHECK10-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK10-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
-// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP19]], align 8
-// CHECK10-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK10-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32**
-// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP21]], align 8
-// CHECK10-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK10-NEXT:    store i64 [[TMP5]], i64* [[TMP22]], align 8
-// CHECK10-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK10-NEXT:    store i8* null, i8** [[TMP23]], align 8
-// CHECK10-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP27:%.*]] = load i32, i32* [[N]], align 4
-// CHECK10-NEXT:    store i32 [[TMP27]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK10-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK10-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP28]], 0
+// CHECK10-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK10-NEXT:    store i8* null, i8** [[TMP16]], align 8
+// CHECK10-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK10-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32**
+// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP18]], align 8
+// CHECK10-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK10-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32**
+// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP20]], align 8
+// CHECK10-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
+// CHECK10-NEXT:    store i64 [[TMP5]], i64* [[TMP21]], align 8
+// CHECK10-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK10-NEXT:    store i8* null, i8** [[TMP22]], align 8
+// CHECK10-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP26:%.*]] = load i32, i32* [[N]], align 4
+// CHECK10-NEXT:    store i32 [[TMP26]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK10-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK10-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP27]], 0
 // CHECK10-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK10-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK10-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK10-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK10-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP29]], 1
-// CHECK10-NEXT:    [[TMP30:%.*]] = zext i32 [[ADD]] to i64
-// CHECK10-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP30]])
-// CHECK10-NEXT:    [[TMP31:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l106.region_id, i32 3, i8** [[TMP24]], i8** [[TMP25]], i64* [[TMP26]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK10-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
-// CHECK10-NEXT:    br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK10-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK10-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP28]], 1
+// CHECK10-NEXT:    [[TMP29:%.*]] = zext i32 [[ADD]] to i64
+// CHECK10-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP29]])
+// CHECK10-NEXT:    [[TMP30:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l106.region_id, i32 3, i8** [[TMP23]], i8** [[TMP24]], i64* [[TMP25]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK10-NEXT:    [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
+// CHECK10-NEXT:    br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK10:       omp_offload.failed:
 // CHECK10-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l106(i64 [[TMP4]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK10-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK10:       omp_offload.cont:
-// CHECK10-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N]], align 4
+// CHECK10-NEXT:    [[TMP32:%.*]] = load i32, i32* [[N]], align 4
 // CHECK10-NEXT:    [[CONV4:%.*]] = bitcast i64* [[N_CASTED3]] to i32*
-// CHECK10-NEXT:    store i32 [[TMP33]], i32* [[CONV4]], align 4
-// CHECK10-NEXT:    [[TMP34:%.*]] = load i64, i64* [[N_CASTED3]], align 8
-// CHECK10-NEXT:    [[TMP35:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK10-NEXT:    store i32 [[TMP32]], i32* [[CONV4]], align 4
+// CHECK10-NEXT:    [[TMP33:%.*]] = load i64, i64* [[N_CASTED3]], align 8
+// CHECK10-NEXT:    [[TMP34:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK10-NEXT:    [[TMP35:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES8]] to i8*
+// CHECK10-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP35]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes.4 to i8*), i64 24, i1 false)
 // CHECK10-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
 // CHECK10-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP34]], i64* [[TMP37]], align 8
+// CHECK10-NEXT:    store i64 [[TMP33]], i64* [[TMP37]], align 8
 // CHECK10-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
 // CHECK10-NEXT:    [[TMP39:%.*]] = bitcast i8** [[TMP38]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP34]], i64* [[TMP39]], align 8
-// CHECK10-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 0
-// CHECK10-NEXT:    store i64 4, i64* [[TMP40]], align 8
-// CHECK10-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 0
-// CHECK10-NEXT:    store i8* null, i8** [[TMP41]], align 8
-// CHECK10-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 1
-// CHECK10-NEXT:    [[TMP43:%.*]] = bitcast i8** [[TMP42]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP43]], align 8
-// CHECK10-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 1
-// CHECK10-NEXT:    [[TMP45:%.*]] = bitcast i8** [[TMP44]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP45]], align 8
-// CHECK10-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 1
-// CHECK10-NEXT:    store i64 8, i64* [[TMP46]], align 8
-// CHECK10-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 1
-// CHECK10-NEXT:    store i8* null, i8** [[TMP47]], align 8
-// CHECK10-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 2
+// CHECK10-NEXT:    store i64 [[TMP33]], i64* [[TMP39]], align 8
+// CHECK10-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 0
+// CHECK10-NEXT:    store i8* null, i8** [[TMP40]], align 8
+// CHECK10-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 1
+// CHECK10-NEXT:    [[TMP42:%.*]] = bitcast i8** [[TMP41]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP42]], align 8
+// CHECK10-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 1
+// CHECK10-NEXT:    [[TMP44:%.*]] = bitcast i8** [[TMP43]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP44]], align 8
+// CHECK10-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 1
+// CHECK10-NEXT:    store i8* null, i8** [[TMP45]], align 8
+// CHECK10-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 2
+// CHECK10-NEXT:    [[TMP47:%.*]] = bitcast i8** [[TMP46]] to i32**
+// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP47]], align 8
+// CHECK10-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 2
 // CHECK10-NEXT:    [[TMP49:%.*]] = bitcast i8** [[TMP48]] to i32**
 // CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP49]], align 8
-// CHECK10-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 2
-// CHECK10-NEXT:    [[TMP51:%.*]] = bitcast i8** [[TMP50]] to i32**
-// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP51]], align 8
-// CHECK10-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 2
-// CHECK10-NEXT:    store i64 [[TMP35]], i64* [[TMP52]], align 8
-// CHECK10-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 2
-// CHECK10-NEXT:    store i8* null, i8** [[TMP53]], align 8
-// CHECK10-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP57:%.*]] = load i32, i32* [[N]], align 4
-// CHECK10-NEXT:    store i32 [[TMP57]], i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK10-NEXT:    [[TMP58:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK10-NEXT:    [[SUB12:%.*]] = sub nsw i32 [[TMP58]], 0
+// CHECK10-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 2
+// CHECK10-NEXT:    store i64 [[TMP34]], i64* [[TMP50]], align 8
+// CHECK10-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 2
+// CHECK10-NEXT:    store i8* null, i8** [[TMP51]], align 8
+// CHECK10-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP55:%.*]] = load i32, i32* [[N]], align 4
+// CHECK10-NEXT:    store i32 [[TMP55]], i32* [[DOTCAPTURE_EXPR_10]], align 4
+// CHECK10-NEXT:    [[TMP56:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
+// CHECK10-NEXT:    [[SUB12:%.*]] = sub nsw i32 [[TMP56]], 0
 // CHECK10-NEXT:    [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1
 // CHECK10-NEXT:    [[SUB14:%.*]] = sub nsw i32 [[DIV13]], 1
 // CHECK10-NEXT:    store i32 [[SUB14]], i32* [[DOTCAPTURE_EXPR_11]], align 4
-// CHECK10-NEXT:    [[TMP59:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_11]], align 4
-// CHECK10-NEXT:    [[ADD15:%.*]] = add nsw i32 [[TMP59]], 1
-// CHECK10-NEXT:    [[TMP60:%.*]] = zext i32 [[ADD15]] to i64
-// CHECK10-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP60]])
-// CHECK10-NEXT:    [[TMP61:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l111.region_id, i32 3, i8** [[TMP54]], i8** [[TMP55]], i64* [[TMP56]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK10-NEXT:    [[TMP62:%.*]] = icmp ne i32 [[TMP61]], 0
-// CHECK10-NEXT:    br i1 [[TMP62]], label [[OMP_OFFLOAD_FAILED16:%.*]], label [[OMP_OFFLOAD_CONT17:%.*]]
+// CHECK10-NEXT:    [[TMP57:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_11]], align 4
+// CHECK10-NEXT:    [[ADD15:%.*]] = add nsw i32 [[TMP57]], 1
+// CHECK10-NEXT:    [[TMP58:%.*]] = zext i32 [[ADD15]] to i64
+// CHECK10-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP58]])
+// CHECK10-NEXT:    [[TMP59:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l111.region_id, i32 3, i8** [[TMP52]], i8** [[TMP53]], i64* [[TMP54]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.5, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK10-NEXT:    [[TMP60:%.*]] = icmp ne i32 [[TMP59]], 0
+// CHECK10-NEXT:    br i1 [[TMP60]], label [[OMP_OFFLOAD_FAILED16:%.*]], label [[OMP_OFFLOAD_CONT17:%.*]]
 // CHECK10:       omp_offload.failed16:
-// CHECK10-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l111(i64 [[TMP34]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK10-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l111(i64 [[TMP33]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK10-NEXT:    br label [[OMP_OFFLOAD_CONT17]]
 // CHECK10:       omp_offload.cont17:
-// CHECK10-NEXT:    [[TMP63:%.*]] = load i32, i32* [[M]], align 4
+// CHECK10-NEXT:    [[TMP61:%.*]] = load i32, i32* [[M]], align 4
 // CHECK10-NEXT:    [[CONV18:%.*]] = bitcast i64* [[M_CASTED]] to i32*
-// CHECK10-NEXT:    store i32 [[TMP63]], i32* [[CONV18]], align 4
-// CHECK10-NEXT:    [[TMP64:%.*]] = load i64, i64* [[M_CASTED]], align 8
-// CHECK10-NEXT:    [[TMP65:%.*]] = load i32, i32* [[N]], align 4
+// CHECK10-NEXT:    store i32 [[TMP61]], i32* [[CONV18]], align 4
+// CHECK10-NEXT:    [[TMP62:%.*]] = load i64, i64* [[M_CASTED]], align 8
+// CHECK10-NEXT:    [[TMP63:%.*]] = load i32, i32* [[N]], align 4
 // CHECK10-NEXT:    [[CONV20:%.*]] = bitcast i64* [[N_CASTED19]] to i32*
-// CHECK10-NEXT:    store i32 [[TMP65]], i32* [[CONV20]], align 4
-// CHECK10-NEXT:    [[TMP66:%.*]] = load i64, i64* [[N_CASTED19]], align 8
-// CHECK10-NEXT:    [[TMP67:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK10-NEXT:    [[TMP68:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP69:%.*]] = bitcast i8** [[TMP68]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP64]], i64* [[TMP69]], align 8
-// CHECK10-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP71:%.*]] = bitcast i8** [[TMP70]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP64]], i64* [[TMP71]], align 8
-// CHECK10-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES24]], i32 0, i32 0
-// CHECK10-NEXT:    store i64 4, i64* [[TMP72]], align 8
-// CHECK10-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 0
-// CHECK10-NEXT:    store i8* null, i8** [[TMP73]], align 8
-// CHECK10-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 1
+// CHECK10-NEXT:    store i32 [[TMP63]], i32* [[CONV20]], align 4
+// CHECK10-NEXT:    [[TMP64:%.*]] = load i64, i64* [[N_CASTED19]], align 8
+// CHECK10-NEXT:    [[TMP65:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK10-NEXT:    [[TMP66:%.*]] = bitcast [4 x i64]* [[DOTOFFLOAD_SIZES24]] to i8*
+// CHECK10-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP66]], i8* align 8 bitcast ([4 x i64]* @.offload_sizes.8 to i8*), i64 32, i1 false)
+// CHECK10-NEXT:    [[TMP67:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP68:%.*]] = bitcast i8** [[TMP67]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP62]], i64* [[TMP68]], align 8
+// CHECK10-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP70:%.*]] = bitcast i8** [[TMP69]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP62]], i64* [[TMP70]], align 8
+// CHECK10-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 0
+// CHECK10-NEXT:    store i8* null, i8** [[TMP71]], align 8
+// CHECK10-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 1
+// CHECK10-NEXT:    [[TMP73:%.*]] = bitcast i8** [[TMP72]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP64]], i64* [[TMP73]], align 8
+// CHECK10-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 1
 // CHECK10-NEXT:    [[TMP75:%.*]] = bitcast i8** [[TMP74]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP66]], i64* [[TMP75]], align 8
-// CHECK10-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 1
-// CHECK10-NEXT:    [[TMP77:%.*]] = bitcast i8** [[TMP76]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP66]], i64* [[TMP77]], align 8
-// CHECK10-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES24]], i32 0, i32 1
-// CHECK10-NEXT:    store i64 4, i64* [[TMP78]], align 8
-// CHECK10-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 1
-// CHECK10-NEXT:    store i8* null, i8** [[TMP79]], align 8
-// CHECK10-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 2
-// CHECK10-NEXT:    [[TMP81:%.*]] = bitcast i8** [[TMP80]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP81]], align 8
-// CHECK10-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 2
-// CHECK10-NEXT:    [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP83]], align 8
-// CHECK10-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES24]], i32 0, i32 2
-// CHECK10-NEXT:    store i64 8, i64* [[TMP84]], align 8
-// CHECK10-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 2
-// CHECK10-NEXT:    store i8* null, i8** [[TMP85]], align 8
-// CHECK10-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 3
-// CHECK10-NEXT:    [[TMP87:%.*]] = bitcast i8** [[TMP86]] to i32**
-// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP87]], align 8
-// CHECK10-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 3
-// CHECK10-NEXT:    [[TMP89:%.*]] = bitcast i8** [[TMP88]] to i32**
-// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP89]], align 8
-// CHECK10-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES24]], i32 0, i32 3
-// CHECK10-NEXT:    store i64 [[TMP67]], i64* [[TMP90]], align 8
-// CHECK10-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 3
-// CHECK10-NEXT:    store i8* null, i8** [[TMP91]], align 8
-// CHECK10-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES24]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP95:%.*]] = load i32, i32* [[N]], align 4
-// CHECK10-NEXT:    store i32 [[TMP95]], i32* [[DOTCAPTURE_EXPR_26]], align 4
-// CHECK10-NEXT:    [[TMP96:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_26]], align 4
-// CHECK10-NEXT:    [[SUB28:%.*]] = sub nsw i32 [[TMP96]], 0
+// CHECK10-NEXT:    store i64 [[TMP64]], i64* [[TMP75]], align 8
+// CHECK10-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 1
+// CHECK10-NEXT:    store i8* null, i8** [[TMP76]], align 8
+// CHECK10-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 2
+// CHECK10-NEXT:    [[TMP78:%.*]] = bitcast i8** [[TMP77]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP78]], align 8
+// CHECK10-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 2
+// CHECK10-NEXT:    [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP80]], align 8
+// CHECK10-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 2
+// CHECK10-NEXT:    store i8* null, i8** [[TMP81]], align 8
+// CHECK10-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 3
+// CHECK10-NEXT:    [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i32**
+// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP83]], align 8
+// CHECK10-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 3
+// CHECK10-NEXT:    [[TMP85:%.*]] = bitcast i8** [[TMP84]] to i32**
+// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP85]], align 8
+// CHECK10-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES24]], i32 0, i32 3
+// CHECK10-NEXT:    store i64 [[TMP65]], i64* [[TMP86]], align 8
+// CHECK10-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 3
+// CHECK10-NEXT:    store i8* null, i8** [[TMP87]], align 8
+// CHECK10-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES24]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP91:%.*]] = load i32, i32* [[N]], align 4
+// CHECK10-NEXT:    store i32 [[TMP91]], i32* [[DOTCAPTURE_EXPR_26]], align 4
+// CHECK10-NEXT:    [[TMP92:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_26]], align 4
+// CHECK10-NEXT:    [[SUB28:%.*]] = sub nsw i32 [[TMP92]], 0
 // CHECK10-NEXT:    [[DIV29:%.*]] = sdiv i32 [[SUB28]], 1
 // CHECK10-NEXT:    [[SUB30:%.*]] = sub nsw i32 [[DIV29]], 1
 // CHECK10-NEXT:    store i32 [[SUB30]], i32* [[DOTCAPTURE_EXPR_27]], align 4
-// CHECK10-NEXT:    [[TMP97:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_27]], align 4
-// CHECK10-NEXT:    [[ADD31:%.*]] = add nsw i32 [[TMP97]], 1
-// CHECK10-NEXT:    [[TMP98:%.*]] = zext i32 [[ADD31]] to i64
-// CHECK10-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP98]])
-// CHECK10-NEXT:    [[TMP99:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l116.region_id, i32 4, i8** [[TMP92]], i8** [[TMP93]], i64* [[TMP94]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.7, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK10-NEXT:    [[TMP100:%.*]] = icmp ne i32 [[TMP99]], 0
-// CHECK10-NEXT:    br i1 [[TMP100]], label [[OMP_OFFLOAD_FAILED32:%.*]], label [[OMP_OFFLOAD_CONT33:%.*]]
+// CHECK10-NEXT:    [[TMP93:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_27]], align 4
+// CHECK10-NEXT:    [[ADD31:%.*]] = add nsw i32 [[TMP93]], 1
+// CHECK10-NEXT:    [[TMP94:%.*]] = zext i32 [[ADD31]] to i64
+// CHECK10-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP94]])
+// CHECK10-NEXT:    [[TMP95:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l116.region_id, i32 4, i8** [[TMP88]], i8** [[TMP89]], i64* [[TMP90]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK10-NEXT:    [[TMP96:%.*]] = icmp ne i32 [[TMP95]], 0
+// CHECK10-NEXT:    br i1 [[TMP96]], label [[OMP_OFFLOAD_FAILED32:%.*]], label [[OMP_OFFLOAD_CONT33:%.*]]
 // CHECK10:       omp_offload.failed32:
-// CHECK10-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l116(i64 [[TMP64]], i64 [[TMP66]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK10-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l116(i64 [[TMP62]], i64 [[TMP64]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK10-NEXT:    br label [[OMP_OFFLOAD_CONT33]]
 // CHECK10:       omp_offload.cont33:
-// CHECK10-NEXT:    [[TMP101:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK10-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10EEiT_(i32 noundef signext [[TMP101]])
+// CHECK10-NEXT:    [[TMP97:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK10-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10EEiT_(i32 noundef signext [[TMP97]])
 // CHECK10-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK10-NEXT:    [[TMP102:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK10-NEXT:    call void @llvm.stackrestore(i8* [[TMP102]])
-// CHECK10-NEXT:    [[TMP103:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK10-NEXT:    ret i32 [[TMP103]]
+// CHECK10-NEXT:    [[TMP98:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK10-NEXT:    call void @llvm.stackrestore(i8* [[TMP98]])
+// CHECK10-NEXT:    [[TMP99:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK10-NEXT:    ret i32 [[TMP99]]
 //
 //
 // CHECK10-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l106
@@ -4583,11 +4567,11 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK10-NEXT:    store i32 [[TMP3]], i32* [[CONV2]], align 4
 // CHECK10-NEXT:    [[TMP4:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32* [[CONV1]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP4]])
+// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* [[CONV1]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP4]])
 // CHECK10-NEXT:    ret void
 //
 //
-// CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..5
+// CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..6
 // CHECK10-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK10-NEXT:  entry:
 // CHECK10-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -4669,7 +4653,7 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK10-NEXT:    store i32 [[TMP21]], i32* [[CONV7]], align 4
 // CHECK10-NEXT:    [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]], i64 [[TMP22]])
+// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*, i64)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]], i64 [[TMP22]])
 // CHECK10-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK10:       omp.inner.for.inc:
 // CHECK10-NEXT:    [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -4711,7 +4695,7 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    ret void
 //
 //
-// CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..6
+// CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..7
 // CHECK10-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK10-NEXT:  entry:
 // CHECK10-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -4820,7 +4804,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK10-LABEL: define {{[^@]+}}@_Z5tmainIiLi10EEiT_
-// CHECK10-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK10-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK10-NEXT:  entry:
 // CHECK10-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK10-NEXT:    [[A:%.*]] = alloca [10 x i32], align 4
@@ -4851,7 +4835,7 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK10-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK10-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK10-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l84.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.10, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK10-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l84.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.12, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK10-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK10-NEXT:    br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK10:       omp_offload.failed:
@@ -4869,7 +4853,7 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK10-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
 // CHECK10-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK10-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l89.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.13, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.14, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK10-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l89.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.16, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.17, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK10-NEXT:    [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
 // CHECK10-NEXT:    br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]]
 // CHECK10:       omp_offload.failed5:
@@ -4899,7 +4883,7 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0
 // CHECK10-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS8]], i32 0, i32 0
 // CHECK10-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK10-NEXT:    [[TMP32:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l94.region_id, i32 2, i8** [[TMP30]], i8** [[TMP31]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.17, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.18, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK10-NEXT:    [[TMP32:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l94.region_id, i32 2, i8** [[TMP30]], i8** [[TMP31]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.20, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.21, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK10-NEXT:    [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0
 // CHECK10-NEXT:    br i1 [[TMP33]], label [[OMP_OFFLOAD_FAILED11:%.*]], label [[OMP_OFFLOAD_CONT12:%.*]]
 // CHECK10:       omp_offload.failed11:
@@ -4915,11 +4899,11 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8
 // CHECK10-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8
 // CHECK10-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8
-// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK10-NEXT:    ret void
 //
 //
-// CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..8
+// CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..10
 // CHECK10-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK10-NEXT:  entry:
 // CHECK10-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -4967,7 +4951,7 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
 // CHECK10-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
 // CHECK10-NEXT:    [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
-// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]])
+// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]])
 // CHECK10-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK10:       omp.inner.for.inc:
 // CHECK10-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -4982,7 +4966,7 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    ret void
 //
 //
-// CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..9
+// CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..11
 // CHECK10-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK10-NEXT:  entry:
 // CHECK10-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -5065,11 +5049,11 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8
 // CHECK10-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8
 // CHECK10-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8
-// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK10-NEXT:    ret void
 //
 //
-// CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..11
+// CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..14
 // CHECK10-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK10-NEXT:  entry:
 // CHECK10-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -5117,7 +5101,7 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
 // CHECK10-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
 // CHECK10-NEXT:    [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
-// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]])
+// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]])
 // CHECK10-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK10:       omp.inner.for.inc:
 // CHECK10-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -5132,7 +5116,7 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    ret void
 //
 //
-// CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..12
+// CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..15
 // CHECK10-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK10-NEXT:  entry:
 // CHECK10-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -5226,11 +5210,11 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK10-NEXT:    store i32 [[TMP2]], i32* [[CONV1]], align 4
 // CHECK10-NEXT:    [[TMP3:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i64 [[TMP3]])
+// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i64)* @.omp_outlined..18 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i64 [[TMP3]])
 // CHECK10-NEXT:    ret void
 //
 //
-// CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..15
+// CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..18
 // CHECK10-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK10-NEXT:  entry:
 // CHECK10-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -5286,7 +5270,7 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK10-NEXT:    store i32 [[TMP12]], i32* [[CONV2]], align 4
 // CHECK10-NEXT:    [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]])
+// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]])
 // CHECK10-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK10:       omp.inner.for.inc:
 // CHECK10-NEXT:    [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -5322,7 +5306,7 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    ret void
 //
 //
-// CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..16
+// CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..19
 // CHECK10-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK10-NEXT:  entry:
 // CHECK10-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -5403,7 +5387,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK10-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK10-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK10-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK10-NEXT:  entry:
 // CHECK10-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK10-NEXT:    ret void
@@ -5459,14 +5443,14 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4
 // CHECK11-NEXT:    [[TMP4:%.*]] = mul nuw i32 [[TMP0]], 4
 // CHECK11-NEXT:    [[TMP5:%.*]] = sext i32 [[TMP4]] to i64
-// CHECK11-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP3]], i32* [[TMP7]], align 4
-// CHECK11-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP3]], i32* [[TMP9]], align 4
-// CHECK11-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK11-NEXT:    store i64 4, i64* [[TMP10]], align 4
+// CHECK11-NEXT:    [[TMP6:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK11-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP6]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes to i8*), i32 24, i1 false)
+// CHECK11-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP3]], i32* [[TMP8]], align 4
+// CHECK11-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP3]], i32* [[TMP10]], align 4
 // CHECK11-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
 // CHECK11-NEXT:    store i8* null, i8** [[TMP11]], align 4
 // CHECK11-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -5475,173 +5459,165 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK11-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i32*
 // CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP15]], align 4
-// CHECK11-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK11-NEXT:    store i64 4, i64* [[TMP16]], align 4
-// CHECK11-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK11-NEXT:    store i8* null, i8** [[TMP17]], align 4
-// CHECK11-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
-// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP19]], align 4
-// CHECK11-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32**
-// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP21]], align 4
-// CHECK11-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK11-NEXT:    store i64 [[TMP5]], i64* [[TMP22]], align 4
-// CHECK11-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK11-NEXT:    store i8* null, i8** [[TMP23]], align 4
-// CHECK11-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP27:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP27]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK11-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK11-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP28]], 0
+// CHECK11-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK11-NEXT:    store i8* null, i8** [[TMP16]], align 4
+// CHECK11-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32**
+// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP18]], align 4
+// CHECK11-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32**
+// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP20]], align 4
+// CHECK11-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
+// CHECK11-NEXT:    store i64 [[TMP5]], i64* [[TMP21]], align 4
+// CHECK11-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK11-NEXT:    store i8* null, i8** [[TMP22]], align 4
+// CHECK11-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP26:%.*]] = load i32, i32* [[N]], align 4
+// CHECK11-NEXT:    store i32 [[TMP26]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK11-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK11-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP27]], 0
 // CHECK11-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK11-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK11-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK11-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK11-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP29]], 1
-// CHECK11-NEXT:    [[TMP30:%.*]] = zext i32 [[ADD]] to i64
-// CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP30]])
-// CHECK11-NEXT:    [[TMP31:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l106.region_id, i32 3, i8** [[TMP24]], i8** [[TMP25]], i64* [[TMP26]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK11-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
-// CHECK11-NEXT:    br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK11-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK11-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP28]], 1
+// CHECK11-NEXT:    [[TMP29:%.*]] = zext i32 [[ADD]] to i64
+// CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP29]])
+// CHECK11-NEXT:    [[TMP30:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l106.region_id, i32 3, i8** [[TMP23]], i8** [[TMP24]], i64* [[TMP25]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK11-NEXT:    [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
+// CHECK11-NEXT:    br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK11:       omp_offload.failed:
 // CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l106(i32 [[TMP3]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK11:       omp_offload.cont:
-// CHECK11-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP33]], i32* [[N_CASTED3]], align 4
-// CHECK11-NEXT:    [[TMP34:%.*]] = load i32, i32* [[N_CASTED3]], align 4
-// CHECK11-NEXT:    [[TMP35:%.*]] = mul nuw i32 [[TMP0]], 4
-// CHECK11-NEXT:    [[TMP36:%.*]] = sext i32 [[TMP35]] to i64
+// CHECK11-NEXT:    [[TMP32:%.*]] = load i32, i32* [[N]], align 4
+// CHECK11-NEXT:    store i32 [[TMP32]], i32* [[N_CASTED3]], align 4
+// CHECK11-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N_CASTED3]], align 4
+// CHECK11-NEXT:    [[TMP34:%.*]] = mul nuw i32 [[TMP0]], 4
+// CHECK11-NEXT:    [[TMP35:%.*]] = sext i32 [[TMP34]] to i64
+// CHECK11-NEXT:    [[TMP36:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES7]] to i8*
+// CHECK11-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP36]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes.4 to i8*), i32 24, i1 false)
 // CHECK11-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP38:%.*]] = bitcast i8** [[TMP37]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP34]], i32* [[TMP38]], align 4
+// CHECK11-NEXT:    store i32 [[TMP33]], i32* [[TMP38]], align 4
 // CHECK11-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP40:%.*]] = bitcast i8** [[TMP39]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP34]], i32* [[TMP40]], align 4
-// CHECK11-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 0
-// CHECK11-NEXT:    store i64 4, i64* [[TMP41]], align 4
-// CHECK11-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 0
-// CHECK11-NEXT:    store i8* null, i8** [[TMP42]], align 4
-// CHECK11-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 1
-// CHECK11-NEXT:    [[TMP44:%.*]] = bitcast i8** [[TMP43]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP44]], align 4
-// CHECK11-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 1
-// CHECK11-NEXT:    [[TMP46:%.*]] = bitcast i8** [[TMP45]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP46]], align 4
-// CHECK11-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 1
-// CHECK11-NEXT:    store i64 4, i64* [[TMP47]], align 4
-// CHECK11-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 1
-// CHECK11-NEXT:    store i8* null, i8** [[TMP48]], align 4
-// CHECK11-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 2
+// CHECK11-NEXT:    store i32 [[TMP33]], i32* [[TMP40]], align 4
+// CHECK11-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 0
+// CHECK11-NEXT:    store i8* null, i8** [[TMP41]], align 4
+// CHECK11-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 1
+// CHECK11-NEXT:    [[TMP43:%.*]] = bitcast i8** [[TMP42]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP43]], align 4
+// CHECK11-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 1
+// CHECK11-NEXT:    [[TMP45:%.*]] = bitcast i8** [[TMP44]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP45]], align 4
+// CHECK11-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 1
+// CHECK11-NEXT:    store i8* null, i8** [[TMP46]], align 4
+// CHECK11-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP48:%.*]] = bitcast i8** [[TMP47]] to i32**
+// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP48]], align 4
+// CHECK11-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 2
 // CHECK11-NEXT:    [[TMP50:%.*]] = bitcast i8** [[TMP49]] to i32**
 // CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP50]], align 4
-// CHECK11-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP52:%.*]] = bitcast i8** [[TMP51]] to i32**
-// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP52]], align 4
-// CHECK11-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 2
-// CHECK11-NEXT:    store i64 [[TMP36]], i64* [[TMP53]], align 4
-// CHECK11-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 2
-// CHECK11-NEXT:    store i8* null, i8** [[TMP54]], align 4
-// CHECK11-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP58:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP58]], i32* [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK11-NEXT:    [[TMP59:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK11-NEXT:    [[SUB11:%.*]] = sub nsw i32 [[TMP59]], 0
+// CHECK11-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 2
+// CHECK11-NEXT:    store i64 [[TMP35]], i64* [[TMP51]], align 4
+// CHECK11-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 2
+// CHECK11-NEXT:    store i8* null, i8** [[TMP52]], align 4
+// CHECK11-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP56:%.*]] = load i32, i32* [[N]], align 4
+// CHECK11-NEXT:    store i32 [[TMP56]], i32* [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK11-NEXT:    [[TMP57:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK11-NEXT:    [[SUB11:%.*]] = sub nsw i32 [[TMP57]], 0
 // CHECK11-NEXT:    [[DIV12:%.*]] = sdiv i32 [[SUB11]], 1
 // CHECK11-NEXT:    [[SUB13:%.*]] = sub nsw i32 [[DIV12]], 1
 // CHECK11-NEXT:    store i32 [[SUB13]], i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK11-NEXT:    [[TMP60:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK11-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP60]], 1
-// CHECK11-NEXT:    [[TMP61:%.*]] = zext i32 [[ADD14]] to i64
-// CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP61]])
-// CHECK11-NEXT:    [[TMP62:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l111.region_id, i32 3, i8** [[TMP55]], i8** [[TMP56]], i64* [[TMP57]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK11-NEXT:    [[TMP63:%.*]] = icmp ne i32 [[TMP62]], 0
-// CHECK11-NEXT:    br i1 [[TMP63]], label [[OMP_OFFLOAD_FAILED15:%.*]], label [[OMP_OFFLOAD_CONT16:%.*]]
+// CHECK11-NEXT:    [[TMP58:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
+// CHECK11-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP58]], 1
+// CHECK11-NEXT:    [[TMP59:%.*]] = zext i32 [[ADD14]] to i64
+// CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP59]])
+// CHECK11-NEXT:    [[TMP60:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l111.region_id, i32 3, i8** [[TMP53]], i8** [[TMP54]], i64* [[TMP55]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.5, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK11-NEXT:    [[TMP61:%.*]] = icmp ne i32 [[TMP60]], 0
+// CHECK11-NEXT:    br i1 [[TMP61]], label [[OMP_OFFLOAD_FAILED15:%.*]], label [[OMP_OFFLOAD_CONT16:%.*]]
 // CHECK11:       omp_offload.failed15:
-// CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l111(i32 [[TMP34]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l111(i32 [[TMP33]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT16]]
 // CHECK11:       omp_offload.cont16:
-// CHECK11-NEXT:    [[TMP64:%.*]] = load i32, i32* [[M]], align 4
-// CHECK11-NEXT:    store i32 [[TMP64]], i32* [[M_CASTED]], align 4
-// CHECK11-NEXT:    [[TMP65:%.*]] = load i32, i32* [[M_CASTED]], align 4
-// CHECK11-NEXT:    [[TMP66:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP66]], i32* [[N_CASTED17]], align 4
-// CHECK11-NEXT:    [[TMP67:%.*]] = load i32, i32* [[N_CASTED17]], align 4
-// CHECK11-NEXT:    [[TMP68:%.*]] = mul nuw i32 [[TMP0]], 4
-// CHECK11-NEXT:    [[TMP69:%.*]] = sext i32 [[TMP68]] to i64
-// CHECK11-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP71:%.*]] = bitcast i8** [[TMP70]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP65]], i32* [[TMP71]], align 4
-// CHECK11-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP73:%.*]] = bitcast i8** [[TMP72]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP65]], i32* [[TMP73]], align 4
-// CHECK11-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 0
-// CHECK11-NEXT:    store i64 4, i64* [[TMP74]], align 4
-// CHECK11-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 0
-// CHECK11-NEXT:    store i8* null, i8** [[TMP75]], align 4
-// CHECK11-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 1
+// CHECK11-NEXT:    [[TMP62:%.*]] = load i32, i32* [[M]], align 4
+// CHECK11-NEXT:    store i32 [[TMP62]], i32* [[M_CASTED]], align 4
+// CHECK11-NEXT:    [[TMP63:%.*]] = load i32, i32* [[M_CASTED]], align 4
+// CHECK11-NEXT:    [[TMP64:%.*]] = load i32, i32* [[N]], align 4
+// CHECK11-NEXT:    store i32 [[TMP64]], i32* [[N_CASTED17]], align 4
+// CHECK11-NEXT:    [[TMP65:%.*]] = load i32, i32* [[N_CASTED17]], align 4
+// CHECK11-NEXT:    [[TMP66:%.*]] = mul nuw i32 [[TMP0]], 4
+// CHECK11-NEXT:    [[TMP67:%.*]] = sext i32 [[TMP66]] to i64
+// CHECK11-NEXT:    [[TMP68:%.*]] = bitcast [4 x i64]* [[DOTOFFLOAD_SIZES21]] to i8*
+// CHECK11-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP68]], i8* align 4 bitcast ([4 x i64]* @.offload_sizes.8 to i8*), i32 32, i1 false)
+// CHECK11-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP70:%.*]] = bitcast i8** [[TMP69]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP63]], i32* [[TMP70]], align 4
+// CHECK11-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP72:%.*]] = bitcast i8** [[TMP71]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP63]], i32* [[TMP72]], align 4
+// CHECK11-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 0
+// CHECK11-NEXT:    store i8* null, i8** [[TMP73]], align 4
+// CHECK11-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 1
+// CHECK11-NEXT:    [[TMP75:%.*]] = bitcast i8** [[TMP74]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP65]], i32* [[TMP75]], align 4
+// CHECK11-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 1
 // CHECK11-NEXT:    [[TMP77:%.*]] = bitcast i8** [[TMP76]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP67]], i32* [[TMP77]], align 4
-// CHECK11-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 1
-// CHECK11-NEXT:    [[TMP79:%.*]] = bitcast i8** [[TMP78]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP67]], i32* [[TMP79]], align 4
-// CHECK11-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 1
-// CHECK11-NEXT:    store i64 4, i64* [[TMP80]], align 4
-// CHECK11-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 1
-// CHECK11-NEXT:    store i8* null, i8** [[TMP81]], align 4
-// CHECK11-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP83]], align 4
-// CHECK11-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP85:%.*]] = bitcast i8** [[TMP84]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP85]], align 4
-// CHECK11-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 2
-// CHECK11-NEXT:    store i64 4, i64* [[TMP86]], align 4
-// CHECK11-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 2
-// CHECK11-NEXT:    store i8* null, i8** [[TMP87]], align 4
-// CHECK11-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 3
-// CHECK11-NEXT:    [[TMP89:%.*]] = bitcast i8** [[TMP88]] to i32**
-// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP89]], align 4
-// CHECK11-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 3
-// CHECK11-NEXT:    [[TMP91:%.*]] = bitcast i8** [[TMP90]] to i32**
-// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP91]], align 4
-// CHECK11-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 3
-// CHECK11-NEXT:    store i64 [[TMP69]], i64* [[TMP92]], align 4
-// CHECK11-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 3
-// CHECK11-NEXT:    store i8* null, i8** [[TMP93]], align 4
-// CHECK11-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP97:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP97]], i32* [[DOTCAPTURE_EXPR_23]], align 4
-// CHECK11-NEXT:    [[TMP98:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_23]], align 4
-// CHECK11-NEXT:    [[SUB25:%.*]] = sub nsw i32 [[TMP98]], 0
+// CHECK11-NEXT:    store i32 [[TMP65]], i32* [[TMP77]], align 4
+// CHECK11-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 1
+// CHECK11-NEXT:    store i8* null, i8** [[TMP78]], align 4
+// CHECK11-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP80]], align 4
+// CHECK11-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP82:%.*]] = bitcast i8** [[TMP81]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP82]], align 4
+// CHECK11-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 2
+// CHECK11-NEXT:    store i8* null, i8** [[TMP83]], align 4
+// CHECK11-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 3
+// CHECK11-NEXT:    [[TMP85:%.*]] = bitcast i8** [[TMP84]] to i32**
+// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP85]], align 4
+// CHECK11-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 3
+// CHECK11-NEXT:    [[TMP87:%.*]] = bitcast i8** [[TMP86]] to i32**
+// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP87]], align 4
+// CHECK11-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 3
+// CHECK11-NEXT:    store i64 [[TMP67]], i64* [[TMP88]], align 4
+// CHECK11-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 3
+// CHECK11-NEXT:    store i8* null, i8** [[TMP89]], align 4
+// CHECK11-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP93:%.*]] = load i32, i32* [[N]], align 4
+// CHECK11-NEXT:    store i32 [[TMP93]], i32* [[DOTCAPTURE_EXPR_23]], align 4
+// CHECK11-NEXT:    [[TMP94:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_23]], align 4
+// CHECK11-NEXT:    [[SUB25:%.*]] = sub nsw i32 [[TMP94]], 0
 // CHECK11-NEXT:    [[DIV26:%.*]] = sdiv i32 [[SUB25]], 1
 // CHECK11-NEXT:    [[SUB27:%.*]] = sub nsw i32 [[DIV26]], 1
 // CHECK11-NEXT:    store i32 [[SUB27]], i32* [[DOTCAPTURE_EXPR_24]], align 4
-// CHECK11-NEXT:    [[TMP99:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_24]], align 4
-// CHECK11-NEXT:    [[ADD28:%.*]] = add nsw i32 [[TMP99]], 1
-// CHECK11-NEXT:    [[TMP100:%.*]] = zext i32 [[ADD28]] to i64
-// CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP100]])
-// CHECK11-NEXT:    [[TMP101:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l116.region_id, i32 4, i8** [[TMP94]], i8** [[TMP95]], i64* [[TMP96]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.7, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK11-NEXT:    [[TMP102:%.*]] = icmp ne i32 [[TMP101]], 0
-// CHECK11-NEXT:    br i1 [[TMP102]], label [[OMP_OFFLOAD_FAILED29:%.*]], label [[OMP_OFFLOAD_CONT30:%.*]]
+// CHECK11-NEXT:    [[TMP95:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_24]], align 4
+// CHECK11-NEXT:    [[ADD28:%.*]] = add nsw i32 [[TMP95]], 1
+// CHECK11-NEXT:    [[TMP96:%.*]] = zext i32 [[ADD28]] to i64
+// CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP96]])
+// CHECK11-NEXT:    [[TMP97:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l116.region_id, i32 4, i8** [[TMP90]], i8** [[TMP91]], i64* [[TMP92]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK11-NEXT:    [[TMP98:%.*]] = icmp ne i32 [[TMP97]], 0
+// CHECK11-NEXT:    br i1 [[TMP98]], label [[OMP_OFFLOAD_FAILED29:%.*]], label [[OMP_OFFLOAD_CONT30:%.*]]
 // CHECK11:       omp_offload.failed29:
-// CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l116(i32 [[TMP65]], i32 [[TMP67]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l116(i32 [[TMP63]], i32 [[TMP65]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT30]]
 // CHECK11:       omp_offload.cont30:
-// CHECK11-NEXT:    [[TMP103:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK11-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10EEiT_(i32 noundef [[TMP103]])
+// CHECK11-NEXT:    [[TMP99:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK11-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10EEiT_(i32 noundef [[TMP99]])
 // CHECK11-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK11-NEXT:    [[TMP104:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK11-NEXT:    call void @llvm.stackrestore(i8* [[TMP104]])
-// CHECK11-NEXT:    [[TMP105:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK11-NEXT:    ret i32 [[TMP105]]
+// CHECK11-NEXT:    [[TMP100:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK11-NEXT:    call void @llvm.stackrestore(i8* [[TMP100]])
+// CHECK11-NEXT:    [[TMP101:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK11-NEXT:    ret i32 [[TMP101]]
 //
 //
 // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l106
@@ -6076,11 +6052,11 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
 // CHECK11-NEXT:    store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK11-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK11-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*, i32)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP4]])
+// CHECK11-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*, i32)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP4]])
 // CHECK11-NEXT:    ret void
 //
 //
-// CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..5
+// CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..6
 // CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK11-NEXT:  entry:
 // CHECK11-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -6158,7 +6134,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
 // CHECK11-NEXT:    store i32 [[TMP19]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK11-NEXT:    [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK11-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*, i32)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]], i32 [[TMP20]])
+// CHECK11-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*, i32)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]], i32 [[TMP20]])
 // CHECK11-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK11:       omp.inner.for.inc:
 // CHECK11-NEXT:    [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -6200,7 +6176,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    ret void
 //
 //
-// CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..6
+// CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..7
 // CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK11-NEXT:  entry:
 // CHECK11-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -6305,7 +6281,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK11-LABEL: define {{[^@]+}}@_Z5tmainIiLi10EEiT_
-// CHECK11-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK11-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK11-NEXT:  entry:
 // CHECK11-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[A:%.*]] = alloca [10 x i32], align 4
@@ -6336,7 +6312,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK11-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l84.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.10, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK11-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l84.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.12, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK11-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK11-NEXT:    br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK11:       omp_offload.failed:
@@ -6354,7 +6330,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
 // CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK11-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l89.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.13, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.14, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK11-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l89.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.16, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.17, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK11-NEXT:    [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
 // CHECK11-NEXT:    br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]]
 // CHECK11:       omp_offload.failed5:
@@ -6383,7 +6359,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS8]], i32 0, i32 0
 // CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK11-NEXT:    [[TMP32:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l94.region_id, i32 2, i8** [[TMP30]], i8** [[TMP31]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.17, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.18, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK11-NEXT:    [[TMP32:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l94.region_id, i32 2, i8** [[TMP30]], i8** [[TMP31]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.20, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.21, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK11-NEXT:    [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0
 // CHECK11-NEXT:    br i1 [[TMP33]], label [[OMP_OFFLOAD_FAILED11:%.*]], label [[OMP_OFFLOAD_CONT12:%.*]]
 // CHECK11:       omp_offload.failed11:
@@ -6399,11 +6375,11 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4
 // CHECK11-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4
 // CHECK11-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4
-// CHECK11-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK11-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK11-NEXT:    ret void
 //
 //
-// CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..8
+// CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..10
 // CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK11-NEXT:  entry:
 // CHECK11-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -6449,7 +6425,7 @@ int main (int argc, char **argv) {
 // CHECK11:       omp.inner.for.body:
 // CHECK11-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
 // CHECK11-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
-// CHECK11-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]])
+// CHECK11-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]])
 // CHECK11-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK11:       omp.inner.for.inc:
 // CHECK11-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -6464,7 +6440,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    ret void
 //
 //
-// CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..9
+// CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..11
 // CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK11-NEXT:  entry:
 // CHECK11-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -6544,11 +6520,11 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4
 // CHECK11-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4
 // CHECK11-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4
-// CHECK11-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK11-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK11-NEXT:    ret void
 //
 //
-// CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..11
+// CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..14
 // CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK11-NEXT:  entry:
 // CHECK11-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -6594,7 +6570,7 @@ int main (int argc, char **argv) {
 // CHECK11:       omp.inner.for.body:
 // CHECK11-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
 // CHECK11-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
-// CHECK11-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]])
+// CHECK11-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]])
 // CHECK11-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK11:       omp.inner.for.inc:
 // CHECK11-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -6609,7 +6585,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    ret void
 //
 //
-// CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..12
+// CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..15
 // CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK11-NEXT:  entry:
 // CHECK11-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -6698,11 +6674,11 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
 // CHECK11-NEXT:    store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK11-NEXT:    [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK11-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i32)* @.omp_outlined..15 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i32 [[TMP3]])
+// CHECK11-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i32)* @.omp_outlined..18 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i32 [[TMP3]])
 // CHECK11-NEXT:    ret void
 //
 //
-// CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..15
+// CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..18
 // CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK11-NEXT:  entry:
 // CHECK11-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -6754,7 +6730,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
 // CHECK11-NEXT:    store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK11-NEXT:    [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK11-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]])
+// CHECK11-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]])
 // CHECK11-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK11:       omp.inner.for.inc:
 // CHECK11-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -6790,7 +6766,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    ret void
 //
 //
-// CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..16
+// CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..19
 // CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK11-NEXT:  entry:
 // CHECK11-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -6867,7 +6843,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK11-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK11-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK11-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK11-NEXT:  entry:
 // CHECK11-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK11-NEXT:    ret void
@@ -6923,14 +6899,14 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4
 // CHECK12-NEXT:    [[TMP4:%.*]] = mul nuw i32 [[TMP0]], 4
 // CHECK12-NEXT:    [[TMP5:%.*]] = sext i32 [[TMP4]] to i64
-// CHECK12-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP3]], i32* [[TMP7]], align 4
-// CHECK12-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP3]], i32* [[TMP9]], align 4
-// CHECK12-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK12-NEXT:    store i64 4, i64* [[TMP10]], align 4
+// CHECK12-NEXT:    [[TMP6:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK12-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP6]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes to i8*), i32 24, i1 false)
+// CHECK12-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP3]], i32* [[TMP8]], align 4
+// CHECK12-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP3]], i32* [[TMP10]], align 4
 // CHECK12-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
 // CHECK12-NEXT:    store i8* null, i8** [[TMP11]], align 4
 // CHECK12-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -6939,173 +6915,165 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK12-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i32*
 // CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP15]], align 4
-// CHECK12-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK12-NEXT:    store i64 4, i64* [[TMP16]], align 4
-// CHECK12-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK12-NEXT:    store i8* null, i8** [[TMP17]], align 4
-// CHECK12-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK12-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
-// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP19]], align 4
-// CHECK12-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK12-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32**
-// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP21]], align 4
-// CHECK12-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK12-NEXT:    store i64 [[TMP5]], i64* [[TMP22]], align 4
-// CHECK12-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK12-NEXT:    store i8* null, i8** [[TMP23]], align 4
-// CHECK12-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP27:%.*]] = load i32, i32* [[N]], align 4
-// CHECK12-NEXT:    store i32 [[TMP27]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK12-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK12-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP28]], 0
+// CHECK12-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK12-NEXT:    store i8* null, i8** [[TMP16]], align 4
+// CHECK12-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK12-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32**
+// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP18]], align 4
+// CHECK12-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK12-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32**
+// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP20]], align 4
+// CHECK12-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
+// CHECK12-NEXT:    store i64 [[TMP5]], i64* [[TMP21]], align 4
+// CHECK12-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK12-NEXT:    store i8* null, i8** [[TMP22]], align 4
+// CHECK12-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP26:%.*]] = load i32, i32* [[N]], align 4
+// CHECK12-NEXT:    store i32 [[TMP26]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK12-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK12-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP27]], 0
 // CHECK12-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK12-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK12-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK12-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK12-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP29]], 1
-// CHECK12-NEXT:    [[TMP30:%.*]] = zext i32 [[ADD]] to i64
-// CHECK12-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP30]])
-// CHECK12-NEXT:    [[TMP31:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l106.region_id, i32 3, i8** [[TMP24]], i8** [[TMP25]], i64* [[TMP26]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK12-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
-// CHECK12-NEXT:    br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK12-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK12-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP28]], 1
+// CHECK12-NEXT:    [[TMP29:%.*]] = zext i32 [[ADD]] to i64
+// CHECK12-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP29]])
+// CHECK12-NEXT:    [[TMP30:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l106.region_id, i32 3, i8** [[TMP23]], i8** [[TMP24]], i64* [[TMP25]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK12-NEXT:    [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
+// CHECK12-NEXT:    br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK12:       omp_offload.failed:
 // CHECK12-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l106(i32 [[TMP3]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK12-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK12:       omp_offload.cont:
-// CHECK12-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N]], align 4
-// CHECK12-NEXT:    store i32 [[TMP33]], i32* [[N_CASTED3]], align 4
-// CHECK12-NEXT:    [[TMP34:%.*]] = load i32, i32* [[N_CASTED3]], align 4
-// CHECK12-NEXT:    [[TMP35:%.*]] = mul nuw i32 [[TMP0]], 4
-// CHECK12-NEXT:    [[TMP36:%.*]] = sext i32 [[TMP35]] to i64
+// CHECK12-NEXT:    [[TMP32:%.*]] = load i32, i32* [[N]], align 4
+// CHECK12-NEXT:    store i32 [[TMP32]], i32* [[N_CASTED3]], align 4
+// CHECK12-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N_CASTED3]], align 4
+// CHECK12-NEXT:    [[TMP34:%.*]] = mul nuw i32 [[TMP0]], 4
+// CHECK12-NEXT:    [[TMP35:%.*]] = sext i32 [[TMP34]] to i64
+// CHECK12-NEXT:    [[TMP36:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES7]] to i8*
+// CHECK12-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP36]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes.4 to i8*), i32 24, i1 false)
 // CHECK12-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
 // CHECK12-NEXT:    [[TMP38:%.*]] = bitcast i8** [[TMP37]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP34]], i32* [[TMP38]], align 4
+// CHECK12-NEXT:    store i32 [[TMP33]], i32* [[TMP38]], align 4
 // CHECK12-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
 // CHECK12-NEXT:    [[TMP40:%.*]] = bitcast i8** [[TMP39]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP34]], i32* [[TMP40]], align 4
-// CHECK12-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 0
-// CHECK12-NEXT:    store i64 4, i64* [[TMP41]], align 4
-// CHECK12-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 0
-// CHECK12-NEXT:    store i8* null, i8** [[TMP42]], align 4
-// CHECK12-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 1
-// CHECK12-NEXT:    [[TMP44:%.*]] = bitcast i8** [[TMP43]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP44]], align 4
-// CHECK12-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 1
-// CHECK12-NEXT:    [[TMP46:%.*]] = bitcast i8** [[TMP45]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP46]], align 4
-// CHECK12-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 1
-// CHECK12-NEXT:    store i64 4, i64* [[TMP47]], align 4
-// CHECK12-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 1
-// CHECK12-NEXT:    store i8* null, i8** [[TMP48]], align 4
-// CHECK12-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 2
+// CHECK12-NEXT:    store i32 [[TMP33]], i32* [[TMP40]], align 4
+// CHECK12-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 0
+// CHECK12-NEXT:    store i8* null, i8** [[TMP41]], align 4
+// CHECK12-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 1
+// CHECK12-NEXT:    [[TMP43:%.*]] = bitcast i8** [[TMP42]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP43]], align 4
+// CHECK12-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 1
+// CHECK12-NEXT:    [[TMP45:%.*]] = bitcast i8** [[TMP44]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP45]], align 4
+// CHECK12-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 1
+// CHECK12-NEXT:    store i8* null, i8** [[TMP46]], align 4
+// CHECK12-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 2
+// CHECK12-NEXT:    [[TMP48:%.*]] = bitcast i8** [[TMP47]] to i32**
+// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP48]], align 4
+// CHECK12-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 2
 // CHECK12-NEXT:    [[TMP50:%.*]] = bitcast i8** [[TMP49]] to i32**
 // CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP50]], align 4
-// CHECK12-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 2
-// CHECK12-NEXT:    [[TMP52:%.*]] = bitcast i8** [[TMP51]] to i32**
-// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP52]], align 4
-// CHECK12-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 2
-// CHECK12-NEXT:    store i64 [[TMP36]], i64* [[TMP53]], align 4
-// CHECK12-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 2
-// CHECK12-NEXT:    store i8* null, i8** [[TMP54]], align 4
-// CHECK12-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP58:%.*]] = load i32, i32* [[N]], align 4
-// CHECK12-NEXT:    store i32 [[TMP58]], i32* [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK12-NEXT:    [[TMP59:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK12-NEXT:    [[SUB11:%.*]] = sub nsw i32 [[TMP59]], 0
+// CHECK12-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 2
+// CHECK12-NEXT:    store i64 [[TMP35]], i64* [[TMP51]], align 4
+// CHECK12-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 2
+// CHECK12-NEXT:    store i8* null, i8** [[TMP52]], align 4
+// CHECK12-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP56:%.*]] = load i32, i32* [[N]], align 4
+// CHECK12-NEXT:    store i32 [[TMP56]], i32* [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK12-NEXT:    [[TMP57:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK12-NEXT:    [[SUB11:%.*]] = sub nsw i32 [[TMP57]], 0
 // CHECK12-NEXT:    [[DIV12:%.*]] = sdiv i32 [[SUB11]], 1
 // CHECK12-NEXT:    [[SUB13:%.*]] = sub nsw i32 [[DIV12]], 1
 // CHECK12-NEXT:    store i32 [[SUB13]], i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK12-NEXT:    [[TMP60:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK12-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP60]], 1
-// CHECK12-NEXT:    [[TMP61:%.*]] = zext i32 [[ADD14]] to i64
-// CHECK12-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP61]])
-// CHECK12-NEXT:    [[TMP62:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l111.region_id, i32 3, i8** [[TMP55]], i8** [[TMP56]], i64* [[TMP57]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK12-NEXT:    [[TMP63:%.*]] = icmp ne i32 [[TMP62]], 0
-// CHECK12-NEXT:    br i1 [[TMP63]], label [[OMP_OFFLOAD_FAILED15:%.*]], label [[OMP_OFFLOAD_CONT16:%.*]]
+// CHECK12-NEXT:    [[TMP58:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
+// CHECK12-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP58]], 1
+// CHECK12-NEXT:    [[TMP59:%.*]] = zext i32 [[ADD14]] to i64
+// CHECK12-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP59]])
+// CHECK12-NEXT:    [[TMP60:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l111.region_id, i32 3, i8** [[TMP53]], i8** [[TMP54]], i64* [[TMP55]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.5, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK12-NEXT:    [[TMP61:%.*]] = icmp ne i32 [[TMP60]], 0
+// CHECK12-NEXT:    br i1 [[TMP61]], label [[OMP_OFFLOAD_FAILED15:%.*]], label [[OMP_OFFLOAD_CONT16:%.*]]
 // CHECK12:       omp_offload.failed15:
-// CHECK12-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l111(i32 [[TMP34]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK12-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l111(i32 [[TMP33]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK12-NEXT:    br label [[OMP_OFFLOAD_CONT16]]
 // CHECK12:       omp_offload.cont16:
-// CHECK12-NEXT:    [[TMP64:%.*]] = load i32, i32* [[M]], align 4
-// CHECK12-NEXT:    store i32 [[TMP64]], i32* [[M_CASTED]], align 4
-// CHECK12-NEXT:    [[TMP65:%.*]] = load i32, i32* [[M_CASTED]], align 4
-// CHECK12-NEXT:    [[TMP66:%.*]] = load i32, i32* [[N]], align 4
-// CHECK12-NEXT:    store i32 [[TMP66]], i32* [[N_CASTED17]], align 4
-// CHECK12-NEXT:    [[TMP67:%.*]] = load i32, i32* [[N_CASTED17]], align 4
-// CHECK12-NEXT:    [[TMP68:%.*]] = mul nuw i32 [[TMP0]], 4
-// CHECK12-NEXT:    [[TMP69:%.*]] = sext i32 [[TMP68]] to i64
-// CHECK12-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP71:%.*]] = bitcast i8** [[TMP70]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP65]], i32* [[TMP71]], align 4
-// CHECK12-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP73:%.*]] = bitcast i8** [[TMP72]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP65]], i32* [[TMP73]], align 4
-// CHECK12-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 0
-// CHECK12-NEXT:    store i64 4, i64* [[TMP74]], align 4
-// CHECK12-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 0
-// CHECK12-NEXT:    store i8* null, i8** [[TMP75]], align 4
-// CHECK12-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 1
+// CHECK12-NEXT:    [[TMP62:%.*]] = load i32, i32* [[M]], align 4
+// CHECK12-NEXT:    store i32 [[TMP62]], i32* [[M_CASTED]], align 4
+// CHECK12-NEXT:    [[TMP63:%.*]] = load i32, i32* [[M_CASTED]], align 4
+// CHECK12-NEXT:    [[TMP64:%.*]] = load i32, i32* [[N]], align 4
+// CHECK12-NEXT:    store i32 [[TMP64]], i32* [[N_CASTED17]], align 4
+// CHECK12-NEXT:    [[TMP65:%.*]] = load i32, i32* [[N_CASTED17]], align 4
+// CHECK12-NEXT:    [[TMP66:%.*]] = mul nuw i32 [[TMP0]], 4
+// CHECK12-NEXT:    [[TMP67:%.*]] = sext i32 [[TMP66]] to i64
+// CHECK12-NEXT:    [[TMP68:%.*]] = bitcast [4 x i64]* [[DOTOFFLOAD_SIZES21]] to i8*
+// CHECK12-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP68]], i8* align 4 bitcast ([4 x i64]* @.offload_sizes.8 to i8*), i32 32, i1 false)
+// CHECK12-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP70:%.*]] = bitcast i8** [[TMP69]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP63]], i32* [[TMP70]], align 4
+// CHECK12-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP72:%.*]] = bitcast i8** [[TMP71]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP63]], i32* [[TMP72]], align 4
+// CHECK12-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 0
+// CHECK12-NEXT:    store i8* null, i8** [[TMP73]], align 4
+// CHECK12-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 1
+// CHECK12-NEXT:    [[TMP75:%.*]] = bitcast i8** [[TMP74]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP65]], i32* [[TMP75]], align 4
+// CHECK12-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 1
 // CHECK12-NEXT:    [[TMP77:%.*]] = bitcast i8** [[TMP76]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP67]], i32* [[TMP77]], align 4
-// CHECK12-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 1
-// CHECK12-NEXT:    [[TMP79:%.*]] = bitcast i8** [[TMP78]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP67]], i32* [[TMP79]], align 4
-// CHECK12-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 1
-// CHECK12-NEXT:    store i64 4, i64* [[TMP80]], align 4
-// CHECK12-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 1
-// CHECK12-NEXT:    store i8* null, i8** [[TMP81]], align 4
-// CHECK12-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 2
-// CHECK12-NEXT:    [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP83]], align 4
-// CHECK12-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 2
-// CHECK12-NEXT:    [[TMP85:%.*]] = bitcast i8** [[TMP84]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP85]], align 4
-// CHECK12-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 2
-// CHECK12-NEXT:    store i64 4, i64* [[TMP86]], align 4
-// CHECK12-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 2
-// CHECK12-NEXT:    store i8* null, i8** [[TMP87]], align 4
-// CHECK12-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 3
-// CHECK12-NEXT:    [[TMP89:%.*]] = bitcast i8** [[TMP88]] to i32**
-// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP89]], align 4
-// CHECK12-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 3
-// CHECK12-NEXT:    [[TMP91:%.*]] = bitcast i8** [[TMP90]] to i32**
-// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP91]], align 4
-// CHECK12-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 3
-// CHECK12-NEXT:    store i64 [[TMP69]], i64* [[TMP92]], align 4
-// CHECK12-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 3
-// CHECK12-NEXT:    store i8* null, i8** [[TMP93]], align 4
-// CHECK12-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP97:%.*]] = load i32, i32* [[N]], align 4
-// CHECK12-NEXT:    store i32 [[TMP97]], i32* [[DOTCAPTURE_EXPR_23]], align 4
-// CHECK12-NEXT:    [[TMP98:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_23]], align 4
-// CHECK12-NEXT:    [[SUB25:%.*]] = sub nsw i32 [[TMP98]], 0
+// CHECK12-NEXT:    store i32 [[TMP65]], i32* [[TMP77]], align 4
+// CHECK12-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 1
+// CHECK12-NEXT:    store i8* null, i8** [[TMP78]], align 4
+// CHECK12-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 2
+// CHECK12-NEXT:    [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP80]], align 4
+// CHECK12-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 2
+// CHECK12-NEXT:    [[TMP82:%.*]] = bitcast i8** [[TMP81]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP82]], align 4
+// CHECK12-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 2
+// CHECK12-NEXT:    store i8* null, i8** [[TMP83]], align 4
+// CHECK12-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 3
+// CHECK12-NEXT:    [[TMP85:%.*]] = bitcast i8** [[TMP84]] to i32**
+// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP85]], align 4
+// CHECK12-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 3
+// CHECK12-NEXT:    [[TMP87:%.*]] = bitcast i8** [[TMP86]] to i32**
+// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP87]], align 4
+// CHECK12-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 3
+// CHECK12-NEXT:    store i64 [[TMP67]], i64* [[TMP88]], align 4
+// CHECK12-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 3
+// CHECK12-NEXT:    store i8* null, i8** [[TMP89]], align 4
+// CHECK12-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP93:%.*]] = load i32, i32* [[N]], align 4
+// CHECK12-NEXT:    store i32 [[TMP93]], i32* [[DOTCAPTURE_EXPR_23]], align 4
+// CHECK12-NEXT:    [[TMP94:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_23]], align 4
+// CHECK12-NEXT:    [[SUB25:%.*]] = sub nsw i32 [[TMP94]], 0
 // CHECK12-NEXT:    [[DIV26:%.*]] = sdiv i32 [[SUB25]], 1
 // CHECK12-NEXT:    [[SUB27:%.*]] = sub nsw i32 [[DIV26]], 1
 // CHECK12-NEXT:    store i32 [[SUB27]], i32* [[DOTCAPTURE_EXPR_24]], align 4
-// CHECK12-NEXT:    [[TMP99:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_24]], align 4
-// CHECK12-NEXT:    [[ADD28:%.*]] = add nsw i32 [[TMP99]], 1
-// CHECK12-NEXT:    [[TMP100:%.*]] = zext i32 [[ADD28]] to i64
-// CHECK12-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP100]])
-// CHECK12-NEXT:    [[TMP101:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l116.region_id, i32 4, i8** [[TMP94]], i8** [[TMP95]], i64* [[TMP96]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.7, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK12-NEXT:    [[TMP102:%.*]] = icmp ne i32 [[TMP101]], 0
-// CHECK12-NEXT:    br i1 [[TMP102]], label [[OMP_OFFLOAD_FAILED29:%.*]], label [[OMP_OFFLOAD_CONT30:%.*]]
+// CHECK12-NEXT:    [[TMP95:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_24]], align 4
+// CHECK12-NEXT:    [[ADD28:%.*]] = add nsw i32 [[TMP95]], 1
+// CHECK12-NEXT:    [[TMP96:%.*]] = zext i32 [[ADD28]] to i64
+// CHECK12-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP96]])
+// CHECK12-NEXT:    [[TMP97:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l116.region_id, i32 4, i8** [[TMP90]], i8** [[TMP91]], i64* [[TMP92]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK12-NEXT:    [[TMP98:%.*]] = icmp ne i32 [[TMP97]], 0
+// CHECK12-NEXT:    br i1 [[TMP98]], label [[OMP_OFFLOAD_FAILED29:%.*]], label [[OMP_OFFLOAD_CONT30:%.*]]
 // CHECK12:       omp_offload.failed29:
-// CHECK12-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l116(i32 [[TMP65]], i32 [[TMP67]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK12-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l116(i32 [[TMP63]], i32 [[TMP65]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK12-NEXT:    br label [[OMP_OFFLOAD_CONT30]]
 // CHECK12:       omp_offload.cont30:
-// CHECK12-NEXT:    [[TMP103:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK12-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10EEiT_(i32 noundef [[TMP103]])
+// CHECK12-NEXT:    [[TMP99:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK12-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10EEiT_(i32 noundef [[TMP99]])
 // CHECK12-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK12-NEXT:    [[TMP104:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK12-NEXT:    call void @llvm.stackrestore(i8* [[TMP104]])
-// CHECK12-NEXT:    [[TMP105:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK12-NEXT:    ret i32 [[TMP105]]
+// CHECK12-NEXT:    [[TMP100:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK12-NEXT:    call void @llvm.stackrestore(i8* [[TMP100]])
+// CHECK12-NEXT:    [[TMP101:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK12-NEXT:    ret i32 [[TMP101]]
 //
 //
 // CHECK12-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l106
@@ -7540,11 +7508,11 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
 // CHECK12-NEXT:    store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK12-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK12-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*, i32)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP4]])
+// CHECK12-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*, i32)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP4]])
 // CHECK12-NEXT:    ret void
 //
 //
-// CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..5
+// CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..6
 // CHECK12-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK12-NEXT:  entry:
 // CHECK12-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -7622,7 +7590,7 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
 // CHECK12-NEXT:    store i32 [[TMP19]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK12-NEXT:    [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK12-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*, i32)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]], i32 [[TMP20]])
+// CHECK12-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*, i32)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]], i32 [[TMP20]])
 // CHECK12-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK12:       omp.inner.for.inc:
 // CHECK12-NEXT:    [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -7664,7 +7632,7 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    ret void
 //
 //
-// CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..6
+// CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..7
 // CHECK12-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK12-NEXT:  entry:
 // CHECK12-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -7769,7 +7737,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK12-LABEL: define {{[^@]+}}@_Z5tmainIiLi10EEiT_
-// CHECK12-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK12-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK12-NEXT:  entry:
 // CHECK12-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK12-NEXT:    [[A:%.*]] = alloca [10 x i32], align 4
@@ -7800,7 +7768,7 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK12-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK12-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK12-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l84.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.10, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK12-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l84.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.12, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK12-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK12-NEXT:    br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK12:       omp_offload.failed:
@@ -7818,7 +7786,7 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK12-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
 // CHECK12-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK12-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l89.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.13, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.14, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK12-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l89.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.16, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.17, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK12-NEXT:    [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
 // CHECK12-NEXT:    br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]]
 // CHECK12:       omp_offload.failed5:
@@ -7847,7 +7815,7 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0
 // CHECK12-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS8]], i32 0, i32 0
 // CHECK12-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK12-NEXT:    [[TMP32:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l94.region_id, i32 2, i8** [[TMP30]], i8** [[TMP31]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.17, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.18, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK12-NEXT:    [[TMP32:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l94.region_id, i32 2, i8** [[TMP30]], i8** [[TMP31]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.20, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.21, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK12-NEXT:    [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0
 // CHECK12-NEXT:    br i1 [[TMP33]], label [[OMP_OFFLOAD_FAILED11:%.*]], label [[OMP_OFFLOAD_CONT12:%.*]]
 // CHECK12:       omp_offload.failed11:
@@ -7863,11 +7831,11 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4
 // CHECK12-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4
 // CHECK12-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4
-// CHECK12-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK12-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK12-NEXT:    ret void
 //
 //
-// CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..8
+// CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..10
 // CHECK12-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK12-NEXT:  entry:
 // CHECK12-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -7913,7 +7881,7 @@ int main (int argc, char **argv) {
 // CHECK12:       omp.inner.for.body:
 // CHECK12-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
 // CHECK12-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
-// CHECK12-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]])
+// CHECK12-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]])
 // CHECK12-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK12:       omp.inner.for.inc:
 // CHECK12-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -7928,7 +7896,7 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    ret void
 //
 //
-// CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..9
+// CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..11
 // CHECK12-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK12-NEXT:  entry:
 // CHECK12-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -8008,11 +7976,11 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4
 // CHECK12-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4
 // CHECK12-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4
-// CHECK12-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK12-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK12-NEXT:    ret void
 //
 //
-// CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..11
+// CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..14
 // CHECK12-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK12-NEXT:  entry:
 // CHECK12-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -8058,7 +8026,7 @@ int main (int argc, char **argv) {
 // CHECK12:       omp.inner.for.body:
 // CHECK12-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
 // CHECK12-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
-// CHECK12-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]])
+// CHECK12-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]])
 // CHECK12-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK12:       omp.inner.for.inc:
 // CHECK12-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -8073,7 +8041,7 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    ret void
 //
 //
-// CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..12
+// CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..15
 // CHECK12-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK12-NEXT:  entry:
 // CHECK12-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -8162,11 +8130,11 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
 // CHECK12-NEXT:    store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK12-NEXT:    [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK12-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i32)* @.omp_outlined..15 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i32 [[TMP3]])
+// CHECK12-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i32)* @.omp_outlined..18 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i32 [[TMP3]])
 // CHECK12-NEXT:    ret void
 //
 //
-// CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..15
+// CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..18
 // CHECK12-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK12-NEXT:  entry:
 // CHECK12-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -8218,7 +8186,7 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
 // CHECK12-NEXT:    store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK12-NEXT:    [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK12-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]])
+// CHECK12-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]])
 // CHECK12-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK12:       omp.inner.for.inc:
 // CHECK12-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -8254,7 +8222,7 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    ret void
 //
 //
-// CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..16
+// CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..19
 // CHECK12-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK12-NEXT:  entry:
 // CHECK12-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -8331,7 +8299,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK12-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK12-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK12-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK12-NEXT:  entry:
 // CHECK12-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK12-NEXT:    ret void

diff  --git a/clang/test/OpenMP/teams_distribute_parallel_for_schedule_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_schedule_codegen.cpp
index 987c1cc98a5c5..49ba716150467 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_schedule_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_schedule_codegen.cpp
@@ -7480,14 +7480,14 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    store i32 [[TMP3]], i32* [[CONV]], align 4
 // CHECK13-NEXT:    [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8
 // CHECK13-NEXT:    [[TMP5:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK13-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK13-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i64*
-// CHECK13-NEXT:    store i64 [[TMP4]], i64* [[TMP7]], align 8
-// CHECK13-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK13-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i64*
-// CHECK13-NEXT:    store i64 [[TMP4]], i64* [[TMP9]], align 8
-// CHECK13-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK13-NEXT:    store i64 4, i64* [[TMP10]], align 8
+// CHECK13-NEXT:    [[TMP6:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK13-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP6]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes to i8*), i64 24, i1 false)
+// CHECK13-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK13-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i64*
+// CHECK13-NEXT:    store i64 [[TMP4]], i64* [[TMP8]], align 8
+// CHECK13-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK13-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i64*
+// CHECK13-NEXT:    store i64 [[TMP4]], i64* [[TMP10]], align 8
 // CHECK13-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
 // CHECK13-NEXT:    store i8* null, i8** [[TMP11]], align 8
 // CHECK13-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -7496,300 +7496,286 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK13-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i64*
 // CHECK13-NEXT:    store i64 [[TMP1]], i64* [[TMP15]], align 8
-// CHECK13-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK13-NEXT:    store i64 8, i64* [[TMP16]], align 8
-// CHECK13-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK13-NEXT:    store i8* null, i8** [[TMP17]], align 8
-// CHECK13-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK13-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
-// CHECK13-NEXT:    store i32* [[VLA]], i32** [[TMP19]], align 8
-// CHECK13-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK13-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32**
-// CHECK13-NEXT:    store i32* [[VLA]], i32** [[TMP21]], align 8
-// CHECK13-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK13-NEXT:    store i64 [[TMP5]], i64* [[TMP22]], align 8
-// CHECK13-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK13-NEXT:    store i8* null, i8** [[TMP23]], align 8
-// CHECK13-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK13-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK13-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK13-NEXT:    [[TMP27:%.*]] = load i32, i32* [[N]], align 4
-// CHECK13-NEXT:    store i32 [[TMP27]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK13-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK13-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP28]], 0
+// CHECK13-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK13-NEXT:    store i8* null, i8** [[TMP16]], align 8
+// CHECK13-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK13-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32**
+// CHECK13-NEXT:    store i32* [[VLA]], i32** [[TMP18]], align 8
+// CHECK13-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK13-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32**
+// CHECK13-NEXT:    store i32* [[VLA]], i32** [[TMP20]], align 8
+// CHECK13-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
+// CHECK13-NEXT:    store i64 [[TMP5]], i64* [[TMP21]], align 8
+// CHECK13-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK13-NEXT:    store i8* null, i8** [[TMP22]], align 8
+// CHECK13-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK13-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK13-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK13-NEXT:    [[TMP26:%.*]] = load i32, i32* [[N]], align 4
+// CHECK13-NEXT:    store i32 [[TMP26]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK13-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK13-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP27]], 0
 // CHECK13-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK13-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK13-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK13-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK13-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP29]], 1
-// CHECK13-NEXT:    [[TMP30:%.*]] = zext i32 [[ADD]] to i64
-// CHECK13-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP30]])
-// CHECK13-NEXT:    [[TMP31:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l148.region_id, i32 3, i8** [[TMP24]], i8** [[TMP25]], i64* [[TMP26]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK13-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
-// CHECK13-NEXT:    br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK13-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK13-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP28]], 1
+// CHECK13-NEXT:    [[TMP29:%.*]] = zext i32 [[ADD]] to i64
+// CHECK13-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP29]])
+// CHECK13-NEXT:    [[TMP30:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l148.region_id, i32 3, i8** [[TMP23]], i8** [[TMP24]], i64* [[TMP25]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK13-NEXT:    [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
+// CHECK13-NEXT:    br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK13:       omp_offload.failed:
 // CHECK13-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l148(i64 [[TMP4]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK13-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK13:       omp_offload.cont:
-// CHECK13-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N]], align 4
+// CHECK13-NEXT:    [[TMP32:%.*]] = load i32, i32* [[N]], align 4
 // CHECK13-NEXT:    [[CONV4:%.*]] = bitcast i64* [[N_CASTED3]] to i32*
-// CHECK13-NEXT:    store i32 [[TMP33]], i32* [[CONV4]], align 4
-// CHECK13-NEXT:    [[TMP34:%.*]] = load i64, i64* [[N_CASTED3]], align 8
-// CHECK13-NEXT:    [[TMP35:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK13-NEXT:    store i32 [[TMP32]], i32* [[CONV4]], align 4
+// CHECK13-NEXT:    [[TMP33:%.*]] = load i64, i64* [[N_CASTED3]], align 8
+// CHECK13-NEXT:    [[TMP34:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK13-NEXT:    [[TMP35:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES8]] to i8*
+// CHECK13-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP35]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes.4 to i8*), i64 24, i1 false)
 // CHECK13-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
 // CHECK13-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i64*
-// CHECK13-NEXT:    store i64 [[TMP34]], i64* [[TMP37]], align 8
+// CHECK13-NEXT:    store i64 [[TMP33]], i64* [[TMP37]], align 8
 // CHECK13-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
 // CHECK13-NEXT:    [[TMP39:%.*]] = bitcast i8** [[TMP38]] to i64*
-// CHECK13-NEXT:    store i64 [[TMP34]], i64* [[TMP39]], align 8
-// CHECK13-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 0
-// CHECK13-NEXT:    store i64 4, i64* [[TMP40]], align 8
-// CHECK13-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 0
-// CHECK13-NEXT:    store i8* null, i8** [[TMP41]], align 8
-// CHECK13-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 1
-// CHECK13-NEXT:    [[TMP43:%.*]] = bitcast i8** [[TMP42]] to i64*
-// CHECK13-NEXT:    store i64 [[TMP1]], i64* [[TMP43]], align 8
-// CHECK13-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 1
-// CHECK13-NEXT:    [[TMP45:%.*]] = bitcast i8** [[TMP44]] to i64*
-// CHECK13-NEXT:    store i64 [[TMP1]], i64* [[TMP45]], align 8
-// CHECK13-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 1
-// CHECK13-NEXT:    store i64 8, i64* [[TMP46]], align 8
-// CHECK13-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 1
-// CHECK13-NEXT:    store i8* null, i8** [[TMP47]], align 8
-// CHECK13-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 2
+// CHECK13-NEXT:    store i64 [[TMP33]], i64* [[TMP39]], align 8
+// CHECK13-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 0
+// CHECK13-NEXT:    store i8* null, i8** [[TMP40]], align 8
+// CHECK13-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 1
+// CHECK13-NEXT:    [[TMP42:%.*]] = bitcast i8** [[TMP41]] to i64*
+// CHECK13-NEXT:    store i64 [[TMP1]], i64* [[TMP42]], align 8
+// CHECK13-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 1
+// CHECK13-NEXT:    [[TMP44:%.*]] = bitcast i8** [[TMP43]] to i64*
+// CHECK13-NEXT:    store i64 [[TMP1]], i64* [[TMP44]], align 8
+// CHECK13-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 1
+// CHECK13-NEXT:    store i8* null, i8** [[TMP45]], align 8
+// CHECK13-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 2
+// CHECK13-NEXT:    [[TMP47:%.*]] = bitcast i8** [[TMP46]] to i32**
+// CHECK13-NEXT:    store i32* [[VLA]], i32** [[TMP47]], align 8
+// CHECK13-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 2
 // CHECK13-NEXT:    [[TMP49:%.*]] = bitcast i8** [[TMP48]] to i32**
 // CHECK13-NEXT:    store i32* [[VLA]], i32** [[TMP49]], align 8
-// CHECK13-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 2
-// CHECK13-NEXT:    [[TMP51:%.*]] = bitcast i8** [[TMP50]] to i32**
-// CHECK13-NEXT:    store i32* [[VLA]], i32** [[TMP51]], align 8
-// CHECK13-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 2
-// CHECK13-NEXT:    store i64 [[TMP35]], i64* [[TMP52]], align 8
-// CHECK13-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 2
-// CHECK13-NEXT:    store i8* null, i8** [[TMP53]], align 8
-// CHECK13-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
-// CHECK13-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
-// CHECK13-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 0
-// CHECK13-NEXT:    [[TMP57:%.*]] = load i32, i32* [[N]], align 4
-// CHECK13-NEXT:    store i32 [[TMP57]], i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK13-NEXT:    [[TMP58:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK13-NEXT:    [[SUB12:%.*]] = sub nsw i32 [[TMP58]], 0
+// CHECK13-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 2
+// CHECK13-NEXT:    store i64 [[TMP34]], i64* [[TMP50]], align 8
+// CHECK13-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 2
+// CHECK13-NEXT:    store i8* null, i8** [[TMP51]], align 8
+// CHECK13-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
+// CHECK13-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
+// CHECK13-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 0
+// CHECK13-NEXT:    [[TMP55:%.*]] = load i32, i32* [[N]], align 4
+// CHECK13-NEXT:    store i32 [[TMP55]], i32* [[DOTCAPTURE_EXPR_10]], align 4
+// CHECK13-NEXT:    [[TMP56:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
+// CHECK13-NEXT:    [[SUB12:%.*]] = sub nsw i32 [[TMP56]], 0
 // CHECK13-NEXT:    [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1
 // CHECK13-NEXT:    [[SUB14:%.*]] = sub nsw i32 [[DIV13]], 1
 // CHECK13-NEXT:    store i32 [[SUB14]], i32* [[DOTCAPTURE_EXPR_11]], align 4
-// CHECK13-NEXT:    [[TMP59:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_11]], align 4
-// CHECK13-NEXT:    [[ADD15:%.*]] = add nsw i32 [[TMP59]], 1
-// CHECK13-NEXT:    [[TMP60:%.*]] = zext i32 [[ADD15]] to i64
-// CHECK13-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP60]])
-// CHECK13-NEXT:    [[TMP61:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l153.region_id, i32 3, i8** [[TMP54]], i8** [[TMP55]], i64* [[TMP56]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK13-NEXT:    [[TMP62:%.*]] = icmp ne i32 [[TMP61]], 0
-// CHECK13-NEXT:    br i1 [[TMP62]], label [[OMP_OFFLOAD_FAILED16:%.*]], label [[OMP_OFFLOAD_CONT17:%.*]]
+// CHECK13-NEXT:    [[TMP57:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_11]], align 4
+// CHECK13-NEXT:    [[ADD15:%.*]] = add nsw i32 [[TMP57]], 1
+// CHECK13-NEXT:    [[TMP58:%.*]] = zext i32 [[ADD15]] to i64
+// CHECK13-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP58]])
+// CHECK13-NEXT:    [[TMP59:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l153.region_id, i32 3, i8** [[TMP52]], i8** [[TMP53]], i64* [[TMP54]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.5, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK13-NEXT:    [[TMP60:%.*]] = icmp ne i32 [[TMP59]], 0
+// CHECK13-NEXT:    br i1 [[TMP60]], label [[OMP_OFFLOAD_FAILED16:%.*]], label [[OMP_OFFLOAD_CONT17:%.*]]
 // CHECK13:       omp_offload.failed16:
-// CHECK13-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l153(i64 [[TMP34]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK13-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l153(i64 [[TMP33]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK13-NEXT:    br label [[OMP_OFFLOAD_CONT17]]
 // CHECK13:       omp_offload.cont17:
-// CHECK13-NEXT:    [[TMP63:%.*]] = load i32, i32* [[M]], align 4
+// CHECK13-NEXT:    [[TMP61:%.*]] = load i32, i32* [[M]], align 4
 // CHECK13-NEXT:    [[CONV18:%.*]] = bitcast i64* [[M_CASTED]] to i32*
-// CHECK13-NEXT:    store i32 [[TMP63]], i32* [[CONV18]], align 4
-// CHECK13-NEXT:    [[TMP64:%.*]] = load i64, i64* [[M_CASTED]], align 8
-// CHECK13-NEXT:    [[TMP65:%.*]] = load i32, i32* [[N]], align 4
+// CHECK13-NEXT:    store i32 [[TMP61]], i32* [[CONV18]], align 4
+// CHECK13-NEXT:    [[TMP62:%.*]] = load i64, i64* [[M_CASTED]], align 8
+// CHECK13-NEXT:    [[TMP63:%.*]] = load i32, i32* [[N]], align 4
 // CHECK13-NEXT:    [[CONV20:%.*]] = bitcast i64* [[N_CASTED19]] to i32*
-// CHECK13-NEXT:    store i32 [[TMP65]], i32* [[CONV20]], align 4
-// CHECK13-NEXT:    [[TMP66:%.*]] = load i64, i64* [[N_CASTED19]], align 8
-// CHECK13-NEXT:    [[TMP67:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK13-NEXT:    [[TMP68:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 0
-// CHECK13-NEXT:    [[TMP69:%.*]] = bitcast i8** [[TMP68]] to i64*
-// CHECK13-NEXT:    store i64 [[TMP64]], i64* [[TMP69]], align 8
-// CHECK13-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 0
-// CHECK13-NEXT:    [[TMP71:%.*]] = bitcast i8** [[TMP70]] to i64*
-// CHECK13-NEXT:    store i64 [[TMP64]], i64* [[TMP71]], align 8
-// CHECK13-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES24]], i32 0, i32 0
-// CHECK13-NEXT:    store i64 4, i64* [[TMP72]], align 8
-// CHECK13-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 0
-// CHECK13-NEXT:    store i8* null, i8** [[TMP73]], align 8
-// CHECK13-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 1
+// CHECK13-NEXT:    store i32 [[TMP63]], i32* [[CONV20]], align 4
+// CHECK13-NEXT:    [[TMP64:%.*]] = load i64, i64* [[N_CASTED19]], align 8
+// CHECK13-NEXT:    [[TMP65:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK13-NEXT:    [[TMP66:%.*]] = bitcast [4 x i64]* [[DOTOFFLOAD_SIZES24]] to i8*
+// CHECK13-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP66]], i8* align 8 bitcast ([4 x i64]* @.offload_sizes.8 to i8*), i64 32, i1 false)
+// CHECK13-NEXT:    [[TMP67:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 0
+// CHECK13-NEXT:    [[TMP68:%.*]] = bitcast i8** [[TMP67]] to i64*
+// CHECK13-NEXT:    store i64 [[TMP62]], i64* [[TMP68]], align 8
+// CHECK13-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 0
+// CHECK13-NEXT:    [[TMP70:%.*]] = bitcast i8** [[TMP69]] to i64*
+// CHECK13-NEXT:    store i64 [[TMP62]], i64* [[TMP70]], align 8
+// CHECK13-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 0
+// CHECK13-NEXT:    store i8* null, i8** [[TMP71]], align 8
+// CHECK13-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 1
+// CHECK13-NEXT:    [[TMP73:%.*]] = bitcast i8** [[TMP72]] to i64*
+// CHECK13-NEXT:    store i64 [[TMP64]], i64* [[TMP73]], align 8
+// CHECK13-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 1
 // CHECK13-NEXT:    [[TMP75:%.*]] = bitcast i8** [[TMP74]] to i64*
-// CHECK13-NEXT:    store i64 [[TMP66]], i64* [[TMP75]], align 8
-// CHECK13-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 1
-// CHECK13-NEXT:    [[TMP77:%.*]] = bitcast i8** [[TMP76]] to i64*
-// CHECK13-NEXT:    store i64 [[TMP66]], i64* [[TMP77]], align 8
-// CHECK13-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES24]], i32 0, i32 1
-// CHECK13-NEXT:    store i64 4, i64* [[TMP78]], align 8
-// CHECK13-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 1
-// CHECK13-NEXT:    store i8* null, i8** [[TMP79]], align 8
-// CHECK13-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 2
-// CHECK13-NEXT:    [[TMP81:%.*]] = bitcast i8** [[TMP80]] to i64*
-// CHECK13-NEXT:    store i64 [[TMP1]], i64* [[TMP81]], align 8
-// CHECK13-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 2
-// CHECK13-NEXT:    [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i64*
-// CHECK13-NEXT:    store i64 [[TMP1]], i64* [[TMP83]], align 8
-// CHECK13-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES24]], i32 0, i32 2
-// CHECK13-NEXT:    store i64 8, i64* [[TMP84]], align 8
-// CHECK13-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 2
-// CHECK13-NEXT:    store i8* null, i8** [[TMP85]], align 8
-// CHECK13-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 3
-// CHECK13-NEXT:    [[TMP87:%.*]] = bitcast i8** [[TMP86]] to i32**
-// CHECK13-NEXT:    store i32* [[VLA]], i32** [[TMP87]], align 8
-// CHECK13-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 3
-// CHECK13-NEXT:    [[TMP89:%.*]] = bitcast i8** [[TMP88]] to i32**
-// CHECK13-NEXT:    store i32* [[VLA]], i32** [[TMP89]], align 8
-// CHECK13-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES24]], i32 0, i32 3
-// CHECK13-NEXT:    store i64 [[TMP67]], i64* [[TMP90]], align 8
-// CHECK13-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 3
-// CHECK13-NEXT:    store i8* null, i8** [[TMP91]], align 8
-// CHECK13-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 0
-// CHECK13-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 0
-// CHECK13-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES24]], i32 0, i32 0
-// CHECK13-NEXT:    [[TMP95:%.*]] = load i32, i32* [[N]], align 4
-// CHECK13-NEXT:    store i32 [[TMP95]], i32* [[DOTCAPTURE_EXPR_26]], align 4
-// CHECK13-NEXT:    [[TMP96:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_26]], align 4
-// CHECK13-NEXT:    [[SUB28:%.*]] = sub nsw i32 [[TMP96]], 0
+// CHECK13-NEXT:    store i64 [[TMP64]], i64* [[TMP75]], align 8
+// CHECK13-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 1
+// CHECK13-NEXT:    store i8* null, i8** [[TMP76]], align 8
+// CHECK13-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 2
+// CHECK13-NEXT:    [[TMP78:%.*]] = bitcast i8** [[TMP77]] to i64*
+// CHECK13-NEXT:    store i64 [[TMP1]], i64* [[TMP78]], align 8
+// CHECK13-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 2
+// CHECK13-NEXT:    [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i64*
+// CHECK13-NEXT:    store i64 [[TMP1]], i64* [[TMP80]], align 8
+// CHECK13-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 2
+// CHECK13-NEXT:    store i8* null, i8** [[TMP81]], align 8
+// CHECK13-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 3
+// CHECK13-NEXT:    [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i32**
+// CHECK13-NEXT:    store i32* [[VLA]], i32** [[TMP83]], align 8
+// CHECK13-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 3
+// CHECK13-NEXT:    [[TMP85:%.*]] = bitcast i8** [[TMP84]] to i32**
+// CHECK13-NEXT:    store i32* [[VLA]], i32** [[TMP85]], align 8
+// CHECK13-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES24]], i32 0, i32 3
+// CHECK13-NEXT:    store i64 [[TMP65]], i64* [[TMP86]], align 8
+// CHECK13-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 3
+// CHECK13-NEXT:    store i8* null, i8** [[TMP87]], align 8
+// CHECK13-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 0
+// CHECK13-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 0
+// CHECK13-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES24]], i32 0, i32 0
+// CHECK13-NEXT:    [[TMP91:%.*]] = load i32, i32* [[N]], align 4
+// CHECK13-NEXT:    store i32 [[TMP91]], i32* [[DOTCAPTURE_EXPR_26]], align 4
+// CHECK13-NEXT:    [[TMP92:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_26]], align 4
+// CHECK13-NEXT:    [[SUB28:%.*]] = sub nsw i32 [[TMP92]], 0
 // CHECK13-NEXT:    [[DIV29:%.*]] = sdiv i32 [[SUB28]], 1
 // CHECK13-NEXT:    [[SUB30:%.*]] = sub nsw i32 [[DIV29]], 1
 // CHECK13-NEXT:    store i32 [[SUB30]], i32* [[DOTCAPTURE_EXPR_27]], align 4
-// CHECK13-NEXT:    [[TMP97:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_27]], align 4
-// CHECK13-NEXT:    [[ADD31:%.*]] = add nsw i32 [[TMP97]], 1
-// CHECK13-NEXT:    [[TMP98:%.*]] = zext i32 [[ADD31]] to i64
-// CHECK13-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP98]])
-// CHECK13-NEXT:    [[TMP99:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l158.region_id, i32 4, i8** [[TMP92]], i8** [[TMP93]], i64* [[TMP94]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.7, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK13-NEXT:    [[TMP100:%.*]] = icmp ne i32 [[TMP99]], 0
-// CHECK13-NEXT:    br i1 [[TMP100]], label [[OMP_OFFLOAD_FAILED32:%.*]], label [[OMP_OFFLOAD_CONT33:%.*]]
+// CHECK13-NEXT:    [[TMP93:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_27]], align 4
+// CHECK13-NEXT:    [[ADD31:%.*]] = add nsw i32 [[TMP93]], 1
+// CHECK13-NEXT:    [[TMP94:%.*]] = zext i32 [[ADD31]] to i64
+// CHECK13-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP94]])
+// CHECK13-NEXT:    [[TMP95:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l158.region_id, i32 4, i8** [[TMP88]], i8** [[TMP89]], i64* [[TMP90]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK13-NEXT:    [[TMP96:%.*]] = icmp ne i32 [[TMP95]], 0
+// CHECK13-NEXT:    br i1 [[TMP96]], label [[OMP_OFFLOAD_FAILED32:%.*]], label [[OMP_OFFLOAD_CONT33:%.*]]
 // CHECK13:       omp_offload.failed32:
-// CHECK13-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l158(i64 [[TMP64]], i64 [[TMP66]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK13-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l158(i64 [[TMP62]], i64 [[TMP64]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK13-NEXT:    br label [[OMP_OFFLOAD_CONT33]]
 // CHECK13:       omp_offload.cont33:
-// CHECK13-NEXT:    [[TMP101:%.*]] = load i32, i32* [[N]], align 4
+// CHECK13-NEXT:    [[TMP97:%.*]] = load i32, i32* [[N]], align 4
 // CHECK13-NEXT:    [[CONV35:%.*]] = bitcast i64* [[N_CASTED34]] to i32*
-// CHECK13-NEXT:    store i32 [[TMP101]], i32* [[CONV35]], align 4
-// CHECK13-NEXT:    [[TMP102:%.*]] = load i64, i64* [[N_CASTED34]], align 8
-// CHECK13-NEXT:    [[TMP103:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK13-NEXT:    [[TMP104:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS36]], i32 0, i32 0
-// CHECK13-NEXT:    [[TMP105:%.*]] = bitcast i8** [[TMP104]] to i64*
-// CHECK13-NEXT:    store i64 [[TMP102]], i64* [[TMP105]], align 8
-// CHECK13-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS37]], i32 0, i32 0
+// CHECK13-NEXT:    store i32 [[TMP97]], i32* [[CONV35]], align 4
+// CHECK13-NEXT:    [[TMP98:%.*]] = load i64, i64* [[N_CASTED34]], align 8
+// CHECK13-NEXT:    [[TMP99:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK13-NEXT:    [[TMP100:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES39]] to i8*
+// CHECK13-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP100]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes.12 to i8*), i64 24, i1 false)
+// CHECK13-NEXT:    [[TMP101:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS36]], i32 0, i32 0
+// CHECK13-NEXT:    [[TMP102:%.*]] = bitcast i8** [[TMP101]] to i64*
+// CHECK13-NEXT:    store i64 [[TMP98]], i64* [[TMP102]], align 8
+// CHECK13-NEXT:    [[TMP103:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS37]], i32 0, i32 0
+// CHECK13-NEXT:    [[TMP104:%.*]] = bitcast i8** [[TMP103]] to i64*
+// CHECK13-NEXT:    store i64 [[TMP98]], i64* [[TMP104]], align 8
+// CHECK13-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS38]], i64 0, i64 0
+// CHECK13-NEXT:    store i8* null, i8** [[TMP105]], align 8
+// CHECK13-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS36]], i32 0, i32 1
 // CHECK13-NEXT:    [[TMP107:%.*]] = bitcast i8** [[TMP106]] to i64*
-// CHECK13-NEXT:    store i64 [[TMP102]], i64* [[TMP107]], align 8
-// CHECK13-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES39]], i32 0, i32 0
-// CHECK13-NEXT:    store i64 4, i64* [[TMP108]], align 8
-// CHECK13-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS38]], i64 0, i64 0
-// CHECK13-NEXT:    store i8* null, i8** [[TMP109]], align 8
-// CHECK13-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS36]], i32 0, i32 1
-// CHECK13-NEXT:    [[TMP111:%.*]] = bitcast i8** [[TMP110]] to i64*
-// CHECK13-NEXT:    store i64 [[TMP1]], i64* [[TMP111]], align 8
-// CHECK13-NEXT:    [[TMP112:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS37]], i32 0, i32 1
-// CHECK13-NEXT:    [[TMP113:%.*]] = bitcast i8** [[TMP112]] to i64*
-// CHECK13-NEXT:    store i64 [[TMP1]], i64* [[TMP113]], align 8
-// CHECK13-NEXT:    [[TMP114:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES39]], i32 0, i32 1
-// CHECK13-NEXT:    store i64 8, i64* [[TMP114]], align 8
-// CHECK13-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS38]], i64 0, i64 1
-// CHECK13-NEXT:    store i8* null, i8** [[TMP115]], align 8
-// CHECK13-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS36]], i32 0, i32 2
-// CHECK13-NEXT:    [[TMP117:%.*]] = bitcast i8** [[TMP116]] to i32**
-// CHECK13-NEXT:    store i32* [[VLA]], i32** [[TMP117]], align 8
-// CHECK13-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS37]], i32 0, i32 2
-// CHECK13-NEXT:    [[TMP119:%.*]] = bitcast i8** [[TMP118]] to i32**
-// CHECK13-NEXT:    store i32* [[VLA]], i32** [[TMP119]], align 8
-// CHECK13-NEXT:    [[TMP120:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES39]], i32 0, i32 2
-// CHECK13-NEXT:    store i64 [[TMP103]], i64* [[TMP120]], align 8
-// CHECK13-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS38]], i64 0, i64 2
-// CHECK13-NEXT:    store i8* null, i8** [[TMP121]], align 8
-// CHECK13-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS36]], i32 0, i32 0
-// CHECK13-NEXT:    [[TMP123:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS37]], i32 0, i32 0
-// CHECK13-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES39]], i32 0, i32 0
-// CHECK13-NEXT:    [[TMP125:%.*]] = load i32, i32* [[N]], align 4
-// CHECK13-NEXT:    store i32 [[TMP125]], i32* [[DOTCAPTURE_EXPR_41]], align 4
-// CHECK13-NEXT:    [[TMP126:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_41]], align 4
-// CHECK13-NEXT:    [[SUB43:%.*]] = sub nsw i32 [[TMP126]], 0
+// CHECK13-NEXT:    store i64 [[TMP1]], i64* [[TMP107]], align 8
+// CHECK13-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS37]], i32 0, i32 1
+// CHECK13-NEXT:    [[TMP109:%.*]] = bitcast i8** [[TMP108]] to i64*
+// CHECK13-NEXT:    store i64 [[TMP1]], i64* [[TMP109]], align 8
+// CHECK13-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS38]], i64 0, i64 1
+// CHECK13-NEXT:    store i8* null, i8** [[TMP110]], align 8
+// CHECK13-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS36]], i32 0, i32 2
+// CHECK13-NEXT:    [[TMP112:%.*]] = bitcast i8** [[TMP111]] to i32**
+// CHECK13-NEXT:    store i32* [[VLA]], i32** [[TMP112]], align 8
+// CHECK13-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS37]], i32 0, i32 2
+// CHECK13-NEXT:    [[TMP114:%.*]] = bitcast i8** [[TMP113]] to i32**
+// CHECK13-NEXT:    store i32* [[VLA]], i32** [[TMP114]], align 8
+// CHECK13-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES39]], i32 0, i32 2
+// CHECK13-NEXT:    store i64 [[TMP99]], i64* [[TMP115]], align 8
+// CHECK13-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS38]], i64 0, i64 2
+// CHECK13-NEXT:    store i8* null, i8** [[TMP116]], align 8
+// CHECK13-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS36]], i32 0, i32 0
+// CHECK13-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS37]], i32 0, i32 0
+// CHECK13-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES39]], i32 0, i32 0
+// CHECK13-NEXT:    [[TMP120:%.*]] = load i32, i32* [[N]], align 4
+// CHECK13-NEXT:    store i32 [[TMP120]], i32* [[DOTCAPTURE_EXPR_41]], align 4
+// CHECK13-NEXT:    [[TMP121:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_41]], align 4
+// CHECK13-NEXT:    [[SUB43:%.*]] = sub nsw i32 [[TMP121]], 0
 // CHECK13-NEXT:    [[DIV44:%.*]] = sdiv i32 [[SUB43]], 1
 // CHECK13-NEXT:    [[SUB45:%.*]] = sub nsw i32 [[DIV44]], 1
 // CHECK13-NEXT:    store i32 [[SUB45]], i32* [[DOTCAPTURE_EXPR_42]], align 4
-// CHECK13-NEXT:    [[TMP127:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_42]], align 4
-// CHECK13-NEXT:    [[ADD46:%.*]] = add nsw i32 [[TMP127]], 1
-// CHECK13-NEXT:    [[TMP128:%.*]] = zext i32 [[ADD46]] to i64
-// CHECK13-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP128]])
-// CHECK13-NEXT:    [[TMP129:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l163.region_id, i32 3, i8** [[TMP122]], i8** [[TMP123]], i64* [[TMP124]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.10, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK13-NEXT:    [[TMP130:%.*]] = icmp ne i32 [[TMP129]], 0
-// CHECK13-NEXT:    br i1 [[TMP130]], label [[OMP_OFFLOAD_FAILED47:%.*]], label [[OMP_OFFLOAD_CONT48:%.*]]
+// CHECK13-NEXT:    [[TMP122:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_42]], align 4
+// CHECK13-NEXT:    [[ADD46:%.*]] = add nsw i32 [[TMP122]], 1
+// CHECK13-NEXT:    [[TMP123:%.*]] = zext i32 [[ADD46]] to i64
+// CHECK13-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP123]])
+// CHECK13-NEXT:    [[TMP124:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l163.region_id, i32 3, i8** [[TMP117]], i8** [[TMP118]], i64* [[TMP119]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK13-NEXT:    [[TMP125:%.*]] = icmp ne i32 [[TMP124]], 0
+// CHECK13-NEXT:    br i1 [[TMP125]], label [[OMP_OFFLOAD_FAILED47:%.*]], label [[OMP_OFFLOAD_CONT48:%.*]]
 // CHECK13:       omp_offload.failed47:
-// CHECK13-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l163(i64 [[TMP102]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK13-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l163(i64 [[TMP98]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK13-NEXT:    br label [[OMP_OFFLOAD_CONT48]]
 // CHECK13:       omp_offload.cont48:
-// CHECK13-NEXT:    [[TMP131:%.*]] = load i32, i32* [[M]], align 4
+// CHECK13-NEXT:    [[TMP126:%.*]] = load i32, i32* [[M]], align 4
 // CHECK13-NEXT:    [[CONV50:%.*]] = bitcast i64* [[M_CASTED49]] to i32*
-// CHECK13-NEXT:    store i32 [[TMP131]], i32* [[CONV50]], align 4
-// CHECK13-NEXT:    [[TMP132:%.*]] = load i64, i64* [[M_CASTED49]], align 8
-// CHECK13-NEXT:    [[TMP133:%.*]] = load i32, i32* [[N]], align 4
+// CHECK13-NEXT:    store i32 [[TMP126]], i32* [[CONV50]], align 4
+// CHECK13-NEXT:    [[TMP127:%.*]] = load i64, i64* [[M_CASTED49]], align 8
+// CHECK13-NEXT:    [[TMP128:%.*]] = load i32, i32* [[N]], align 4
 // CHECK13-NEXT:    [[CONV52:%.*]] = bitcast i64* [[N_CASTED51]] to i32*
-// CHECK13-NEXT:    store i32 [[TMP133]], i32* [[CONV52]], align 4
-// CHECK13-NEXT:    [[TMP134:%.*]] = load i64, i64* [[N_CASTED51]], align 8
-// CHECK13-NEXT:    [[TMP135:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK13-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 0
-// CHECK13-NEXT:    [[TMP137:%.*]] = bitcast i8** [[TMP136]] to i64*
-// CHECK13-NEXT:    store i64 [[TMP132]], i64* [[TMP137]], align 8
-// CHECK13-NEXT:    [[TMP138:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 0
-// CHECK13-NEXT:    [[TMP139:%.*]] = bitcast i8** [[TMP138]] to i64*
-// CHECK13-NEXT:    store i64 [[TMP132]], i64* [[TMP139]], align 8
-// CHECK13-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES56]], i32 0, i32 0
-// CHECK13-NEXT:    store i64 4, i64* [[TMP140]], align 8
-// CHECK13-NEXT:    [[TMP141:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS55]], i64 0, i64 0
+// CHECK13-NEXT:    store i32 [[TMP128]], i32* [[CONV52]], align 4
+// CHECK13-NEXT:    [[TMP129:%.*]] = load i64, i64* [[N_CASTED51]], align 8
+// CHECK13-NEXT:    [[TMP130:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK13-NEXT:    [[TMP131:%.*]] = bitcast [4 x i64]* [[DOTOFFLOAD_SIZES56]] to i8*
+// CHECK13-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP131]], i8* align 8 bitcast ([4 x i64]* @.offload_sizes.16 to i8*), i64 32, i1 false)
+// CHECK13-NEXT:    [[TMP132:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 0
+// CHECK13-NEXT:    [[TMP133:%.*]] = bitcast i8** [[TMP132]] to i64*
+// CHECK13-NEXT:    store i64 [[TMP127]], i64* [[TMP133]], align 8
+// CHECK13-NEXT:    [[TMP134:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 0
+// CHECK13-NEXT:    [[TMP135:%.*]] = bitcast i8** [[TMP134]] to i64*
+// CHECK13-NEXT:    store i64 [[TMP127]], i64* [[TMP135]], align 8
+// CHECK13-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS55]], i64 0, i64 0
+// CHECK13-NEXT:    store i8* null, i8** [[TMP136]], align 8
+// CHECK13-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 1
+// CHECK13-NEXT:    [[TMP138:%.*]] = bitcast i8** [[TMP137]] to i64*
+// CHECK13-NEXT:    store i64 [[TMP129]], i64* [[TMP138]], align 8
+// CHECK13-NEXT:    [[TMP139:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 1
+// CHECK13-NEXT:    [[TMP140:%.*]] = bitcast i8** [[TMP139]] to i64*
+// CHECK13-NEXT:    store i64 [[TMP129]], i64* [[TMP140]], align 8
+// CHECK13-NEXT:    [[TMP141:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS55]], i64 0, i64 1
 // CHECK13-NEXT:    store i8* null, i8** [[TMP141]], align 8
-// CHECK13-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 1
+// CHECK13-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 2
 // CHECK13-NEXT:    [[TMP143:%.*]] = bitcast i8** [[TMP142]] to i64*
-// CHECK13-NEXT:    store i64 [[TMP134]], i64* [[TMP143]], align 8
-// CHECK13-NEXT:    [[TMP144:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 1
+// CHECK13-NEXT:    store i64 [[TMP1]], i64* [[TMP143]], align 8
+// CHECK13-NEXT:    [[TMP144:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 2
 // CHECK13-NEXT:    [[TMP145:%.*]] = bitcast i8** [[TMP144]] to i64*
-// CHECK13-NEXT:    store i64 [[TMP134]], i64* [[TMP145]], align 8
-// CHECK13-NEXT:    [[TMP146:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES56]], i32 0, i32 1
-// CHECK13-NEXT:    store i64 4, i64* [[TMP146]], align 8
-// CHECK13-NEXT:    [[TMP147:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS55]], i64 0, i64 1
-// CHECK13-NEXT:    store i8* null, i8** [[TMP147]], align 8
-// CHECK13-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 2
-// CHECK13-NEXT:    [[TMP149:%.*]] = bitcast i8** [[TMP148]] to i64*
-// CHECK13-NEXT:    store i64 [[TMP1]], i64* [[TMP149]], align 8
-// CHECK13-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 2
-// CHECK13-NEXT:    [[TMP151:%.*]] = bitcast i8** [[TMP150]] to i64*
-// CHECK13-NEXT:    store i64 [[TMP1]], i64* [[TMP151]], align 8
-// CHECK13-NEXT:    [[TMP152:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES56]], i32 0, i32 2
-// CHECK13-NEXT:    store i64 8, i64* [[TMP152]], align 8
-// CHECK13-NEXT:    [[TMP153:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS55]], i64 0, i64 2
-// CHECK13-NEXT:    store i8* null, i8** [[TMP153]], align 8
-// CHECK13-NEXT:    [[TMP154:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 3
-// CHECK13-NEXT:    [[TMP155:%.*]] = bitcast i8** [[TMP154]] to i32**
-// CHECK13-NEXT:    store i32* [[VLA]], i32** [[TMP155]], align 8
-// CHECK13-NEXT:    [[TMP156:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 3
-// CHECK13-NEXT:    [[TMP157:%.*]] = bitcast i8** [[TMP156]] to i32**
-// CHECK13-NEXT:    store i32* [[VLA]], i32** [[TMP157]], align 8
-// CHECK13-NEXT:    [[TMP158:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES56]], i32 0, i32 3
-// CHECK13-NEXT:    store i64 [[TMP135]], i64* [[TMP158]], align 8
-// CHECK13-NEXT:    [[TMP159:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS55]], i64 0, i64 3
-// CHECK13-NEXT:    store i8* null, i8** [[TMP159]], align 8
-// CHECK13-NEXT:    [[TMP160:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 0
-// CHECK13-NEXT:    [[TMP161:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 0
-// CHECK13-NEXT:    [[TMP162:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES56]], i32 0, i32 0
-// CHECK13-NEXT:    [[TMP163:%.*]] = load i32, i32* [[N]], align 4
-// CHECK13-NEXT:    store i32 [[TMP163]], i32* [[DOTCAPTURE_EXPR_58]], align 4
-// CHECK13-NEXT:    [[TMP164:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_58]], align 4
-// CHECK13-NEXT:    [[SUB60:%.*]] = sub nsw i32 [[TMP164]], 0
+// CHECK13-NEXT:    store i64 [[TMP1]], i64* [[TMP145]], align 8
+// CHECK13-NEXT:    [[TMP146:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS55]], i64 0, i64 2
+// CHECK13-NEXT:    store i8* null, i8** [[TMP146]], align 8
+// CHECK13-NEXT:    [[TMP147:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 3
+// CHECK13-NEXT:    [[TMP148:%.*]] = bitcast i8** [[TMP147]] to i32**
+// CHECK13-NEXT:    store i32* [[VLA]], i32** [[TMP148]], align 8
+// CHECK13-NEXT:    [[TMP149:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 3
+// CHECK13-NEXT:    [[TMP150:%.*]] = bitcast i8** [[TMP149]] to i32**
+// CHECK13-NEXT:    store i32* [[VLA]], i32** [[TMP150]], align 8
+// CHECK13-NEXT:    [[TMP151:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES56]], i32 0, i32 3
+// CHECK13-NEXT:    store i64 [[TMP130]], i64* [[TMP151]], align 8
+// CHECK13-NEXT:    [[TMP152:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS55]], i64 0, i64 3
+// CHECK13-NEXT:    store i8* null, i8** [[TMP152]], align 8
+// CHECK13-NEXT:    [[TMP153:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 0
+// CHECK13-NEXT:    [[TMP154:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 0
+// CHECK13-NEXT:    [[TMP155:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES56]], i32 0, i32 0
+// CHECK13-NEXT:    [[TMP156:%.*]] = load i32, i32* [[N]], align 4
+// CHECK13-NEXT:    store i32 [[TMP156]], i32* [[DOTCAPTURE_EXPR_58]], align 4
+// CHECK13-NEXT:    [[TMP157:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_58]], align 4
+// CHECK13-NEXT:    [[SUB60:%.*]] = sub nsw i32 [[TMP157]], 0
 // CHECK13-NEXT:    [[DIV61:%.*]] = sdiv i32 [[SUB60]], 1
 // CHECK13-NEXT:    [[SUB62:%.*]] = sub nsw i32 [[DIV61]], 1
 // CHECK13-NEXT:    store i32 [[SUB62]], i32* [[DOTCAPTURE_EXPR_59]], align 4
-// CHECK13-NEXT:    [[TMP165:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_59]], align 4
-// CHECK13-NEXT:    [[ADD63:%.*]] = add nsw i32 [[TMP165]], 1
-// CHECK13-NEXT:    [[TMP166:%.*]] = zext i32 [[ADD63]] to i64
-// CHECK13-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP166]])
-// CHECK13-NEXT:    [[TMP167:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l168.region_id, i32 4, i8** [[TMP160]], i8** [[TMP161]], i64* [[TMP162]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK13-NEXT:    [[TMP168:%.*]] = icmp ne i32 [[TMP167]], 0
-// CHECK13-NEXT:    br i1 [[TMP168]], label [[OMP_OFFLOAD_FAILED64:%.*]], label [[OMP_OFFLOAD_CONT65:%.*]]
+// CHECK13-NEXT:    [[TMP158:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_59]], align 4
+// CHECK13-NEXT:    [[ADD63:%.*]] = add nsw i32 [[TMP158]], 1
+// CHECK13-NEXT:    [[TMP159:%.*]] = zext i32 [[ADD63]] to i64
+// CHECK13-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP159]])
+// CHECK13-NEXT:    [[TMP160:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l168.region_id, i32 4, i8** [[TMP153]], i8** [[TMP154]], i64* [[TMP155]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.17, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK13-NEXT:    [[TMP161:%.*]] = icmp ne i32 [[TMP160]], 0
+// CHECK13-NEXT:    br i1 [[TMP161]], label [[OMP_OFFLOAD_FAILED64:%.*]], label [[OMP_OFFLOAD_CONT65:%.*]]
 // CHECK13:       omp_offload.failed64:
-// CHECK13-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l168(i64 [[TMP132]], i64 [[TMP134]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK13-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l168(i64 [[TMP127]], i64 [[TMP129]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK13-NEXT:    br label [[OMP_OFFLOAD_CONT65]]
 // CHECK13:       omp_offload.cont65:
-// CHECK13-NEXT:    [[TMP169:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK13-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10EEiT_(i32 noundef signext [[TMP169]])
+// CHECK13-NEXT:    [[TMP162:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK13-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10EEiT_(i32 noundef signext [[TMP162]])
 // CHECK13-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK13-NEXT:    [[TMP170:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK13-NEXT:    call void @llvm.stackrestore(i8* [[TMP170]])
-// CHECK13-NEXT:    [[TMP171:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK13-NEXT:    ret i32 [[TMP171]]
+// CHECK13-NEXT:    [[TMP163:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK13-NEXT:    call void @llvm.stackrestore(i8* [[TMP163]])
+// CHECK13-NEXT:    [[TMP164:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK13-NEXT:    ret i32 [[TMP164]]
 //
 //
 // CHECK13-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l148
@@ -8239,11 +8225,11 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK13-NEXT:    store i32 [[TMP3]], i32* [[CONV2]], align 4
 // CHECK13-NEXT:    [[TMP4:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32* [[CONV1]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP4]])
+// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* [[CONV1]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP4]])
 // CHECK13-NEXT:    ret void
 //
 //
-// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..5
+// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..6
 // CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK13-NEXT:  entry:
 // CHECK13-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -8325,7 +8311,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK13-NEXT:    store i32 [[TMP21]], i32* [[CONV7]], align 4
 // CHECK13-NEXT:    [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]], i64 [[TMP22]])
+// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*, i64)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]], i64 [[TMP22]])
 // CHECK13-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK13:       omp.inner.for.inc:
 // CHECK13-NEXT:    [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -8367,7 +8353,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    ret void
 //
 //
-// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..6
+// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..7
 // CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK13-NEXT:  entry:
 // CHECK13-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -8487,11 +8473,11 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32*
 // CHECK13-NEXT:    [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8
 // CHECK13-NEXT:    [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8
-// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), i32* [[CONV]], i64 [[TMP0]], i32* [[TMP1]])
+// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), i32* [[CONV]], i64 [[TMP0]], i32* [[TMP1]])
 // CHECK13-NEXT:    ret void
 //
 //
-// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..8
+// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..10
 // CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] {
 // CHECK13-NEXT:  entry:
 // CHECK13-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -8563,7 +8549,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[TMP17:%.*]] = zext i32 [[TMP16]] to i64
 // CHECK13-NEXT:    [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
 // CHECK13-NEXT:    [[TMP19:%.*]] = zext i32 [[TMP18]] to i64
-// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]])
+// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]])
 // CHECK13-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK13:       omp.inner.for.inc:
 // CHECK13-NEXT:    [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -8582,7 +8568,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    ret void
 //
 //
-// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..9
+// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..11
 // CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] {
 // CHECK13-NEXT:  entry:
 // CHECK13-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -8706,11 +8692,11 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK13-NEXT:    store i32 [[TMP3]], i32* [[CONV2]], align 4
 // CHECK13-NEXT:    [[TMP4:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32* [[CONV1]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP4]])
+// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*, i64)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i32* [[CONV1]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP4]])
 // CHECK13-NEXT:    ret void
 //
 //
-// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..11
+// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..14
 // CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK13-NEXT:  entry:
 // CHECK13-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -8790,7 +8776,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK13-NEXT:    store i32 [[TMP20]], i32* [[CONV7]], align 4
 // CHECK13-NEXT:    [[TMP21:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*, i64)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]], i64 [[TMP21]])
+// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]], i64 [[TMP21]])
 // CHECK13-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK13:       omp.inner.for.inc:
 // CHECK13-NEXT:    [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -8809,7 +8795,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    ret void
 //
 //
-// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..12
+// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..15
 // CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK13-NEXT:  entry:
 // CHECK13-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -8915,7 +8901,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK13-LABEL: define {{[^@]+}}@_Z5tmainIiLi10EEiT_
-// CHECK13-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK13-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK13-NEXT:  entry:
 // CHECK13-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK13-NEXT:    [[A:%.*]] = alloca [10 x i32], align 4
@@ -8955,7 +8941,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK13-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK13-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK13-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l116.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.16, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK13-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l116.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.20, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.21, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK13-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK13-NEXT:    br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK13:       omp_offload.failed:
@@ -8973,7 +8959,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK13-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
 // CHECK13-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK13-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l121.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.19, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.20, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK13-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l121.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.24, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.25, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK13-NEXT:    [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
 // CHECK13-NEXT:    br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]]
 // CHECK13:       omp_offload.failed5:
@@ -9003,7 +8989,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0
 // CHECK13-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS8]], i32 0, i32 0
 // CHECK13-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK13-NEXT:    [[TMP32:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l126.region_id, i32 2, i8** [[TMP30]], i8** [[TMP31]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.23, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.24, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK13-NEXT:    [[TMP32:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l126.region_id, i32 2, i8** [[TMP30]], i8** [[TMP31]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.28, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.29, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK13-NEXT:    [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0
 // CHECK13-NEXT:    br i1 [[TMP33]], label [[OMP_OFFLOAD_FAILED11:%.*]], label [[OMP_OFFLOAD_CONT12:%.*]]
 // CHECK13:       omp_offload.failed11:
@@ -9021,7 +9007,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0
 // CHECK13-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 0
 // CHECK13-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK13-NEXT:    [[TMP41:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l131.region_id, i32 1, i8** [[TMP39]], i8** [[TMP40]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.27, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.28, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK13-NEXT:    [[TMP41:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l131.region_id, i32 1, i8** [[TMP39]], i8** [[TMP40]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.32, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.33, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK13-NEXT:    [[TMP42:%.*]] = icmp ne i32 [[TMP41]], 0
 // CHECK13-NEXT:    br i1 [[TMP42]], label [[OMP_OFFLOAD_FAILED17:%.*]], label [[OMP_OFFLOAD_CONT18:%.*]]
 // CHECK13:       omp_offload.failed17:
@@ -9051,7 +9037,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 0
 // CHECK13-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 0
 // CHECK13-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK13-NEXT:    [[TMP57:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l136.region_id, i32 2, i8** [[TMP55]], i8** [[TMP56]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.31, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.32, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK13-NEXT:    [[TMP57:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l136.region_id, i32 2, i8** [[TMP55]], i8** [[TMP56]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.36, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.37, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK13-NEXT:    [[TMP58:%.*]] = icmp ne i32 [[TMP57]], 0
 // CHECK13-NEXT:    br i1 [[TMP58]], label [[OMP_OFFLOAD_FAILED25:%.*]], label [[OMP_OFFLOAD_CONT26:%.*]]
 // CHECK13:       omp_offload.failed25:
@@ -9067,11 +9053,11 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8
 // CHECK13-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8
 // CHECK13-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8
-// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK13-NEXT:    ret void
 //
 //
-// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..14
+// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..18
 // CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK13-NEXT:  entry:
 // CHECK13-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -9119,7 +9105,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
 // CHECK13-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
 // CHECK13-NEXT:    [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
-// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]])
+// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]])
 // CHECK13-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK13:       omp.inner.for.inc:
 // CHECK13-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -9134,7 +9120,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    ret void
 //
 //
-// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..15
+// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..19
 // CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK13-NEXT:  entry:
 // CHECK13-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -9217,11 +9203,11 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8
 // CHECK13-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8
 // CHECK13-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8
-// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..17 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..22 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK13-NEXT:    ret void
 //
 //
-// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..17
+// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..22
 // CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK13-NEXT:  entry:
 // CHECK13-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -9269,7 +9255,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
 // CHECK13-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
 // CHECK13-NEXT:    [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
-// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]])
+// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..23 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]])
 // CHECK13-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK13:       omp.inner.for.inc:
 // CHECK13-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -9284,7 +9270,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    ret void
 //
 //
-// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..18
+// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..23
 // CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK13-NEXT:  entry:
 // CHECK13-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -9378,11 +9364,11 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK13-NEXT:    store i32 [[TMP2]], i32* [[CONV1]], align 4
 // CHECK13-NEXT:    [[TMP3:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i64)* @.omp_outlined..21 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i64 [[TMP3]])
+// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i64)* @.omp_outlined..26 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i64 [[TMP3]])
 // CHECK13-NEXT:    ret void
 //
 //
-// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..21
+// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..26
 // CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK13-NEXT:  entry:
 // CHECK13-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -9438,7 +9424,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK13-NEXT:    store i32 [[TMP12]], i32* [[CONV2]], align 4
 // CHECK13-NEXT:    [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..22 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]])
+// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..27 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]])
 // CHECK13-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK13:       omp.inner.for.inc:
 // CHECK13-NEXT:    [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -9453,7 +9439,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    ret void
 //
 //
-// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..22
+// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..27
 // CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK13-NEXT:  entry:
 // CHECK13-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -9561,11 +9547,11 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8
 // CHECK13-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8
 // CHECK13-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8
-// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..25 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK13-NEXT:    ret void
 //
 //
-// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..25
+// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..30
 // CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK13-NEXT:  entry:
 // CHECK13-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -9613,7 +9599,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
 // CHECK13-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
 // CHECK13-NEXT:    [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
-// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]])
+// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]])
 // CHECK13-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK13:       omp.inner.for.inc:
 // CHECK13-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -9628,7 +9614,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    ret void
 //
 //
-// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..26
+// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..31
 // CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK13-NEXT:  entry:
 // CHECK13-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -9720,11 +9706,11 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK13-NEXT:    store i32 [[TMP2]], i32* [[CONV1]], align 4
 // CHECK13-NEXT:    [[TMP3:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i64)* @.omp_outlined..29 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i64 [[TMP3]])
+// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i64)* @.omp_outlined..34 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i64 [[TMP3]])
 // CHECK13-NEXT:    ret void
 //
 //
-// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..29
+// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..34
 // CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK13-NEXT:  entry:
 // CHECK13-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -9780,7 +9766,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK13-NEXT:    store i32 [[TMP12]], i32* [[CONV2]], align 4
 // CHECK13-NEXT:    [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]])
+// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]])
 // CHECK13-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK13:       omp.inner.for.inc:
 // CHECK13-NEXT:    [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -9795,7 +9781,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    ret void
 //
 //
-// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..30
+// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..35
 // CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK13-NEXT:  entry:
 // CHECK13-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -9875,7 +9861,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK13-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK13-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK13-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK13-NEXT:  entry:
 // CHECK13-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK13-NEXT:    ret void
@@ -9949,14 +9935,14 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    store i32 [[TMP3]], i32* [[CONV]], align 4
 // CHECK14-NEXT:    [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8
 // CHECK14-NEXT:    [[TMP5:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK14-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK14-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i64*
-// CHECK14-NEXT:    store i64 [[TMP4]], i64* [[TMP7]], align 8
-// CHECK14-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK14-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i64*
-// CHECK14-NEXT:    store i64 [[TMP4]], i64* [[TMP9]], align 8
-// CHECK14-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK14-NEXT:    store i64 4, i64* [[TMP10]], align 8
+// CHECK14-NEXT:    [[TMP6:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK14-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP6]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes to i8*), i64 24, i1 false)
+// CHECK14-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK14-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i64*
+// CHECK14-NEXT:    store i64 [[TMP4]], i64* [[TMP8]], align 8
+// CHECK14-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK14-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i64*
+// CHECK14-NEXT:    store i64 [[TMP4]], i64* [[TMP10]], align 8
 // CHECK14-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
 // CHECK14-NEXT:    store i8* null, i8** [[TMP11]], align 8
 // CHECK14-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -9965,300 +9951,286 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK14-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i64*
 // CHECK14-NEXT:    store i64 [[TMP1]], i64* [[TMP15]], align 8
-// CHECK14-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK14-NEXT:    store i64 8, i64* [[TMP16]], align 8
-// CHECK14-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK14-NEXT:    store i8* null, i8** [[TMP17]], align 8
-// CHECK14-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK14-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
-// CHECK14-NEXT:    store i32* [[VLA]], i32** [[TMP19]], align 8
-// CHECK14-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK14-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32**
-// CHECK14-NEXT:    store i32* [[VLA]], i32** [[TMP21]], align 8
-// CHECK14-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK14-NEXT:    store i64 [[TMP5]], i64* [[TMP22]], align 8
-// CHECK14-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK14-NEXT:    store i8* null, i8** [[TMP23]], align 8
-// CHECK14-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK14-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK14-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK14-NEXT:    [[TMP27:%.*]] = load i32, i32* [[N]], align 4
-// CHECK14-NEXT:    store i32 [[TMP27]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK14-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK14-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP28]], 0
+// CHECK14-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK14-NEXT:    store i8* null, i8** [[TMP16]], align 8
+// CHECK14-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK14-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32**
+// CHECK14-NEXT:    store i32* [[VLA]], i32** [[TMP18]], align 8
+// CHECK14-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK14-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32**
+// CHECK14-NEXT:    store i32* [[VLA]], i32** [[TMP20]], align 8
+// CHECK14-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
+// CHECK14-NEXT:    store i64 [[TMP5]], i64* [[TMP21]], align 8
+// CHECK14-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK14-NEXT:    store i8* null, i8** [[TMP22]], align 8
+// CHECK14-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK14-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK14-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK14-NEXT:    [[TMP26:%.*]] = load i32, i32* [[N]], align 4
+// CHECK14-NEXT:    store i32 [[TMP26]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK14-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK14-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP27]], 0
 // CHECK14-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK14-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK14-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK14-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK14-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP29]], 1
-// CHECK14-NEXT:    [[TMP30:%.*]] = zext i32 [[ADD]] to i64
-// CHECK14-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP30]])
-// CHECK14-NEXT:    [[TMP31:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l148.region_id, i32 3, i8** [[TMP24]], i8** [[TMP25]], i64* [[TMP26]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK14-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
-// CHECK14-NEXT:    br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK14-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK14-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP28]], 1
+// CHECK14-NEXT:    [[TMP29:%.*]] = zext i32 [[ADD]] to i64
+// CHECK14-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP29]])
+// CHECK14-NEXT:    [[TMP30:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l148.region_id, i32 3, i8** [[TMP23]], i8** [[TMP24]], i64* [[TMP25]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK14-NEXT:    [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
+// CHECK14-NEXT:    br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK14:       omp_offload.failed:
 // CHECK14-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l148(i64 [[TMP4]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK14-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK14:       omp_offload.cont:
-// CHECK14-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N]], align 4
+// CHECK14-NEXT:    [[TMP32:%.*]] = load i32, i32* [[N]], align 4
 // CHECK14-NEXT:    [[CONV4:%.*]] = bitcast i64* [[N_CASTED3]] to i32*
-// CHECK14-NEXT:    store i32 [[TMP33]], i32* [[CONV4]], align 4
-// CHECK14-NEXT:    [[TMP34:%.*]] = load i64, i64* [[N_CASTED3]], align 8
-// CHECK14-NEXT:    [[TMP35:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK14-NEXT:    store i32 [[TMP32]], i32* [[CONV4]], align 4
+// CHECK14-NEXT:    [[TMP33:%.*]] = load i64, i64* [[N_CASTED3]], align 8
+// CHECK14-NEXT:    [[TMP34:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK14-NEXT:    [[TMP35:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES8]] to i8*
+// CHECK14-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP35]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes.4 to i8*), i64 24, i1 false)
 // CHECK14-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
 // CHECK14-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i64*
-// CHECK14-NEXT:    store i64 [[TMP34]], i64* [[TMP37]], align 8
+// CHECK14-NEXT:    store i64 [[TMP33]], i64* [[TMP37]], align 8
 // CHECK14-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
 // CHECK14-NEXT:    [[TMP39:%.*]] = bitcast i8** [[TMP38]] to i64*
-// CHECK14-NEXT:    store i64 [[TMP34]], i64* [[TMP39]], align 8
-// CHECK14-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 0
-// CHECK14-NEXT:    store i64 4, i64* [[TMP40]], align 8
-// CHECK14-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 0
-// CHECK14-NEXT:    store i8* null, i8** [[TMP41]], align 8
-// CHECK14-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 1
-// CHECK14-NEXT:    [[TMP43:%.*]] = bitcast i8** [[TMP42]] to i64*
-// CHECK14-NEXT:    store i64 [[TMP1]], i64* [[TMP43]], align 8
-// CHECK14-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 1
-// CHECK14-NEXT:    [[TMP45:%.*]] = bitcast i8** [[TMP44]] to i64*
-// CHECK14-NEXT:    store i64 [[TMP1]], i64* [[TMP45]], align 8
-// CHECK14-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 1
-// CHECK14-NEXT:    store i64 8, i64* [[TMP46]], align 8
-// CHECK14-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 1
-// CHECK14-NEXT:    store i8* null, i8** [[TMP47]], align 8
-// CHECK14-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 2
+// CHECK14-NEXT:    store i64 [[TMP33]], i64* [[TMP39]], align 8
+// CHECK14-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 0
+// CHECK14-NEXT:    store i8* null, i8** [[TMP40]], align 8
+// CHECK14-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 1
+// CHECK14-NEXT:    [[TMP42:%.*]] = bitcast i8** [[TMP41]] to i64*
+// CHECK14-NEXT:    store i64 [[TMP1]], i64* [[TMP42]], align 8
+// CHECK14-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 1
+// CHECK14-NEXT:    [[TMP44:%.*]] = bitcast i8** [[TMP43]] to i64*
+// CHECK14-NEXT:    store i64 [[TMP1]], i64* [[TMP44]], align 8
+// CHECK14-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 1
+// CHECK14-NEXT:    store i8* null, i8** [[TMP45]], align 8
+// CHECK14-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 2
+// CHECK14-NEXT:    [[TMP47:%.*]] = bitcast i8** [[TMP46]] to i32**
+// CHECK14-NEXT:    store i32* [[VLA]], i32** [[TMP47]], align 8
+// CHECK14-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 2
 // CHECK14-NEXT:    [[TMP49:%.*]] = bitcast i8** [[TMP48]] to i32**
 // CHECK14-NEXT:    store i32* [[VLA]], i32** [[TMP49]], align 8
-// CHECK14-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 2
-// CHECK14-NEXT:    [[TMP51:%.*]] = bitcast i8** [[TMP50]] to i32**
-// CHECK14-NEXT:    store i32* [[VLA]], i32** [[TMP51]], align 8
-// CHECK14-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 2
-// CHECK14-NEXT:    store i64 [[TMP35]], i64* [[TMP52]], align 8
-// CHECK14-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 2
-// CHECK14-NEXT:    store i8* null, i8** [[TMP53]], align 8
-// CHECK14-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
-// CHECK14-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
-// CHECK14-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 0
-// CHECK14-NEXT:    [[TMP57:%.*]] = load i32, i32* [[N]], align 4
-// CHECK14-NEXT:    store i32 [[TMP57]], i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK14-NEXT:    [[TMP58:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK14-NEXT:    [[SUB12:%.*]] = sub nsw i32 [[TMP58]], 0
+// CHECK14-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 2
+// CHECK14-NEXT:    store i64 [[TMP34]], i64* [[TMP50]], align 8
+// CHECK14-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 2
+// CHECK14-NEXT:    store i8* null, i8** [[TMP51]], align 8
+// CHECK14-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
+// CHECK14-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
+// CHECK14-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 0
+// CHECK14-NEXT:    [[TMP55:%.*]] = load i32, i32* [[N]], align 4
+// CHECK14-NEXT:    store i32 [[TMP55]], i32* [[DOTCAPTURE_EXPR_10]], align 4
+// CHECK14-NEXT:    [[TMP56:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
+// CHECK14-NEXT:    [[SUB12:%.*]] = sub nsw i32 [[TMP56]], 0
 // CHECK14-NEXT:    [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1
 // CHECK14-NEXT:    [[SUB14:%.*]] = sub nsw i32 [[DIV13]], 1
 // CHECK14-NEXT:    store i32 [[SUB14]], i32* [[DOTCAPTURE_EXPR_11]], align 4
-// CHECK14-NEXT:    [[TMP59:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_11]], align 4
-// CHECK14-NEXT:    [[ADD15:%.*]] = add nsw i32 [[TMP59]], 1
-// CHECK14-NEXT:    [[TMP60:%.*]] = zext i32 [[ADD15]] to i64
-// CHECK14-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP60]])
-// CHECK14-NEXT:    [[TMP61:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l153.region_id, i32 3, i8** [[TMP54]], i8** [[TMP55]], i64* [[TMP56]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK14-NEXT:    [[TMP62:%.*]] = icmp ne i32 [[TMP61]], 0
-// CHECK14-NEXT:    br i1 [[TMP62]], label [[OMP_OFFLOAD_FAILED16:%.*]], label [[OMP_OFFLOAD_CONT17:%.*]]
+// CHECK14-NEXT:    [[TMP57:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_11]], align 4
+// CHECK14-NEXT:    [[ADD15:%.*]] = add nsw i32 [[TMP57]], 1
+// CHECK14-NEXT:    [[TMP58:%.*]] = zext i32 [[ADD15]] to i64
+// CHECK14-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP58]])
+// CHECK14-NEXT:    [[TMP59:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l153.region_id, i32 3, i8** [[TMP52]], i8** [[TMP53]], i64* [[TMP54]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.5, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK14-NEXT:    [[TMP60:%.*]] = icmp ne i32 [[TMP59]], 0
+// CHECK14-NEXT:    br i1 [[TMP60]], label [[OMP_OFFLOAD_FAILED16:%.*]], label [[OMP_OFFLOAD_CONT17:%.*]]
 // CHECK14:       omp_offload.failed16:
-// CHECK14-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l153(i64 [[TMP34]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK14-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l153(i64 [[TMP33]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK14-NEXT:    br label [[OMP_OFFLOAD_CONT17]]
 // CHECK14:       omp_offload.cont17:
-// CHECK14-NEXT:    [[TMP63:%.*]] = load i32, i32* [[M]], align 4
+// CHECK14-NEXT:    [[TMP61:%.*]] = load i32, i32* [[M]], align 4
 // CHECK14-NEXT:    [[CONV18:%.*]] = bitcast i64* [[M_CASTED]] to i32*
-// CHECK14-NEXT:    store i32 [[TMP63]], i32* [[CONV18]], align 4
-// CHECK14-NEXT:    [[TMP64:%.*]] = load i64, i64* [[M_CASTED]], align 8
-// CHECK14-NEXT:    [[TMP65:%.*]] = load i32, i32* [[N]], align 4
+// CHECK14-NEXT:    store i32 [[TMP61]], i32* [[CONV18]], align 4
+// CHECK14-NEXT:    [[TMP62:%.*]] = load i64, i64* [[M_CASTED]], align 8
+// CHECK14-NEXT:    [[TMP63:%.*]] = load i32, i32* [[N]], align 4
 // CHECK14-NEXT:    [[CONV20:%.*]] = bitcast i64* [[N_CASTED19]] to i32*
-// CHECK14-NEXT:    store i32 [[TMP65]], i32* [[CONV20]], align 4
-// CHECK14-NEXT:    [[TMP66:%.*]] = load i64, i64* [[N_CASTED19]], align 8
-// CHECK14-NEXT:    [[TMP67:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK14-NEXT:    [[TMP68:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 0
-// CHECK14-NEXT:    [[TMP69:%.*]] = bitcast i8** [[TMP68]] to i64*
-// CHECK14-NEXT:    store i64 [[TMP64]], i64* [[TMP69]], align 8
-// CHECK14-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 0
-// CHECK14-NEXT:    [[TMP71:%.*]] = bitcast i8** [[TMP70]] to i64*
-// CHECK14-NEXT:    store i64 [[TMP64]], i64* [[TMP71]], align 8
-// CHECK14-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES24]], i32 0, i32 0
-// CHECK14-NEXT:    store i64 4, i64* [[TMP72]], align 8
-// CHECK14-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 0
-// CHECK14-NEXT:    store i8* null, i8** [[TMP73]], align 8
-// CHECK14-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 1
+// CHECK14-NEXT:    store i32 [[TMP63]], i32* [[CONV20]], align 4
+// CHECK14-NEXT:    [[TMP64:%.*]] = load i64, i64* [[N_CASTED19]], align 8
+// CHECK14-NEXT:    [[TMP65:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK14-NEXT:    [[TMP66:%.*]] = bitcast [4 x i64]* [[DOTOFFLOAD_SIZES24]] to i8*
+// CHECK14-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP66]], i8* align 8 bitcast ([4 x i64]* @.offload_sizes.8 to i8*), i64 32, i1 false)
+// CHECK14-NEXT:    [[TMP67:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 0
+// CHECK14-NEXT:    [[TMP68:%.*]] = bitcast i8** [[TMP67]] to i64*
+// CHECK14-NEXT:    store i64 [[TMP62]], i64* [[TMP68]], align 8
+// CHECK14-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 0
+// CHECK14-NEXT:    [[TMP70:%.*]] = bitcast i8** [[TMP69]] to i64*
+// CHECK14-NEXT:    store i64 [[TMP62]], i64* [[TMP70]], align 8
+// CHECK14-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 0
+// CHECK14-NEXT:    store i8* null, i8** [[TMP71]], align 8
+// CHECK14-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 1
+// CHECK14-NEXT:    [[TMP73:%.*]] = bitcast i8** [[TMP72]] to i64*
+// CHECK14-NEXT:    store i64 [[TMP64]], i64* [[TMP73]], align 8
+// CHECK14-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 1
 // CHECK14-NEXT:    [[TMP75:%.*]] = bitcast i8** [[TMP74]] to i64*
-// CHECK14-NEXT:    store i64 [[TMP66]], i64* [[TMP75]], align 8
-// CHECK14-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 1
-// CHECK14-NEXT:    [[TMP77:%.*]] = bitcast i8** [[TMP76]] to i64*
-// CHECK14-NEXT:    store i64 [[TMP66]], i64* [[TMP77]], align 8
-// CHECK14-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES24]], i32 0, i32 1
-// CHECK14-NEXT:    store i64 4, i64* [[TMP78]], align 8
-// CHECK14-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 1
-// CHECK14-NEXT:    store i8* null, i8** [[TMP79]], align 8
-// CHECK14-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 2
-// CHECK14-NEXT:    [[TMP81:%.*]] = bitcast i8** [[TMP80]] to i64*
-// CHECK14-NEXT:    store i64 [[TMP1]], i64* [[TMP81]], align 8
-// CHECK14-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 2
-// CHECK14-NEXT:    [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i64*
-// CHECK14-NEXT:    store i64 [[TMP1]], i64* [[TMP83]], align 8
-// CHECK14-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES24]], i32 0, i32 2
-// CHECK14-NEXT:    store i64 8, i64* [[TMP84]], align 8
-// CHECK14-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 2
-// CHECK14-NEXT:    store i8* null, i8** [[TMP85]], align 8
-// CHECK14-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 3
-// CHECK14-NEXT:    [[TMP87:%.*]] = bitcast i8** [[TMP86]] to i32**
-// CHECK14-NEXT:    store i32* [[VLA]], i32** [[TMP87]], align 8
-// CHECK14-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 3
-// CHECK14-NEXT:    [[TMP89:%.*]] = bitcast i8** [[TMP88]] to i32**
-// CHECK14-NEXT:    store i32* [[VLA]], i32** [[TMP89]], align 8
-// CHECK14-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES24]], i32 0, i32 3
-// CHECK14-NEXT:    store i64 [[TMP67]], i64* [[TMP90]], align 8
-// CHECK14-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 3
-// CHECK14-NEXT:    store i8* null, i8** [[TMP91]], align 8
-// CHECK14-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 0
-// CHECK14-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 0
-// CHECK14-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES24]], i32 0, i32 0
-// CHECK14-NEXT:    [[TMP95:%.*]] = load i32, i32* [[N]], align 4
-// CHECK14-NEXT:    store i32 [[TMP95]], i32* [[DOTCAPTURE_EXPR_26]], align 4
-// CHECK14-NEXT:    [[TMP96:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_26]], align 4
-// CHECK14-NEXT:    [[SUB28:%.*]] = sub nsw i32 [[TMP96]], 0
+// CHECK14-NEXT:    store i64 [[TMP64]], i64* [[TMP75]], align 8
+// CHECK14-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 1
+// CHECK14-NEXT:    store i8* null, i8** [[TMP76]], align 8
+// CHECK14-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 2
+// CHECK14-NEXT:    [[TMP78:%.*]] = bitcast i8** [[TMP77]] to i64*
+// CHECK14-NEXT:    store i64 [[TMP1]], i64* [[TMP78]], align 8
+// CHECK14-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 2
+// CHECK14-NEXT:    [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i64*
+// CHECK14-NEXT:    store i64 [[TMP1]], i64* [[TMP80]], align 8
+// CHECK14-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 2
+// CHECK14-NEXT:    store i8* null, i8** [[TMP81]], align 8
+// CHECK14-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 3
+// CHECK14-NEXT:    [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i32**
+// CHECK14-NEXT:    store i32* [[VLA]], i32** [[TMP83]], align 8
+// CHECK14-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 3
+// CHECK14-NEXT:    [[TMP85:%.*]] = bitcast i8** [[TMP84]] to i32**
+// CHECK14-NEXT:    store i32* [[VLA]], i32** [[TMP85]], align 8
+// CHECK14-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES24]], i32 0, i32 3
+// CHECK14-NEXT:    store i64 [[TMP65]], i64* [[TMP86]], align 8
+// CHECK14-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 3
+// CHECK14-NEXT:    store i8* null, i8** [[TMP87]], align 8
+// CHECK14-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 0
+// CHECK14-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 0
+// CHECK14-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES24]], i32 0, i32 0
+// CHECK14-NEXT:    [[TMP91:%.*]] = load i32, i32* [[N]], align 4
+// CHECK14-NEXT:    store i32 [[TMP91]], i32* [[DOTCAPTURE_EXPR_26]], align 4
+// CHECK14-NEXT:    [[TMP92:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_26]], align 4
+// CHECK14-NEXT:    [[SUB28:%.*]] = sub nsw i32 [[TMP92]], 0
 // CHECK14-NEXT:    [[DIV29:%.*]] = sdiv i32 [[SUB28]], 1
 // CHECK14-NEXT:    [[SUB30:%.*]] = sub nsw i32 [[DIV29]], 1
 // CHECK14-NEXT:    store i32 [[SUB30]], i32* [[DOTCAPTURE_EXPR_27]], align 4
-// CHECK14-NEXT:    [[TMP97:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_27]], align 4
-// CHECK14-NEXT:    [[ADD31:%.*]] = add nsw i32 [[TMP97]], 1
-// CHECK14-NEXT:    [[TMP98:%.*]] = zext i32 [[ADD31]] to i64
-// CHECK14-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP98]])
-// CHECK14-NEXT:    [[TMP99:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l158.region_id, i32 4, i8** [[TMP92]], i8** [[TMP93]], i64* [[TMP94]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.7, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK14-NEXT:    [[TMP100:%.*]] = icmp ne i32 [[TMP99]], 0
-// CHECK14-NEXT:    br i1 [[TMP100]], label [[OMP_OFFLOAD_FAILED32:%.*]], label [[OMP_OFFLOAD_CONT33:%.*]]
+// CHECK14-NEXT:    [[TMP93:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_27]], align 4
+// CHECK14-NEXT:    [[ADD31:%.*]] = add nsw i32 [[TMP93]], 1
+// CHECK14-NEXT:    [[TMP94:%.*]] = zext i32 [[ADD31]] to i64
+// CHECK14-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP94]])
+// CHECK14-NEXT:    [[TMP95:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l158.region_id, i32 4, i8** [[TMP88]], i8** [[TMP89]], i64* [[TMP90]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK14-NEXT:    [[TMP96:%.*]] = icmp ne i32 [[TMP95]], 0
+// CHECK14-NEXT:    br i1 [[TMP96]], label [[OMP_OFFLOAD_FAILED32:%.*]], label [[OMP_OFFLOAD_CONT33:%.*]]
 // CHECK14:       omp_offload.failed32:
-// CHECK14-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l158(i64 [[TMP64]], i64 [[TMP66]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK14-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l158(i64 [[TMP62]], i64 [[TMP64]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK14-NEXT:    br label [[OMP_OFFLOAD_CONT33]]
 // CHECK14:       omp_offload.cont33:
-// CHECK14-NEXT:    [[TMP101:%.*]] = load i32, i32* [[N]], align 4
+// CHECK14-NEXT:    [[TMP97:%.*]] = load i32, i32* [[N]], align 4
 // CHECK14-NEXT:    [[CONV35:%.*]] = bitcast i64* [[N_CASTED34]] to i32*
-// CHECK14-NEXT:    store i32 [[TMP101]], i32* [[CONV35]], align 4
-// CHECK14-NEXT:    [[TMP102:%.*]] = load i64, i64* [[N_CASTED34]], align 8
-// CHECK14-NEXT:    [[TMP103:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK14-NEXT:    [[TMP104:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS36]], i32 0, i32 0
-// CHECK14-NEXT:    [[TMP105:%.*]] = bitcast i8** [[TMP104]] to i64*
-// CHECK14-NEXT:    store i64 [[TMP102]], i64* [[TMP105]], align 8
-// CHECK14-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS37]], i32 0, i32 0
+// CHECK14-NEXT:    store i32 [[TMP97]], i32* [[CONV35]], align 4
+// CHECK14-NEXT:    [[TMP98:%.*]] = load i64, i64* [[N_CASTED34]], align 8
+// CHECK14-NEXT:    [[TMP99:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK14-NEXT:    [[TMP100:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES39]] to i8*
+// CHECK14-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP100]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes.12 to i8*), i64 24, i1 false)
+// CHECK14-NEXT:    [[TMP101:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS36]], i32 0, i32 0
+// CHECK14-NEXT:    [[TMP102:%.*]] = bitcast i8** [[TMP101]] to i64*
+// CHECK14-NEXT:    store i64 [[TMP98]], i64* [[TMP102]], align 8
+// CHECK14-NEXT:    [[TMP103:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS37]], i32 0, i32 0
+// CHECK14-NEXT:    [[TMP104:%.*]] = bitcast i8** [[TMP103]] to i64*
+// CHECK14-NEXT:    store i64 [[TMP98]], i64* [[TMP104]], align 8
+// CHECK14-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS38]], i64 0, i64 0
+// CHECK14-NEXT:    store i8* null, i8** [[TMP105]], align 8
+// CHECK14-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS36]], i32 0, i32 1
 // CHECK14-NEXT:    [[TMP107:%.*]] = bitcast i8** [[TMP106]] to i64*
-// CHECK14-NEXT:    store i64 [[TMP102]], i64* [[TMP107]], align 8
-// CHECK14-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES39]], i32 0, i32 0
-// CHECK14-NEXT:    store i64 4, i64* [[TMP108]], align 8
-// CHECK14-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS38]], i64 0, i64 0
-// CHECK14-NEXT:    store i8* null, i8** [[TMP109]], align 8
-// CHECK14-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS36]], i32 0, i32 1
-// CHECK14-NEXT:    [[TMP111:%.*]] = bitcast i8** [[TMP110]] to i64*
-// CHECK14-NEXT:    store i64 [[TMP1]], i64* [[TMP111]], align 8
-// CHECK14-NEXT:    [[TMP112:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS37]], i32 0, i32 1
-// CHECK14-NEXT:    [[TMP113:%.*]] = bitcast i8** [[TMP112]] to i64*
-// CHECK14-NEXT:    store i64 [[TMP1]], i64* [[TMP113]], align 8
-// CHECK14-NEXT:    [[TMP114:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES39]], i32 0, i32 1
-// CHECK14-NEXT:    store i64 8, i64* [[TMP114]], align 8
-// CHECK14-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS38]], i64 0, i64 1
-// CHECK14-NEXT:    store i8* null, i8** [[TMP115]], align 8
-// CHECK14-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS36]], i32 0, i32 2
-// CHECK14-NEXT:    [[TMP117:%.*]] = bitcast i8** [[TMP116]] to i32**
-// CHECK14-NEXT:    store i32* [[VLA]], i32** [[TMP117]], align 8
-// CHECK14-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS37]], i32 0, i32 2
-// CHECK14-NEXT:    [[TMP119:%.*]] = bitcast i8** [[TMP118]] to i32**
-// CHECK14-NEXT:    store i32* [[VLA]], i32** [[TMP119]], align 8
-// CHECK14-NEXT:    [[TMP120:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES39]], i32 0, i32 2
-// CHECK14-NEXT:    store i64 [[TMP103]], i64* [[TMP120]], align 8
-// CHECK14-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS38]], i64 0, i64 2
-// CHECK14-NEXT:    store i8* null, i8** [[TMP121]], align 8
-// CHECK14-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS36]], i32 0, i32 0
-// CHECK14-NEXT:    [[TMP123:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS37]], i32 0, i32 0
-// CHECK14-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES39]], i32 0, i32 0
-// CHECK14-NEXT:    [[TMP125:%.*]] = load i32, i32* [[N]], align 4
-// CHECK14-NEXT:    store i32 [[TMP125]], i32* [[DOTCAPTURE_EXPR_41]], align 4
-// CHECK14-NEXT:    [[TMP126:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_41]], align 4
-// CHECK14-NEXT:    [[SUB43:%.*]] = sub nsw i32 [[TMP126]], 0
+// CHECK14-NEXT:    store i64 [[TMP1]], i64* [[TMP107]], align 8
+// CHECK14-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS37]], i32 0, i32 1
+// CHECK14-NEXT:    [[TMP109:%.*]] = bitcast i8** [[TMP108]] to i64*
+// CHECK14-NEXT:    store i64 [[TMP1]], i64* [[TMP109]], align 8
+// CHECK14-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS38]], i64 0, i64 1
+// CHECK14-NEXT:    store i8* null, i8** [[TMP110]], align 8
+// CHECK14-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS36]], i32 0, i32 2
+// CHECK14-NEXT:    [[TMP112:%.*]] = bitcast i8** [[TMP111]] to i32**
+// CHECK14-NEXT:    store i32* [[VLA]], i32** [[TMP112]], align 8
+// CHECK14-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS37]], i32 0, i32 2
+// CHECK14-NEXT:    [[TMP114:%.*]] = bitcast i8** [[TMP113]] to i32**
+// CHECK14-NEXT:    store i32* [[VLA]], i32** [[TMP114]], align 8
+// CHECK14-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES39]], i32 0, i32 2
+// CHECK14-NEXT:    store i64 [[TMP99]], i64* [[TMP115]], align 8
+// CHECK14-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS38]], i64 0, i64 2
+// CHECK14-NEXT:    store i8* null, i8** [[TMP116]], align 8
+// CHECK14-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS36]], i32 0, i32 0
+// CHECK14-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS37]], i32 0, i32 0
+// CHECK14-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES39]], i32 0, i32 0
+// CHECK14-NEXT:    [[TMP120:%.*]] = load i32, i32* [[N]], align 4
+// CHECK14-NEXT:    store i32 [[TMP120]], i32* [[DOTCAPTURE_EXPR_41]], align 4
+// CHECK14-NEXT:    [[TMP121:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_41]], align 4
+// CHECK14-NEXT:    [[SUB43:%.*]] = sub nsw i32 [[TMP121]], 0
 // CHECK14-NEXT:    [[DIV44:%.*]] = sdiv i32 [[SUB43]], 1
 // CHECK14-NEXT:    [[SUB45:%.*]] = sub nsw i32 [[DIV44]], 1
 // CHECK14-NEXT:    store i32 [[SUB45]], i32* [[DOTCAPTURE_EXPR_42]], align 4
-// CHECK14-NEXT:    [[TMP127:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_42]], align 4
-// CHECK14-NEXT:    [[ADD46:%.*]] = add nsw i32 [[TMP127]], 1
-// CHECK14-NEXT:    [[TMP128:%.*]] = zext i32 [[ADD46]] to i64
-// CHECK14-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP128]])
-// CHECK14-NEXT:    [[TMP129:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l163.region_id, i32 3, i8** [[TMP122]], i8** [[TMP123]], i64* [[TMP124]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.10, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK14-NEXT:    [[TMP130:%.*]] = icmp ne i32 [[TMP129]], 0
-// CHECK14-NEXT:    br i1 [[TMP130]], label [[OMP_OFFLOAD_FAILED47:%.*]], label [[OMP_OFFLOAD_CONT48:%.*]]
+// CHECK14-NEXT:    [[TMP122:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_42]], align 4
+// CHECK14-NEXT:    [[ADD46:%.*]] = add nsw i32 [[TMP122]], 1
+// CHECK14-NEXT:    [[TMP123:%.*]] = zext i32 [[ADD46]] to i64
+// CHECK14-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP123]])
+// CHECK14-NEXT:    [[TMP124:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l163.region_id, i32 3, i8** [[TMP117]], i8** [[TMP118]], i64* [[TMP119]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK14-NEXT:    [[TMP125:%.*]] = icmp ne i32 [[TMP124]], 0
+// CHECK14-NEXT:    br i1 [[TMP125]], label [[OMP_OFFLOAD_FAILED47:%.*]], label [[OMP_OFFLOAD_CONT48:%.*]]
 // CHECK14:       omp_offload.failed47:
-// CHECK14-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l163(i64 [[TMP102]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK14-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l163(i64 [[TMP98]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK14-NEXT:    br label [[OMP_OFFLOAD_CONT48]]
 // CHECK14:       omp_offload.cont48:
-// CHECK14-NEXT:    [[TMP131:%.*]] = load i32, i32* [[M]], align 4
+// CHECK14-NEXT:    [[TMP126:%.*]] = load i32, i32* [[M]], align 4
 // CHECK14-NEXT:    [[CONV50:%.*]] = bitcast i64* [[M_CASTED49]] to i32*
-// CHECK14-NEXT:    store i32 [[TMP131]], i32* [[CONV50]], align 4
-// CHECK14-NEXT:    [[TMP132:%.*]] = load i64, i64* [[M_CASTED49]], align 8
-// CHECK14-NEXT:    [[TMP133:%.*]] = load i32, i32* [[N]], align 4
+// CHECK14-NEXT:    store i32 [[TMP126]], i32* [[CONV50]], align 4
+// CHECK14-NEXT:    [[TMP127:%.*]] = load i64, i64* [[M_CASTED49]], align 8
+// CHECK14-NEXT:    [[TMP128:%.*]] = load i32, i32* [[N]], align 4
 // CHECK14-NEXT:    [[CONV52:%.*]] = bitcast i64* [[N_CASTED51]] to i32*
-// CHECK14-NEXT:    store i32 [[TMP133]], i32* [[CONV52]], align 4
-// CHECK14-NEXT:    [[TMP134:%.*]] = load i64, i64* [[N_CASTED51]], align 8
-// CHECK14-NEXT:    [[TMP135:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK14-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 0
-// CHECK14-NEXT:    [[TMP137:%.*]] = bitcast i8** [[TMP136]] to i64*
-// CHECK14-NEXT:    store i64 [[TMP132]], i64* [[TMP137]], align 8
-// CHECK14-NEXT:    [[TMP138:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 0
-// CHECK14-NEXT:    [[TMP139:%.*]] = bitcast i8** [[TMP138]] to i64*
-// CHECK14-NEXT:    store i64 [[TMP132]], i64* [[TMP139]], align 8
-// CHECK14-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES56]], i32 0, i32 0
-// CHECK14-NEXT:    store i64 4, i64* [[TMP140]], align 8
-// CHECK14-NEXT:    [[TMP141:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS55]], i64 0, i64 0
+// CHECK14-NEXT:    store i32 [[TMP128]], i32* [[CONV52]], align 4
+// CHECK14-NEXT:    [[TMP129:%.*]] = load i64, i64* [[N_CASTED51]], align 8
+// CHECK14-NEXT:    [[TMP130:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK14-NEXT:    [[TMP131:%.*]] = bitcast [4 x i64]* [[DOTOFFLOAD_SIZES56]] to i8*
+// CHECK14-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP131]], i8* align 8 bitcast ([4 x i64]* @.offload_sizes.16 to i8*), i64 32, i1 false)
+// CHECK14-NEXT:    [[TMP132:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 0
+// CHECK14-NEXT:    [[TMP133:%.*]] = bitcast i8** [[TMP132]] to i64*
+// CHECK14-NEXT:    store i64 [[TMP127]], i64* [[TMP133]], align 8
+// CHECK14-NEXT:    [[TMP134:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 0
+// CHECK14-NEXT:    [[TMP135:%.*]] = bitcast i8** [[TMP134]] to i64*
+// CHECK14-NEXT:    store i64 [[TMP127]], i64* [[TMP135]], align 8
+// CHECK14-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS55]], i64 0, i64 0
+// CHECK14-NEXT:    store i8* null, i8** [[TMP136]], align 8
+// CHECK14-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 1
+// CHECK14-NEXT:    [[TMP138:%.*]] = bitcast i8** [[TMP137]] to i64*
+// CHECK14-NEXT:    store i64 [[TMP129]], i64* [[TMP138]], align 8
+// CHECK14-NEXT:    [[TMP139:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 1
+// CHECK14-NEXT:    [[TMP140:%.*]] = bitcast i8** [[TMP139]] to i64*
+// CHECK14-NEXT:    store i64 [[TMP129]], i64* [[TMP140]], align 8
+// CHECK14-NEXT:    [[TMP141:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS55]], i64 0, i64 1
 // CHECK14-NEXT:    store i8* null, i8** [[TMP141]], align 8
-// CHECK14-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 1
+// CHECK14-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 2
 // CHECK14-NEXT:    [[TMP143:%.*]] = bitcast i8** [[TMP142]] to i64*
-// CHECK14-NEXT:    store i64 [[TMP134]], i64* [[TMP143]], align 8
-// CHECK14-NEXT:    [[TMP144:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 1
+// CHECK14-NEXT:    store i64 [[TMP1]], i64* [[TMP143]], align 8
+// CHECK14-NEXT:    [[TMP144:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 2
 // CHECK14-NEXT:    [[TMP145:%.*]] = bitcast i8** [[TMP144]] to i64*
-// CHECK14-NEXT:    store i64 [[TMP134]], i64* [[TMP145]], align 8
-// CHECK14-NEXT:    [[TMP146:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES56]], i32 0, i32 1
-// CHECK14-NEXT:    store i64 4, i64* [[TMP146]], align 8
-// CHECK14-NEXT:    [[TMP147:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS55]], i64 0, i64 1
-// CHECK14-NEXT:    store i8* null, i8** [[TMP147]], align 8
-// CHECK14-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 2
-// CHECK14-NEXT:    [[TMP149:%.*]] = bitcast i8** [[TMP148]] to i64*
-// CHECK14-NEXT:    store i64 [[TMP1]], i64* [[TMP149]], align 8
-// CHECK14-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 2
-// CHECK14-NEXT:    [[TMP151:%.*]] = bitcast i8** [[TMP150]] to i64*
-// CHECK14-NEXT:    store i64 [[TMP1]], i64* [[TMP151]], align 8
-// CHECK14-NEXT:    [[TMP152:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES56]], i32 0, i32 2
-// CHECK14-NEXT:    store i64 8, i64* [[TMP152]], align 8
-// CHECK14-NEXT:    [[TMP153:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS55]], i64 0, i64 2
-// CHECK14-NEXT:    store i8* null, i8** [[TMP153]], align 8
-// CHECK14-NEXT:    [[TMP154:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 3
-// CHECK14-NEXT:    [[TMP155:%.*]] = bitcast i8** [[TMP154]] to i32**
-// CHECK14-NEXT:    store i32* [[VLA]], i32** [[TMP155]], align 8
-// CHECK14-NEXT:    [[TMP156:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 3
-// CHECK14-NEXT:    [[TMP157:%.*]] = bitcast i8** [[TMP156]] to i32**
-// CHECK14-NEXT:    store i32* [[VLA]], i32** [[TMP157]], align 8
-// CHECK14-NEXT:    [[TMP158:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES56]], i32 0, i32 3
-// CHECK14-NEXT:    store i64 [[TMP135]], i64* [[TMP158]], align 8
-// CHECK14-NEXT:    [[TMP159:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS55]], i64 0, i64 3
-// CHECK14-NEXT:    store i8* null, i8** [[TMP159]], align 8
-// CHECK14-NEXT:    [[TMP160:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 0
-// CHECK14-NEXT:    [[TMP161:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 0
-// CHECK14-NEXT:    [[TMP162:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES56]], i32 0, i32 0
-// CHECK14-NEXT:    [[TMP163:%.*]] = load i32, i32* [[N]], align 4
-// CHECK14-NEXT:    store i32 [[TMP163]], i32* [[DOTCAPTURE_EXPR_58]], align 4
-// CHECK14-NEXT:    [[TMP164:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_58]], align 4
-// CHECK14-NEXT:    [[SUB60:%.*]] = sub nsw i32 [[TMP164]], 0
+// CHECK14-NEXT:    store i64 [[TMP1]], i64* [[TMP145]], align 8
+// CHECK14-NEXT:    [[TMP146:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS55]], i64 0, i64 2
+// CHECK14-NEXT:    store i8* null, i8** [[TMP146]], align 8
+// CHECK14-NEXT:    [[TMP147:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 3
+// CHECK14-NEXT:    [[TMP148:%.*]] = bitcast i8** [[TMP147]] to i32**
+// CHECK14-NEXT:    store i32* [[VLA]], i32** [[TMP148]], align 8
+// CHECK14-NEXT:    [[TMP149:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 3
+// CHECK14-NEXT:    [[TMP150:%.*]] = bitcast i8** [[TMP149]] to i32**
+// CHECK14-NEXT:    store i32* [[VLA]], i32** [[TMP150]], align 8
+// CHECK14-NEXT:    [[TMP151:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES56]], i32 0, i32 3
+// CHECK14-NEXT:    store i64 [[TMP130]], i64* [[TMP151]], align 8
+// CHECK14-NEXT:    [[TMP152:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS55]], i64 0, i64 3
+// CHECK14-NEXT:    store i8* null, i8** [[TMP152]], align 8
+// CHECK14-NEXT:    [[TMP153:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 0
+// CHECK14-NEXT:    [[TMP154:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 0
+// CHECK14-NEXT:    [[TMP155:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES56]], i32 0, i32 0
+// CHECK14-NEXT:    [[TMP156:%.*]] = load i32, i32* [[N]], align 4
+// CHECK14-NEXT:    store i32 [[TMP156]], i32* [[DOTCAPTURE_EXPR_58]], align 4
+// CHECK14-NEXT:    [[TMP157:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_58]], align 4
+// CHECK14-NEXT:    [[SUB60:%.*]] = sub nsw i32 [[TMP157]], 0
 // CHECK14-NEXT:    [[DIV61:%.*]] = sdiv i32 [[SUB60]], 1
 // CHECK14-NEXT:    [[SUB62:%.*]] = sub nsw i32 [[DIV61]], 1
 // CHECK14-NEXT:    store i32 [[SUB62]], i32* [[DOTCAPTURE_EXPR_59]], align 4
-// CHECK14-NEXT:    [[TMP165:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_59]], align 4
-// CHECK14-NEXT:    [[ADD63:%.*]] = add nsw i32 [[TMP165]], 1
-// CHECK14-NEXT:    [[TMP166:%.*]] = zext i32 [[ADD63]] to i64
-// CHECK14-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP166]])
-// CHECK14-NEXT:    [[TMP167:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l168.region_id, i32 4, i8** [[TMP160]], i8** [[TMP161]], i64* [[TMP162]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK14-NEXT:    [[TMP168:%.*]] = icmp ne i32 [[TMP167]], 0
-// CHECK14-NEXT:    br i1 [[TMP168]], label [[OMP_OFFLOAD_FAILED64:%.*]], label [[OMP_OFFLOAD_CONT65:%.*]]
+// CHECK14-NEXT:    [[TMP158:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_59]], align 4
+// CHECK14-NEXT:    [[ADD63:%.*]] = add nsw i32 [[TMP158]], 1
+// CHECK14-NEXT:    [[TMP159:%.*]] = zext i32 [[ADD63]] to i64
+// CHECK14-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP159]])
+// CHECK14-NEXT:    [[TMP160:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l168.region_id, i32 4, i8** [[TMP153]], i8** [[TMP154]], i64* [[TMP155]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.17, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK14-NEXT:    [[TMP161:%.*]] = icmp ne i32 [[TMP160]], 0
+// CHECK14-NEXT:    br i1 [[TMP161]], label [[OMP_OFFLOAD_FAILED64:%.*]], label [[OMP_OFFLOAD_CONT65:%.*]]
 // CHECK14:       omp_offload.failed64:
-// CHECK14-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l168(i64 [[TMP132]], i64 [[TMP134]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK14-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l168(i64 [[TMP127]], i64 [[TMP129]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK14-NEXT:    br label [[OMP_OFFLOAD_CONT65]]
 // CHECK14:       omp_offload.cont65:
-// CHECK14-NEXT:    [[TMP169:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK14-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10EEiT_(i32 noundef signext [[TMP169]])
+// CHECK14-NEXT:    [[TMP162:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK14-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10EEiT_(i32 noundef signext [[TMP162]])
 // CHECK14-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK14-NEXT:    [[TMP170:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK14-NEXT:    call void @llvm.stackrestore(i8* [[TMP170]])
-// CHECK14-NEXT:    [[TMP171:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK14-NEXT:    ret i32 [[TMP171]]
+// CHECK14-NEXT:    [[TMP163:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK14-NEXT:    call void @llvm.stackrestore(i8* [[TMP163]])
+// CHECK14-NEXT:    [[TMP164:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK14-NEXT:    ret i32 [[TMP164]]
 //
 //
 // CHECK14-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l148
@@ -10708,11 +10680,11 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK14-NEXT:    store i32 [[TMP3]], i32* [[CONV2]], align 4
 // CHECK14-NEXT:    [[TMP4:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32* [[CONV1]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP4]])
+// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* [[CONV1]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP4]])
 // CHECK14-NEXT:    ret void
 //
 //
-// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..5
+// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..6
 // CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK14-NEXT:  entry:
 // CHECK14-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -10794,7 +10766,7 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK14-NEXT:    store i32 [[TMP21]], i32* [[CONV7]], align 4
 // CHECK14-NEXT:    [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]], i64 [[TMP22]])
+// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*, i64)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]], i64 [[TMP22]])
 // CHECK14-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK14:       omp.inner.for.inc:
 // CHECK14-NEXT:    [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -10836,7 +10808,7 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    ret void
 //
 //
-// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..6
+// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..7
 // CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK14-NEXT:  entry:
 // CHECK14-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -10956,11 +10928,11 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32*
 // CHECK14-NEXT:    [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8
 // CHECK14-NEXT:    [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8
-// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), i32* [[CONV]], i64 [[TMP0]], i32* [[TMP1]])
+// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), i32* [[CONV]], i64 [[TMP0]], i32* [[TMP1]])
 // CHECK14-NEXT:    ret void
 //
 //
-// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..8
+// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..10
 // CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] {
 // CHECK14-NEXT:  entry:
 // CHECK14-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -11032,7 +11004,7 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    [[TMP17:%.*]] = zext i32 [[TMP16]] to i64
 // CHECK14-NEXT:    [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
 // CHECK14-NEXT:    [[TMP19:%.*]] = zext i32 [[TMP18]] to i64
-// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]])
+// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]])
 // CHECK14-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK14:       omp.inner.for.inc:
 // CHECK14-NEXT:    [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -11051,7 +11023,7 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    ret void
 //
 //
-// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..9
+// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..11
 // CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] {
 // CHECK14-NEXT:  entry:
 // CHECK14-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -11175,11 +11147,11 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK14-NEXT:    store i32 [[TMP3]], i32* [[CONV2]], align 4
 // CHECK14-NEXT:    [[TMP4:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32* [[CONV1]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP4]])
+// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*, i64)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i32* [[CONV1]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP4]])
 // CHECK14-NEXT:    ret void
 //
 //
-// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..11
+// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..14
 // CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK14-NEXT:  entry:
 // CHECK14-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -11259,7 +11231,7 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK14-NEXT:    store i32 [[TMP20]], i32* [[CONV7]], align 4
 // CHECK14-NEXT:    [[TMP21:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*, i64)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]], i64 [[TMP21]])
+// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]], i64 [[TMP21]])
 // CHECK14-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK14:       omp.inner.for.inc:
 // CHECK14-NEXT:    [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -11278,7 +11250,7 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    ret void
 //
 //
-// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..12
+// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..15
 // CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK14-NEXT:  entry:
 // CHECK14-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -11384,7 +11356,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK14-LABEL: define {{[^@]+}}@_Z5tmainIiLi10EEiT_
-// CHECK14-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK14-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK14-NEXT:  entry:
 // CHECK14-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK14-NEXT:    [[A:%.*]] = alloca [10 x i32], align 4
@@ -11424,7 +11396,7 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK14-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK14-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK14-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l116.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.16, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK14-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l116.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.20, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.21, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK14-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK14-NEXT:    br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK14:       omp_offload.failed:
@@ -11442,7 +11414,7 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK14-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
 // CHECK14-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK14-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l121.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.19, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.20, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK14-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l121.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.24, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.25, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK14-NEXT:    [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
 // CHECK14-NEXT:    br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]]
 // CHECK14:       omp_offload.failed5:
@@ -11472,7 +11444,7 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0
 // CHECK14-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS8]], i32 0, i32 0
 // CHECK14-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK14-NEXT:    [[TMP32:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l126.region_id, i32 2, i8** [[TMP30]], i8** [[TMP31]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.23, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.24, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK14-NEXT:    [[TMP32:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l126.region_id, i32 2, i8** [[TMP30]], i8** [[TMP31]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.28, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.29, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK14-NEXT:    [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0
 // CHECK14-NEXT:    br i1 [[TMP33]], label [[OMP_OFFLOAD_FAILED11:%.*]], label [[OMP_OFFLOAD_CONT12:%.*]]
 // CHECK14:       omp_offload.failed11:
@@ -11490,7 +11462,7 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0
 // CHECK14-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 0
 // CHECK14-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK14-NEXT:    [[TMP41:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l131.region_id, i32 1, i8** [[TMP39]], i8** [[TMP40]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.27, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.28, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK14-NEXT:    [[TMP41:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l131.region_id, i32 1, i8** [[TMP39]], i8** [[TMP40]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.32, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.33, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK14-NEXT:    [[TMP42:%.*]] = icmp ne i32 [[TMP41]], 0
 // CHECK14-NEXT:    br i1 [[TMP42]], label [[OMP_OFFLOAD_FAILED17:%.*]], label [[OMP_OFFLOAD_CONT18:%.*]]
 // CHECK14:       omp_offload.failed17:
@@ -11520,7 +11492,7 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 0
 // CHECK14-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 0
 // CHECK14-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK14-NEXT:    [[TMP57:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l136.region_id, i32 2, i8** [[TMP55]], i8** [[TMP56]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.31, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.32, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK14-NEXT:    [[TMP57:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l136.region_id, i32 2, i8** [[TMP55]], i8** [[TMP56]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.36, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.37, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK14-NEXT:    [[TMP58:%.*]] = icmp ne i32 [[TMP57]], 0
 // CHECK14-NEXT:    br i1 [[TMP58]], label [[OMP_OFFLOAD_FAILED25:%.*]], label [[OMP_OFFLOAD_CONT26:%.*]]
 // CHECK14:       omp_offload.failed25:
@@ -11536,11 +11508,11 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8
 // CHECK14-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8
 // CHECK14-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8
-// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK14-NEXT:    ret void
 //
 //
-// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..14
+// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..18
 // CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK14-NEXT:  entry:
 // CHECK14-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -11588,7 +11560,7 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
 // CHECK14-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
 // CHECK14-NEXT:    [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
-// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]])
+// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]])
 // CHECK14-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK14:       omp.inner.for.inc:
 // CHECK14-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -11603,7 +11575,7 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    ret void
 //
 //
-// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..15
+// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..19
 // CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK14-NEXT:  entry:
 // CHECK14-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -11686,11 +11658,11 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8
 // CHECK14-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8
 // CHECK14-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8
-// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..17 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..22 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK14-NEXT:    ret void
 //
 //
-// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..17
+// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..22
 // CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK14-NEXT:  entry:
 // CHECK14-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -11738,7 +11710,7 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
 // CHECK14-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
 // CHECK14-NEXT:    [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
-// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]])
+// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..23 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]])
 // CHECK14-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK14:       omp.inner.for.inc:
 // CHECK14-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -11753,7 +11725,7 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    ret void
 //
 //
-// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..18
+// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..23
 // CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK14-NEXT:  entry:
 // CHECK14-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -11847,11 +11819,11 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK14-NEXT:    store i32 [[TMP2]], i32* [[CONV1]], align 4
 // CHECK14-NEXT:    [[TMP3:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i64)* @.omp_outlined..21 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i64 [[TMP3]])
+// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i64)* @.omp_outlined..26 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i64 [[TMP3]])
 // CHECK14-NEXT:    ret void
 //
 //
-// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..21
+// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..26
 // CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK14-NEXT:  entry:
 // CHECK14-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -11907,7 +11879,7 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK14-NEXT:    store i32 [[TMP12]], i32* [[CONV2]], align 4
 // CHECK14-NEXT:    [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..22 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]])
+// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..27 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]])
 // CHECK14-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK14:       omp.inner.for.inc:
 // CHECK14-NEXT:    [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -11922,7 +11894,7 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    ret void
 //
 //
-// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..22
+// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..27
 // CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK14-NEXT:  entry:
 // CHECK14-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -12030,11 +12002,11 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8
 // CHECK14-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8
 // CHECK14-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8
-// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..25 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK14-NEXT:    ret void
 //
 //
-// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..25
+// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..30
 // CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK14-NEXT:  entry:
 // CHECK14-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -12082,7 +12054,7 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
 // CHECK14-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
 // CHECK14-NEXT:    [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
-// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]])
+// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]])
 // CHECK14-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK14:       omp.inner.for.inc:
 // CHECK14-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -12097,7 +12069,7 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    ret void
 //
 //
-// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..26
+// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..31
 // CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK14-NEXT:  entry:
 // CHECK14-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -12189,11 +12161,11 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK14-NEXT:    store i32 [[TMP2]], i32* [[CONV1]], align 4
 // CHECK14-NEXT:    [[TMP3:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i64)* @.omp_outlined..29 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i64 [[TMP3]])
+// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i64)* @.omp_outlined..34 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i64 [[TMP3]])
 // CHECK14-NEXT:    ret void
 //
 //
-// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..29
+// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..34
 // CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK14-NEXT:  entry:
 // CHECK14-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -12249,7 +12221,7 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK14-NEXT:    store i32 [[TMP12]], i32* [[CONV2]], align 4
 // CHECK14-NEXT:    [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]])
+// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]])
 // CHECK14-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK14:       omp.inner.for.inc:
 // CHECK14-NEXT:    [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -12264,7 +12236,7 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    ret void
 //
 //
-// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..30
+// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..35
 // CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK14-NEXT:  entry:
 // CHECK14-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -12344,7 +12316,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK14-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK14-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK14-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK14-NEXT:  entry:
 // CHECK14-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK14-NEXT:    ret void
@@ -12417,14 +12389,14 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4
 // CHECK15-NEXT:    [[TMP4:%.*]] = mul nuw i32 [[TMP0]], 4
 // CHECK15-NEXT:    [[TMP5:%.*]] = sext i32 [[TMP4]] to i64
-// CHECK15-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK15-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32*
-// CHECK15-NEXT:    store i32 [[TMP3]], i32* [[TMP7]], align 4
-// CHECK15-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK15-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i32*
-// CHECK15-NEXT:    store i32 [[TMP3]], i32* [[TMP9]], align 4
-// CHECK15-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK15-NEXT:    store i64 4, i64* [[TMP10]], align 4
+// CHECK15-NEXT:    [[TMP6:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK15-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP6]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes to i8*), i32 24, i1 false)
+// CHECK15-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK15-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i32*
+// CHECK15-NEXT:    store i32 [[TMP3]], i32* [[TMP8]], align 4
+// CHECK15-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK15-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i32*
+// CHECK15-NEXT:    store i32 [[TMP3]], i32* [[TMP10]], align 4
 // CHECK15-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
 // CHECK15-NEXT:    store i8* null, i8** [[TMP11]], align 4
 // CHECK15-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -12433,298 +12405,284 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK15-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i32*
 // CHECK15-NEXT:    store i32 [[TMP0]], i32* [[TMP15]], align 4
-// CHECK15-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK15-NEXT:    store i64 4, i64* [[TMP16]], align 4
-// CHECK15-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK15-NEXT:    store i8* null, i8** [[TMP17]], align 4
-// CHECK15-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK15-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
-// CHECK15-NEXT:    store i32* [[VLA]], i32** [[TMP19]], align 4
-// CHECK15-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK15-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32**
-// CHECK15-NEXT:    store i32* [[VLA]], i32** [[TMP21]], align 4
-// CHECK15-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK15-NEXT:    store i64 [[TMP5]], i64* [[TMP22]], align 4
-// CHECK15-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK15-NEXT:    store i8* null, i8** [[TMP23]], align 4
-// CHECK15-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK15-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK15-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK15-NEXT:    [[TMP27:%.*]] = load i32, i32* [[N]], align 4
-// CHECK15-NEXT:    store i32 [[TMP27]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK15-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK15-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP28]], 0
+// CHECK15-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK15-NEXT:    store i8* null, i8** [[TMP16]], align 4
+// CHECK15-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK15-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32**
+// CHECK15-NEXT:    store i32* [[VLA]], i32** [[TMP18]], align 4
+// CHECK15-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK15-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32**
+// CHECK15-NEXT:    store i32* [[VLA]], i32** [[TMP20]], align 4
+// CHECK15-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
+// CHECK15-NEXT:    store i64 [[TMP5]], i64* [[TMP21]], align 4
+// CHECK15-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK15-NEXT:    store i8* null, i8** [[TMP22]], align 4
+// CHECK15-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK15-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK15-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK15-NEXT:    [[TMP26:%.*]] = load i32, i32* [[N]], align 4
+// CHECK15-NEXT:    store i32 [[TMP26]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK15-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK15-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP27]], 0
 // CHECK15-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK15-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK15-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK15-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK15-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP29]], 1
-// CHECK15-NEXT:    [[TMP30:%.*]] = zext i32 [[ADD]] to i64
-// CHECK15-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP30]])
-// CHECK15-NEXT:    [[TMP31:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l148.region_id, i32 3, i8** [[TMP24]], i8** [[TMP25]], i64* [[TMP26]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK15-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
-// CHECK15-NEXT:    br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK15-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK15-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP28]], 1
+// CHECK15-NEXT:    [[TMP29:%.*]] = zext i32 [[ADD]] to i64
+// CHECK15-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP29]])
+// CHECK15-NEXT:    [[TMP30:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l148.region_id, i32 3, i8** [[TMP23]], i8** [[TMP24]], i64* [[TMP25]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK15-NEXT:    [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
+// CHECK15-NEXT:    br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK15:       omp_offload.failed:
 // CHECK15-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l148(i32 [[TMP3]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK15-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK15:       omp_offload.cont:
-// CHECK15-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N]], align 4
-// CHECK15-NEXT:    store i32 [[TMP33]], i32* [[N_CASTED3]], align 4
-// CHECK15-NEXT:    [[TMP34:%.*]] = load i32, i32* [[N_CASTED3]], align 4
-// CHECK15-NEXT:    [[TMP35:%.*]] = mul nuw i32 [[TMP0]], 4
-// CHECK15-NEXT:    [[TMP36:%.*]] = sext i32 [[TMP35]] to i64
+// CHECK15-NEXT:    [[TMP32:%.*]] = load i32, i32* [[N]], align 4
+// CHECK15-NEXT:    store i32 [[TMP32]], i32* [[N_CASTED3]], align 4
+// CHECK15-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N_CASTED3]], align 4
+// CHECK15-NEXT:    [[TMP34:%.*]] = mul nuw i32 [[TMP0]], 4
+// CHECK15-NEXT:    [[TMP35:%.*]] = sext i32 [[TMP34]] to i64
+// CHECK15-NEXT:    [[TMP36:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES7]] to i8*
+// CHECK15-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP36]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes.4 to i8*), i32 24, i1 false)
 // CHECK15-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
 // CHECK15-NEXT:    [[TMP38:%.*]] = bitcast i8** [[TMP37]] to i32*
-// CHECK15-NEXT:    store i32 [[TMP34]], i32* [[TMP38]], align 4
+// CHECK15-NEXT:    store i32 [[TMP33]], i32* [[TMP38]], align 4
 // CHECK15-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
 // CHECK15-NEXT:    [[TMP40:%.*]] = bitcast i8** [[TMP39]] to i32*
-// CHECK15-NEXT:    store i32 [[TMP34]], i32* [[TMP40]], align 4
-// CHECK15-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 0
-// CHECK15-NEXT:    store i64 4, i64* [[TMP41]], align 4
-// CHECK15-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 0
-// CHECK15-NEXT:    store i8* null, i8** [[TMP42]], align 4
-// CHECK15-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 1
-// CHECK15-NEXT:    [[TMP44:%.*]] = bitcast i8** [[TMP43]] to i32*
-// CHECK15-NEXT:    store i32 [[TMP0]], i32* [[TMP44]], align 4
-// CHECK15-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 1
-// CHECK15-NEXT:    [[TMP46:%.*]] = bitcast i8** [[TMP45]] to i32*
-// CHECK15-NEXT:    store i32 [[TMP0]], i32* [[TMP46]], align 4
-// CHECK15-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 1
-// CHECK15-NEXT:    store i64 4, i64* [[TMP47]], align 4
-// CHECK15-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 1
-// CHECK15-NEXT:    store i8* null, i8** [[TMP48]], align 4
-// CHECK15-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 2
+// CHECK15-NEXT:    store i32 [[TMP33]], i32* [[TMP40]], align 4
+// CHECK15-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 0
+// CHECK15-NEXT:    store i8* null, i8** [[TMP41]], align 4
+// CHECK15-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 1
+// CHECK15-NEXT:    [[TMP43:%.*]] = bitcast i8** [[TMP42]] to i32*
+// CHECK15-NEXT:    store i32 [[TMP0]], i32* [[TMP43]], align 4
+// CHECK15-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 1
+// CHECK15-NEXT:    [[TMP45:%.*]] = bitcast i8** [[TMP44]] to i32*
+// CHECK15-NEXT:    store i32 [[TMP0]], i32* [[TMP45]], align 4
+// CHECK15-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 1
+// CHECK15-NEXT:    store i8* null, i8** [[TMP46]], align 4
+// CHECK15-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 2
+// CHECK15-NEXT:    [[TMP48:%.*]] = bitcast i8** [[TMP47]] to i32**
+// CHECK15-NEXT:    store i32* [[VLA]], i32** [[TMP48]], align 4
+// CHECK15-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 2
 // CHECK15-NEXT:    [[TMP50:%.*]] = bitcast i8** [[TMP49]] to i32**
 // CHECK15-NEXT:    store i32* [[VLA]], i32** [[TMP50]], align 4
-// CHECK15-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 2
-// CHECK15-NEXT:    [[TMP52:%.*]] = bitcast i8** [[TMP51]] to i32**
-// CHECK15-NEXT:    store i32* [[VLA]], i32** [[TMP52]], align 4
-// CHECK15-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 2
-// CHECK15-NEXT:    store i64 [[TMP36]], i64* [[TMP53]], align 4
-// CHECK15-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 2
-// CHECK15-NEXT:    store i8* null, i8** [[TMP54]], align 4
-// CHECK15-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
-// CHECK15-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
-// CHECK15-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 0
-// CHECK15-NEXT:    [[TMP58:%.*]] = load i32, i32* [[N]], align 4
-// CHECK15-NEXT:    store i32 [[TMP58]], i32* [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK15-NEXT:    [[TMP59:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK15-NEXT:    [[SUB11:%.*]] = sub nsw i32 [[TMP59]], 0
+// CHECK15-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 2
+// CHECK15-NEXT:    store i64 [[TMP35]], i64* [[TMP51]], align 4
+// CHECK15-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 2
+// CHECK15-NEXT:    store i8* null, i8** [[TMP52]], align 4
+// CHECK15-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
+// CHECK15-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
+// CHECK15-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 0
+// CHECK15-NEXT:    [[TMP56:%.*]] = load i32, i32* [[N]], align 4
+// CHECK15-NEXT:    store i32 [[TMP56]], i32* [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK15-NEXT:    [[TMP57:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK15-NEXT:    [[SUB11:%.*]] = sub nsw i32 [[TMP57]], 0
 // CHECK15-NEXT:    [[DIV12:%.*]] = sdiv i32 [[SUB11]], 1
 // CHECK15-NEXT:    [[SUB13:%.*]] = sub nsw i32 [[DIV12]], 1
 // CHECK15-NEXT:    store i32 [[SUB13]], i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK15-NEXT:    [[TMP60:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK15-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP60]], 1
-// CHECK15-NEXT:    [[TMP61:%.*]] = zext i32 [[ADD14]] to i64
-// CHECK15-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP61]])
-// CHECK15-NEXT:    [[TMP62:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l153.region_id, i32 3, i8** [[TMP55]], i8** [[TMP56]], i64* [[TMP57]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK15-NEXT:    [[TMP63:%.*]] = icmp ne i32 [[TMP62]], 0
-// CHECK15-NEXT:    br i1 [[TMP63]], label [[OMP_OFFLOAD_FAILED15:%.*]], label [[OMP_OFFLOAD_CONT16:%.*]]
+// CHECK15-NEXT:    [[TMP58:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
+// CHECK15-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP58]], 1
+// CHECK15-NEXT:    [[TMP59:%.*]] = zext i32 [[ADD14]] to i64
+// CHECK15-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP59]])
+// CHECK15-NEXT:    [[TMP60:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l153.region_id, i32 3, i8** [[TMP53]], i8** [[TMP54]], i64* [[TMP55]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.5, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK15-NEXT:    [[TMP61:%.*]] = icmp ne i32 [[TMP60]], 0
+// CHECK15-NEXT:    br i1 [[TMP61]], label [[OMP_OFFLOAD_FAILED15:%.*]], label [[OMP_OFFLOAD_CONT16:%.*]]
 // CHECK15:       omp_offload.failed15:
-// CHECK15-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l153(i32 [[TMP34]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK15-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l153(i32 [[TMP33]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK15-NEXT:    br label [[OMP_OFFLOAD_CONT16]]
 // CHECK15:       omp_offload.cont16:
-// CHECK15-NEXT:    [[TMP64:%.*]] = load i32, i32* [[M]], align 4
-// CHECK15-NEXT:    store i32 [[TMP64]], i32* [[M_CASTED]], align 4
-// CHECK15-NEXT:    [[TMP65:%.*]] = load i32, i32* [[M_CASTED]], align 4
-// CHECK15-NEXT:    [[TMP66:%.*]] = load i32, i32* [[N]], align 4
-// CHECK15-NEXT:    store i32 [[TMP66]], i32* [[N_CASTED17]], align 4
-// CHECK15-NEXT:    [[TMP67:%.*]] = load i32, i32* [[N_CASTED17]], align 4
-// CHECK15-NEXT:    [[TMP68:%.*]] = mul nuw i32 [[TMP0]], 4
-// CHECK15-NEXT:    [[TMP69:%.*]] = sext i32 [[TMP68]] to i64
-// CHECK15-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 0
-// CHECK15-NEXT:    [[TMP71:%.*]] = bitcast i8** [[TMP70]] to i32*
-// CHECK15-NEXT:    store i32 [[TMP65]], i32* [[TMP71]], align 4
-// CHECK15-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 0
-// CHECK15-NEXT:    [[TMP73:%.*]] = bitcast i8** [[TMP72]] to i32*
-// CHECK15-NEXT:    store i32 [[TMP65]], i32* [[TMP73]], align 4
-// CHECK15-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 0
-// CHECK15-NEXT:    store i64 4, i64* [[TMP74]], align 4
-// CHECK15-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 0
-// CHECK15-NEXT:    store i8* null, i8** [[TMP75]], align 4
-// CHECK15-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 1
+// CHECK15-NEXT:    [[TMP62:%.*]] = load i32, i32* [[M]], align 4
+// CHECK15-NEXT:    store i32 [[TMP62]], i32* [[M_CASTED]], align 4
+// CHECK15-NEXT:    [[TMP63:%.*]] = load i32, i32* [[M_CASTED]], align 4
+// CHECK15-NEXT:    [[TMP64:%.*]] = load i32, i32* [[N]], align 4
+// CHECK15-NEXT:    store i32 [[TMP64]], i32* [[N_CASTED17]], align 4
+// CHECK15-NEXT:    [[TMP65:%.*]] = load i32, i32* [[N_CASTED17]], align 4
+// CHECK15-NEXT:    [[TMP66:%.*]] = mul nuw i32 [[TMP0]], 4
+// CHECK15-NEXT:    [[TMP67:%.*]] = sext i32 [[TMP66]] to i64
+// CHECK15-NEXT:    [[TMP68:%.*]] = bitcast [4 x i64]* [[DOTOFFLOAD_SIZES21]] to i8*
+// CHECK15-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP68]], i8* align 4 bitcast ([4 x i64]* @.offload_sizes.8 to i8*), i32 32, i1 false)
+// CHECK15-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 0
+// CHECK15-NEXT:    [[TMP70:%.*]] = bitcast i8** [[TMP69]] to i32*
+// CHECK15-NEXT:    store i32 [[TMP63]], i32* [[TMP70]], align 4
+// CHECK15-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 0
+// CHECK15-NEXT:    [[TMP72:%.*]] = bitcast i8** [[TMP71]] to i32*
+// CHECK15-NEXT:    store i32 [[TMP63]], i32* [[TMP72]], align 4
+// CHECK15-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 0
+// CHECK15-NEXT:    store i8* null, i8** [[TMP73]], align 4
+// CHECK15-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 1
+// CHECK15-NEXT:    [[TMP75:%.*]] = bitcast i8** [[TMP74]] to i32*
+// CHECK15-NEXT:    store i32 [[TMP65]], i32* [[TMP75]], align 4
+// CHECK15-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 1
 // CHECK15-NEXT:    [[TMP77:%.*]] = bitcast i8** [[TMP76]] to i32*
-// CHECK15-NEXT:    store i32 [[TMP67]], i32* [[TMP77]], align 4
-// CHECK15-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 1
-// CHECK15-NEXT:    [[TMP79:%.*]] = bitcast i8** [[TMP78]] to i32*
-// CHECK15-NEXT:    store i32 [[TMP67]], i32* [[TMP79]], align 4
-// CHECK15-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 1
-// CHECK15-NEXT:    store i64 4, i64* [[TMP80]], align 4
-// CHECK15-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 1
-// CHECK15-NEXT:    store i8* null, i8** [[TMP81]], align 4
-// CHECK15-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 2
-// CHECK15-NEXT:    [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i32*
-// CHECK15-NEXT:    store i32 [[TMP0]], i32* [[TMP83]], align 4
-// CHECK15-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 2
-// CHECK15-NEXT:    [[TMP85:%.*]] = bitcast i8** [[TMP84]] to i32*
-// CHECK15-NEXT:    store i32 [[TMP0]], i32* [[TMP85]], align 4
-// CHECK15-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 2
-// CHECK15-NEXT:    store i64 4, i64* [[TMP86]], align 4
-// CHECK15-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 2
-// CHECK15-NEXT:    store i8* null, i8** [[TMP87]], align 4
-// CHECK15-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 3
-// CHECK15-NEXT:    [[TMP89:%.*]] = bitcast i8** [[TMP88]] to i32**
-// CHECK15-NEXT:    store i32* [[VLA]], i32** [[TMP89]], align 4
-// CHECK15-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 3
-// CHECK15-NEXT:    [[TMP91:%.*]] = bitcast i8** [[TMP90]] to i32**
-// CHECK15-NEXT:    store i32* [[VLA]], i32** [[TMP91]], align 4
-// CHECK15-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 3
-// CHECK15-NEXT:    store i64 [[TMP69]], i64* [[TMP92]], align 4
-// CHECK15-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 3
-// CHECK15-NEXT:    store i8* null, i8** [[TMP93]], align 4
-// CHECK15-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 0
-// CHECK15-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 0
-// CHECK15-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 0
-// CHECK15-NEXT:    [[TMP97:%.*]] = load i32, i32* [[N]], align 4
-// CHECK15-NEXT:    store i32 [[TMP97]], i32* [[DOTCAPTURE_EXPR_23]], align 4
-// CHECK15-NEXT:    [[TMP98:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_23]], align 4
-// CHECK15-NEXT:    [[SUB25:%.*]] = sub nsw i32 [[TMP98]], 0
+// CHECK15-NEXT:    store i32 [[TMP65]], i32* [[TMP77]], align 4
+// CHECK15-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 1
+// CHECK15-NEXT:    store i8* null, i8** [[TMP78]], align 4
+// CHECK15-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 2
+// CHECK15-NEXT:    [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i32*
+// CHECK15-NEXT:    store i32 [[TMP0]], i32* [[TMP80]], align 4
+// CHECK15-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 2
+// CHECK15-NEXT:    [[TMP82:%.*]] = bitcast i8** [[TMP81]] to i32*
+// CHECK15-NEXT:    store i32 [[TMP0]], i32* [[TMP82]], align 4
+// CHECK15-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 2
+// CHECK15-NEXT:    store i8* null, i8** [[TMP83]], align 4
+// CHECK15-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 3
+// CHECK15-NEXT:    [[TMP85:%.*]] = bitcast i8** [[TMP84]] to i32**
+// CHECK15-NEXT:    store i32* [[VLA]], i32** [[TMP85]], align 4
+// CHECK15-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 3
+// CHECK15-NEXT:    [[TMP87:%.*]] = bitcast i8** [[TMP86]] to i32**
+// CHECK15-NEXT:    store i32* [[VLA]], i32** [[TMP87]], align 4
+// CHECK15-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 3
+// CHECK15-NEXT:    store i64 [[TMP67]], i64* [[TMP88]], align 4
+// CHECK15-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 3
+// CHECK15-NEXT:    store i8* null, i8** [[TMP89]], align 4
+// CHECK15-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 0
+// CHECK15-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 0
+// CHECK15-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 0
+// CHECK15-NEXT:    [[TMP93:%.*]] = load i32, i32* [[N]], align 4
+// CHECK15-NEXT:    store i32 [[TMP93]], i32* [[DOTCAPTURE_EXPR_23]], align 4
+// CHECK15-NEXT:    [[TMP94:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_23]], align 4
+// CHECK15-NEXT:    [[SUB25:%.*]] = sub nsw i32 [[TMP94]], 0
 // CHECK15-NEXT:    [[DIV26:%.*]] = sdiv i32 [[SUB25]], 1
 // CHECK15-NEXT:    [[SUB27:%.*]] = sub nsw i32 [[DIV26]], 1
 // CHECK15-NEXT:    store i32 [[SUB27]], i32* [[DOTCAPTURE_EXPR_24]], align 4
-// CHECK15-NEXT:    [[TMP99:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_24]], align 4
-// CHECK15-NEXT:    [[ADD28:%.*]] = add nsw i32 [[TMP99]], 1
-// CHECK15-NEXT:    [[TMP100:%.*]] = zext i32 [[ADD28]] to i64
-// CHECK15-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP100]])
-// CHECK15-NEXT:    [[TMP101:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l158.region_id, i32 4, i8** [[TMP94]], i8** [[TMP95]], i64* [[TMP96]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.7, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK15-NEXT:    [[TMP102:%.*]] = icmp ne i32 [[TMP101]], 0
-// CHECK15-NEXT:    br i1 [[TMP102]], label [[OMP_OFFLOAD_FAILED29:%.*]], label [[OMP_OFFLOAD_CONT30:%.*]]
+// CHECK15-NEXT:    [[TMP95:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_24]], align 4
+// CHECK15-NEXT:    [[ADD28:%.*]] = add nsw i32 [[TMP95]], 1
+// CHECK15-NEXT:    [[TMP96:%.*]] = zext i32 [[ADD28]] to i64
+// CHECK15-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP96]])
+// CHECK15-NEXT:    [[TMP97:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l158.region_id, i32 4, i8** [[TMP90]], i8** [[TMP91]], i64* [[TMP92]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK15-NEXT:    [[TMP98:%.*]] = icmp ne i32 [[TMP97]], 0
+// CHECK15-NEXT:    br i1 [[TMP98]], label [[OMP_OFFLOAD_FAILED29:%.*]], label [[OMP_OFFLOAD_CONT30:%.*]]
 // CHECK15:       omp_offload.failed29:
-// CHECK15-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l158(i32 [[TMP65]], i32 [[TMP67]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK15-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l158(i32 [[TMP63]], i32 [[TMP65]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK15-NEXT:    br label [[OMP_OFFLOAD_CONT30]]
 // CHECK15:       omp_offload.cont30:
-// CHECK15-NEXT:    [[TMP103:%.*]] = load i32, i32* [[N]], align 4
-// CHECK15-NEXT:    store i32 [[TMP103]], i32* [[N_CASTED31]], align 4
-// CHECK15-NEXT:    [[TMP104:%.*]] = load i32, i32* [[N_CASTED31]], align 4
-// CHECK15-NEXT:    [[TMP105:%.*]] = mul nuw i32 [[TMP0]], 4
-// CHECK15-NEXT:    [[TMP106:%.*]] = sext i32 [[TMP105]] to i64
-// CHECK15-NEXT:    [[TMP107:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS32]], i32 0, i32 0
-// CHECK15-NEXT:    [[TMP108:%.*]] = bitcast i8** [[TMP107]] to i32*
-// CHECK15-NEXT:    store i32 [[TMP104]], i32* [[TMP108]], align 4
-// CHECK15-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS33]], i32 0, i32 0
+// CHECK15-NEXT:    [[TMP99:%.*]] = load i32, i32* [[N]], align 4
+// CHECK15-NEXT:    store i32 [[TMP99]], i32* [[N_CASTED31]], align 4
+// CHECK15-NEXT:    [[TMP100:%.*]] = load i32, i32* [[N_CASTED31]], align 4
+// CHECK15-NEXT:    [[TMP101:%.*]] = mul nuw i32 [[TMP0]], 4
+// CHECK15-NEXT:    [[TMP102:%.*]] = sext i32 [[TMP101]] to i64
+// CHECK15-NEXT:    [[TMP103:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES35]] to i8*
+// CHECK15-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP103]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes.12 to i8*), i32 24, i1 false)
+// CHECK15-NEXT:    [[TMP104:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS32]], i32 0, i32 0
+// CHECK15-NEXT:    [[TMP105:%.*]] = bitcast i8** [[TMP104]] to i32*
+// CHECK15-NEXT:    store i32 [[TMP100]], i32* [[TMP105]], align 4
+// CHECK15-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS33]], i32 0, i32 0
+// CHECK15-NEXT:    [[TMP107:%.*]] = bitcast i8** [[TMP106]] to i32*
+// CHECK15-NEXT:    store i32 [[TMP100]], i32* [[TMP107]], align 4
+// CHECK15-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS34]], i32 0, i32 0
+// CHECK15-NEXT:    store i8* null, i8** [[TMP108]], align 4
+// CHECK15-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS32]], i32 0, i32 1
 // CHECK15-NEXT:    [[TMP110:%.*]] = bitcast i8** [[TMP109]] to i32*
-// CHECK15-NEXT:    store i32 [[TMP104]], i32* [[TMP110]], align 4
-// CHECK15-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES35]], i32 0, i32 0
-// CHECK15-NEXT:    store i64 4, i64* [[TMP111]], align 4
-// CHECK15-NEXT:    [[TMP112:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS34]], i32 0, i32 0
-// CHECK15-NEXT:    store i8* null, i8** [[TMP112]], align 4
-// CHECK15-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS32]], i32 0, i32 1
-// CHECK15-NEXT:    [[TMP114:%.*]] = bitcast i8** [[TMP113]] to i32*
-// CHECK15-NEXT:    store i32 [[TMP0]], i32* [[TMP114]], align 4
-// CHECK15-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS33]], i32 0, i32 1
-// CHECK15-NEXT:    [[TMP116:%.*]] = bitcast i8** [[TMP115]] to i32*
-// CHECK15-NEXT:    store i32 [[TMP0]], i32* [[TMP116]], align 4
-// CHECK15-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES35]], i32 0, i32 1
-// CHECK15-NEXT:    store i64 4, i64* [[TMP117]], align 4
-// CHECK15-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS34]], i32 0, i32 1
-// CHECK15-NEXT:    store i8* null, i8** [[TMP118]], align 4
-// CHECK15-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS32]], i32 0, i32 2
-// CHECK15-NEXT:    [[TMP120:%.*]] = bitcast i8** [[TMP119]] to i32**
-// CHECK15-NEXT:    store i32* [[VLA]], i32** [[TMP120]], align 4
-// CHECK15-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS33]], i32 0, i32 2
-// CHECK15-NEXT:    [[TMP122:%.*]] = bitcast i8** [[TMP121]] to i32**
-// CHECK15-NEXT:    store i32* [[VLA]], i32** [[TMP122]], align 4
-// CHECK15-NEXT:    [[TMP123:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES35]], i32 0, i32 2
-// CHECK15-NEXT:    store i64 [[TMP106]], i64* [[TMP123]], align 4
-// CHECK15-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS34]], i32 0, i32 2
-// CHECK15-NEXT:    store i8* null, i8** [[TMP124]], align 4
-// CHECK15-NEXT:    [[TMP125:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS32]], i32 0, i32 0
-// CHECK15-NEXT:    [[TMP126:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS33]], i32 0, i32 0
-// CHECK15-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES35]], i32 0, i32 0
-// CHECK15-NEXT:    [[TMP128:%.*]] = load i32, i32* [[N]], align 4
-// CHECK15-NEXT:    store i32 [[TMP128]], i32* [[DOTCAPTURE_EXPR_37]], align 4
-// CHECK15-NEXT:    [[TMP129:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_37]], align 4
-// CHECK15-NEXT:    [[SUB39:%.*]] = sub nsw i32 [[TMP129]], 0
+// CHECK15-NEXT:    store i32 [[TMP0]], i32* [[TMP110]], align 4
+// CHECK15-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS33]], i32 0, i32 1
+// CHECK15-NEXT:    [[TMP112:%.*]] = bitcast i8** [[TMP111]] to i32*
+// CHECK15-NEXT:    store i32 [[TMP0]], i32* [[TMP112]], align 4
+// CHECK15-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS34]], i32 0, i32 1
+// CHECK15-NEXT:    store i8* null, i8** [[TMP113]], align 4
+// CHECK15-NEXT:    [[TMP114:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS32]], i32 0, i32 2
+// CHECK15-NEXT:    [[TMP115:%.*]] = bitcast i8** [[TMP114]] to i32**
+// CHECK15-NEXT:    store i32* [[VLA]], i32** [[TMP115]], align 4
+// CHECK15-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS33]], i32 0, i32 2
+// CHECK15-NEXT:    [[TMP117:%.*]] = bitcast i8** [[TMP116]] to i32**
+// CHECK15-NEXT:    store i32* [[VLA]], i32** [[TMP117]], align 4
+// CHECK15-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES35]], i32 0, i32 2
+// CHECK15-NEXT:    store i64 [[TMP102]], i64* [[TMP118]], align 4
+// CHECK15-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS34]], i32 0, i32 2
+// CHECK15-NEXT:    store i8* null, i8** [[TMP119]], align 4
+// CHECK15-NEXT:    [[TMP120:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS32]], i32 0, i32 0
+// CHECK15-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS33]], i32 0, i32 0
+// CHECK15-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES35]], i32 0, i32 0
+// CHECK15-NEXT:    [[TMP123:%.*]] = load i32, i32* [[N]], align 4
+// CHECK15-NEXT:    store i32 [[TMP123]], i32* [[DOTCAPTURE_EXPR_37]], align 4
+// CHECK15-NEXT:    [[TMP124:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_37]], align 4
+// CHECK15-NEXT:    [[SUB39:%.*]] = sub nsw i32 [[TMP124]], 0
 // CHECK15-NEXT:    [[DIV40:%.*]] = sdiv i32 [[SUB39]], 1
 // CHECK15-NEXT:    [[SUB41:%.*]] = sub nsw i32 [[DIV40]], 1
 // CHECK15-NEXT:    store i32 [[SUB41]], i32* [[DOTCAPTURE_EXPR_38]], align 4
-// CHECK15-NEXT:    [[TMP130:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_38]], align 4
-// CHECK15-NEXT:    [[ADD42:%.*]] = add nsw i32 [[TMP130]], 1
-// CHECK15-NEXT:    [[TMP131:%.*]] = zext i32 [[ADD42]] to i64
-// CHECK15-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP131]])
-// CHECK15-NEXT:    [[TMP132:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l163.region_id, i32 3, i8** [[TMP125]], i8** [[TMP126]], i64* [[TMP127]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.10, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK15-NEXT:    [[TMP133:%.*]] = icmp ne i32 [[TMP132]], 0
-// CHECK15-NEXT:    br i1 [[TMP133]], label [[OMP_OFFLOAD_FAILED43:%.*]], label [[OMP_OFFLOAD_CONT44:%.*]]
+// CHECK15-NEXT:    [[TMP125:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_38]], align 4
+// CHECK15-NEXT:    [[ADD42:%.*]] = add nsw i32 [[TMP125]], 1
+// CHECK15-NEXT:    [[TMP126:%.*]] = zext i32 [[ADD42]] to i64
+// CHECK15-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP126]])
+// CHECK15-NEXT:    [[TMP127:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l163.region_id, i32 3, i8** [[TMP120]], i8** [[TMP121]], i64* [[TMP122]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK15-NEXT:    [[TMP128:%.*]] = icmp ne i32 [[TMP127]], 0
+// CHECK15-NEXT:    br i1 [[TMP128]], label [[OMP_OFFLOAD_FAILED43:%.*]], label [[OMP_OFFLOAD_CONT44:%.*]]
 // CHECK15:       omp_offload.failed43:
-// CHECK15-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l163(i32 [[TMP104]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK15-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l163(i32 [[TMP100]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK15-NEXT:    br label [[OMP_OFFLOAD_CONT44]]
 // CHECK15:       omp_offload.cont44:
-// CHECK15-NEXT:    [[TMP134:%.*]] = load i32, i32* [[M]], align 4
-// CHECK15-NEXT:    store i32 [[TMP134]], i32* [[M_CASTED45]], align 4
-// CHECK15-NEXT:    [[TMP135:%.*]] = load i32, i32* [[M_CASTED45]], align 4
-// CHECK15-NEXT:    [[TMP136:%.*]] = load i32, i32* [[N]], align 4
-// CHECK15-NEXT:    store i32 [[TMP136]], i32* [[N_CASTED46]], align 4
-// CHECK15-NEXT:    [[TMP137:%.*]] = load i32, i32* [[N_CASTED46]], align 4
-// CHECK15-NEXT:    [[TMP138:%.*]] = mul nuw i32 [[TMP0]], 4
-// CHECK15-NEXT:    [[TMP139:%.*]] = sext i32 [[TMP138]] to i64
-// CHECK15-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 0
-// CHECK15-NEXT:    [[TMP141:%.*]] = bitcast i8** [[TMP140]] to i32*
-// CHECK15-NEXT:    store i32 [[TMP135]], i32* [[TMP141]], align 4
-// CHECK15-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 0
-// CHECK15-NEXT:    [[TMP143:%.*]] = bitcast i8** [[TMP142]] to i32*
-// CHECK15-NEXT:    store i32 [[TMP135]], i32* [[TMP143]], align 4
-// CHECK15-NEXT:    [[TMP144:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES50]], i32 0, i32 0
-// CHECK15-NEXT:    store i64 4, i64* [[TMP144]], align 4
-// CHECK15-NEXT:    [[TMP145:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS49]], i32 0, i32 0
+// CHECK15-NEXT:    [[TMP129:%.*]] = load i32, i32* [[M]], align 4
+// CHECK15-NEXT:    store i32 [[TMP129]], i32* [[M_CASTED45]], align 4
+// CHECK15-NEXT:    [[TMP130:%.*]] = load i32, i32* [[M_CASTED45]], align 4
+// CHECK15-NEXT:    [[TMP131:%.*]] = load i32, i32* [[N]], align 4
+// CHECK15-NEXT:    store i32 [[TMP131]], i32* [[N_CASTED46]], align 4
+// CHECK15-NEXT:    [[TMP132:%.*]] = load i32, i32* [[N_CASTED46]], align 4
+// CHECK15-NEXT:    [[TMP133:%.*]] = mul nuw i32 [[TMP0]], 4
+// CHECK15-NEXT:    [[TMP134:%.*]] = sext i32 [[TMP133]] to i64
+// CHECK15-NEXT:    [[TMP135:%.*]] = bitcast [4 x i64]* [[DOTOFFLOAD_SIZES50]] to i8*
+// CHECK15-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP135]], i8* align 4 bitcast ([4 x i64]* @.offload_sizes.16 to i8*), i32 32, i1 false)
+// CHECK15-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 0
+// CHECK15-NEXT:    [[TMP137:%.*]] = bitcast i8** [[TMP136]] to i32*
+// CHECK15-NEXT:    store i32 [[TMP130]], i32* [[TMP137]], align 4
+// CHECK15-NEXT:    [[TMP138:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 0
+// CHECK15-NEXT:    [[TMP139:%.*]] = bitcast i8** [[TMP138]] to i32*
+// CHECK15-NEXT:    store i32 [[TMP130]], i32* [[TMP139]], align 4
+// CHECK15-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS49]], i32 0, i32 0
+// CHECK15-NEXT:    store i8* null, i8** [[TMP140]], align 4
+// CHECK15-NEXT:    [[TMP141:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 1
+// CHECK15-NEXT:    [[TMP142:%.*]] = bitcast i8** [[TMP141]] to i32*
+// CHECK15-NEXT:    store i32 [[TMP132]], i32* [[TMP142]], align 4
+// CHECK15-NEXT:    [[TMP143:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 1
+// CHECK15-NEXT:    [[TMP144:%.*]] = bitcast i8** [[TMP143]] to i32*
+// CHECK15-NEXT:    store i32 [[TMP132]], i32* [[TMP144]], align 4
+// CHECK15-NEXT:    [[TMP145:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS49]], i32 0, i32 1
 // CHECK15-NEXT:    store i8* null, i8** [[TMP145]], align 4
-// CHECK15-NEXT:    [[TMP146:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 1
+// CHECK15-NEXT:    [[TMP146:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 2
 // CHECK15-NEXT:    [[TMP147:%.*]] = bitcast i8** [[TMP146]] to i32*
-// CHECK15-NEXT:    store i32 [[TMP137]], i32* [[TMP147]], align 4
-// CHECK15-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 1
+// CHECK15-NEXT:    store i32 [[TMP0]], i32* [[TMP147]], align 4
+// CHECK15-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 2
 // CHECK15-NEXT:    [[TMP149:%.*]] = bitcast i8** [[TMP148]] to i32*
-// CHECK15-NEXT:    store i32 [[TMP137]], i32* [[TMP149]], align 4
-// CHECK15-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES50]], i32 0, i32 1
-// CHECK15-NEXT:    store i64 4, i64* [[TMP150]], align 4
-// CHECK15-NEXT:    [[TMP151:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS49]], i32 0, i32 1
-// CHECK15-NEXT:    store i8* null, i8** [[TMP151]], align 4
-// CHECK15-NEXT:    [[TMP152:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 2
-// CHECK15-NEXT:    [[TMP153:%.*]] = bitcast i8** [[TMP152]] to i32*
-// CHECK15-NEXT:    store i32 [[TMP0]], i32* [[TMP153]], align 4
-// CHECK15-NEXT:    [[TMP154:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 2
-// CHECK15-NEXT:    [[TMP155:%.*]] = bitcast i8** [[TMP154]] to i32*
-// CHECK15-NEXT:    store i32 [[TMP0]], i32* [[TMP155]], align 4
-// CHECK15-NEXT:    [[TMP156:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES50]], i32 0, i32 2
-// CHECK15-NEXT:    store i64 4, i64* [[TMP156]], align 4
-// CHECK15-NEXT:    [[TMP157:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS49]], i32 0, i32 2
-// CHECK15-NEXT:    store i8* null, i8** [[TMP157]], align 4
-// CHECK15-NEXT:    [[TMP158:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 3
-// CHECK15-NEXT:    [[TMP159:%.*]] = bitcast i8** [[TMP158]] to i32**
-// CHECK15-NEXT:    store i32* [[VLA]], i32** [[TMP159]], align 4
-// CHECK15-NEXT:    [[TMP160:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 3
-// CHECK15-NEXT:    [[TMP161:%.*]] = bitcast i8** [[TMP160]] to i32**
-// CHECK15-NEXT:    store i32* [[VLA]], i32** [[TMP161]], align 4
-// CHECK15-NEXT:    [[TMP162:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES50]], i32 0, i32 3
-// CHECK15-NEXT:    store i64 [[TMP139]], i64* [[TMP162]], align 4
-// CHECK15-NEXT:    [[TMP163:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS49]], i32 0, i32 3
-// CHECK15-NEXT:    store i8* null, i8** [[TMP163]], align 4
-// CHECK15-NEXT:    [[TMP164:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 0
-// CHECK15-NEXT:    [[TMP165:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 0
-// CHECK15-NEXT:    [[TMP166:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES50]], i32 0, i32 0
-// CHECK15-NEXT:    [[TMP167:%.*]] = load i32, i32* [[N]], align 4
-// CHECK15-NEXT:    store i32 [[TMP167]], i32* [[DOTCAPTURE_EXPR_52]], align 4
-// CHECK15-NEXT:    [[TMP168:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_52]], align 4
-// CHECK15-NEXT:    [[SUB54:%.*]] = sub nsw i32 [[TMP168]], 0
+// CHECK15-NEXT:    store i32 [[TMP0]], i32* [[TMP149]], align 4
+// CHECK15-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS49]], i32 0, i32 2
+// CHECK15-NEXT:    store i8* null, i8** [[TMP150]], align 4
+// CHECK15-NEXT:    [[TMP151:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 3
+// CHECK15-NEXT:    [[TMP152:%.*]] = bitcast i8** [[TMP151]] to i32**
+// CHECK15-NEXT:    store i32* [[VLA]], i32** [[TMP152]], align 4
+// CHECK15-NEXT:    [[TMP153:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 3
+// CHECK15-NEXT:    [[TMP154:%.*]] = bitcast i8** [[TMP153]] to i32**
+// CHECK15-NEXT:    store i32* [[VLA]], i32** [[TMP154]], align 4
+// CHECK15-NEXT:    [[TMP155:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES50]], i32 0, i32 3
+// CHECK15-NEXT:    store i64 [[TMP134]], i64* [[TMP155]], align 4
+// CHECK15-NEXT:    [[TMP156:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS49]], i32 0, i32 3
+// CHECK15-NEXT:    store i8* null, i8** [[TMP156]], align 4
+// CHECK15-NEXT:    [[TMP157:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 0
+// CHECK15-NEXT:    [[TMP158:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 0
+// CHECK15-NEXT:    [[TMP159:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES50]], i32 0, i32 0
+// CHECK15-NEXT:    [[TMP160:%.*]] = load i32, i32* [[N]], align 4
+// CHECK15-NEXT:    store i32 [[TMP160]], i32* [[DOTCAPTURE_EXPR_52]], align 4
+// CHECK15-NEXT:    [[TMP161:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_52]], align 4
+// CHECK15-NEXT:    [[SUB54:%.*]] = sub nsw i32 [[TMP161]], 0
 // CHECK15-NEXT:    [[DIV55:%.*]] = sdiv i32 [[SUB54]], 1
 // CHECK15-NEXT:    [[SUB56:%.*]] = sub nsw i32 [[DIV55]], 1
 // CHECK15-NEXT:    store i32 [[SUB56]], i32* [[DOTCAPTURE_EXPR_53]], align 4
-// CHECK15-NEXT:    [[TMP169:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_53]], align 4
-// CHECK15-NEXT:    [[ADD57:%.*]] = add nsw i32 [[TMP169]], 1
-// CHECK15-NEXT:    [[TMP170:%.*]] = zext i32 [[ADD57]] to i64
-// CHECK15-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP170]])
-// CHECK15-NEXT:    [[TMP171:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l168.region_id, i32 4, i8** [[TMP164]], i8** [[TMP165]], i64* [[TMP166]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK15-NEXT:    [[TMP172:%.*]] = icmp ne i32 [[TMP171]], 0
-// CHECK15-NEXT:    br i1 [[TMP172]], label [[OMP_OFFLOAD_FAILED58:%.*]], label [[OMP_OFFLOAD_CONT59:%.*]]
+// CHECK15-NEXT:    [[TMP162:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_53]], align 4
+// CHECK15-NEXT:    [[ADD57:%.*]] = add nsw i32 [[TMP162]], 1
+// CHECK15-NEXT:    [[TMP163:%.*]] = zext i32 [[ADD57]] to i64
+// CHECK15-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP163]])
+// CHECK15-NEXT:    [[TMP164:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l168.region_id, i32 4, i8** [[TMP157]], i8** [[TMP158]], i64* [[TMP159]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.17, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK15-NEXT:    [[TMP165:%.*]] = icmp ne i32 [[TMP164]], 0
+// CHECK15-NEXT:    br i1 [[TMP165]], label [[OMP_OFFLOAD_FAILED58:%.*]], label [[OMP_OFFLOAD_CONT59:%.*]]
 // CHECK15:       omp_offload.failed58:
-// CHECK15-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l168(i32 [[TMP135]], i32 [[TMP137]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK15-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l168(i32 [[TMP130]], i32 [[TMP132]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK15-NEXT:    br label [[OMP_OFFLOAD_CONT59]]
 // CHECK15:       omp_offload.cont59:
-// CHECK15-NEXT:    [[TMP173:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK15-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10EEiT_(i32 noundef [[TMP173]])
+// CHECK15-NEXT:    [[TMP166:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK15-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10EEiT_(i32 noundef [[TMP166]])
 // CHECK15-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK15-NEXT:    [[TMP174:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK15-NEXT:    call void @llvm.stackrestore(i8* [[TMP174]])
-// CHECK15-NEXT:    [[TMP175:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK15-NEXT:    ret i32 [[TMP175]]
+// CHECK15-NEXT:    [[TMP167:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK15-NEXT:    call void @llvm.stackrestore(i8* [[TMP167]])
+// CHECK15-NEXT:    [[TMP168:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK15-NEXT:    ret i32 [[TMP168]]
 //
 //
 // CHECK15-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l148
@@ -13159,11 +13117,11 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
 // CHECK15-NEXT:    store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK15-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*, i32)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP4]])
+// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*, i32)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP4]])
 // CHECK15-NEXT:    ret void
 //
 //
-// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..5
+// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..6
 // CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK15-NEXT:  entry:
 // CHECK15-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -13241,7 +13199,7 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
 // CHECK15-NEXT:    store i32 [[TMP19]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK15-NEXT:    [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*, i32)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]], i32 [[TMP20]])
+// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*, i32)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]], i32 [[TMP20]])
 // CHECK15-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK15:       omp.inner.for.inc:
 // CHECK15-NEXT:    [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -13283,7 +13241,7 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    ret void
 //
 //
-// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..6
+// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..7
 // CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK15-NEXT:  entry:
 // CHECK15-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -13398,11 +13356,11 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    store i32* [[A]], i32** [[A_ADDR]], align 4
 // CHECK15-NEXT:    [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4
 // CHECK15-NEXT:    [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4
-// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]])
+// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]])
 // CHECK15-NEXT:    ret void
 //
 //
-// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..8
+// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..10
 // CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] {
 // CHECK15-NEXT:  entry:
 // CHECK15-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -13472,7 +13430,7 @@ int main (int argc, char **argv) {
 // CHECK15:       omp.inner.for.body:
 // CHECK15-NEXT:    [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
 // CHECK15-NEXT:    [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
-// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]])
+// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]])
 // CHECK15-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK15:       omp.inner.for.inc:
 // CHECK15-NEXT:    [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -13491,7 +13449,7 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    ret void
 //
 //
-// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..9
+// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..11
 // CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] {
 // CHECK15-NEXT:  entry:
 // CHECK15-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -13609,11 +13567,11 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
 // CHECK15-NEXT:    store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK15-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*, i32)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP4]])
+// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*, i32)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP4]])
 // CHECK15-NEXT:    ret void
 //
 //
-// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..11
+// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..14
 // CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK15-NEXT:  entry:
 // CHECK15-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -13689,7 +13647,7 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
 // CHECK15-NEXT:    store i32 [[TMP18]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK15-NEXT:    [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*, i32)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]], i32 [[TMP19]])
+// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*, i32)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]], i32 [[TMP19]])
 // CHECK15-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK15:       omp.inner.for.inc:
 // CHECK15-NEXT:    [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -13708,7 +13666,7 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    ret void
 //
 //
-// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..12
+// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..15
 // CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK15-NEXT:  entry:
 // CHECK15-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -13810,7 +13768,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK15-LABEL: define {{[^@]+}}@_Z5tmainIiLi10EEiT_
-// CHECK15-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK15-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK15-NEXT:  entry:
 // CHECK15-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK15-NEXT:    [[A:%.*]] = alloca [10 x i32], align 4
@@ -13850,7 +13808,7 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK15-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK15-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK15-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l116.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.16, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK15-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l116.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.20, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.21, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK15-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK15-NEXT:    br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK15:       omp_offload.failed:
@@ -13868,7 +13826,7 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK15-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
 // CHECK15-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK15-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l121.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.19, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.20, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK15-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l121.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.24, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.25, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK15-NEXT:    [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
 // CHECK15-NEXT:    br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]]
 // CHECK15:       omp_offload.failed5:
@@ -13897,7 +13855,7 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0
 // CHECK15-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS8]], i32 0, i32 0
 // CHECK15-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK15-NEXT:    [[TMP32:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l126.region_id, i32 2, i8** [[TMP30]], i8** [[TMP31]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.23, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.24, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK15-NEXT:    [[TMP32:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l126.region_id, i32 2, i8** [[TMP30]], i8** [[TMP31]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.28, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.29, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK15-NEXT:    [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0
 // CHECK15-NEXT:    br i1 [[TMP33]], label [[OMP_OFFLOAD_FAILED11:%.*]], label [[OMP_OFFLOAD_CONT12:%.*]]
 // CHECK15:       omp_offload.failed11:
@@ -13915,7 +13873,7 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0
 // CHECK15-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 0
 // CHECK15-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK15-NEXT:    [[TMP41:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l131.region_id, i32 1, i8** [[TMP39]], i8** [[TMP40]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.27, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.28, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK15-NEXT:    [[TMP41:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l131.region_id, i32 1, i8** [[TMP39]], i8** [[TMP40]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.32, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.33, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK15-NEXT:    [[TMP42:%.*]] = icmp ne i32 [[TMP41]], 0
 // CHECK15-NEXT:    br i1 [[TMP42]], label [[OMP_OFFLOAD_FAILED17:%.*]], label [[OMP_OFFLOAD_CONT18:%.*]]
 // CHECK15:       omp_offload.failed17:
@@ -13944,7 +13902,7 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0
 // CHECK15-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 0
 // CHECK15-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK15-NEXT:    [[TMP57:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l136.region_id, i32 2, i8** [[TMP55]], i8** [[TMP56]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.31, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.32, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK15-NEXT:    [[TMP57:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l136.region_id, i32 2, i8** [[TMP55]], i8** [[TMP56]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.36, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.37, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK15-NEXT:    [[TMP58:%.*]] = icmp ne i32 [[TMP57]], 0
 // CHECK15-NEXT:    br i1 [[TMP58]], label [[OMP_OFFLOAD_FAILED24:%.*]], label [[OMP_OFFLOAD_CONT25:%.*]]
 // CHECK15:       omp_offload.failed24:
@@ -13960,11 +13918,11 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4
 // CHECK15-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4
 // CHECK15-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4
-// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK15-NEXT:    ret void
 //
 //
-// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..14
+// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..18
 // CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK15-NEXT:  entry:
 // CHECK15-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -14010,7 +13968,7 @@ int main (int argc, char **argv) {
 // CHECK15:       omp.inner.for.body:
 // CHECK15-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
 // CHECK15-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
-// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]])
+// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]])
 // CHECK15-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK15:       omp.inner.for.inc:
 // CHECK15-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -14025,7 +13983,7 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    ret void
 //
 //
-// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..15
+// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..19
 // CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK15-NEXT:  entry:
 // CHECK15-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -14105,11 +14063,11 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4
 // CHECK15-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4
 // CHECK15-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4
-// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..17 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..22 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK15-NEXT:    ret void
 //
 //
-// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..17
+// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..22
 // CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK15-NEXT:  entry:
 // CHECK15-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -14155,7 +14113,7 @@ int main (int argc, char **argv) {
 // CHECK15:       omp.inner.for.body:
 // CHECK15-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
 // CHECK15-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
-// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]])
+// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..23 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]])
 // CHECK15-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK15:       omp.inner.for.inc:
 // CHECK15-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -14170,7 +14128,7 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    ret void
 //
 //
-// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..18
+// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..23
 // CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK15-NEXT:  entry:
 // CHECK15-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -14259,11 +14217,11 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
 // CHECK15-NEXT:    store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK15-NEXT:    [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i32)* @.omp_outlined..21 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i32 [[TMP3]])
+// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i32)* @.omp_outlined..26 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i32 [[TMP3]])
 // CHECK15-NEXT:    ret void
 //
 //
-// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..21
+// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..26
 // CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK15-NEXT:  entry:
 // CHECK15-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -14315,7 +14273,7 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
 // CHECK15-NEXT:    store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK15-NEXT:    [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..22 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]])
+// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..27 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]])
 // CHECK15-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK15:       omp.inner.for.inc:
 // CHECK15-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -14330,7 +14288,7 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    ret void
 //
 //
-// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..22
+// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..27
 // CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK15-NEXT:  entry:
 // CHECK15-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -14432,11 +14390,11 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4
 // CHECK15-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4
 // CHECK15-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4
-// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..25 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK15-NEXT:    ret void
 //
 //
-// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..25
+// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..30
 // CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK15-NEXT:  entry:
 // CHECK15-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -14482,7 +14440,7 @@ int main (int argc, char **argv) {
 // CHECK15:       omp.inner.for.body:
 // CHECK15-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
 // CHECK15-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
-// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]])
+// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]])
 // CHECK15-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK15:       omp.inner.for.inc:
 // CHECK15-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -14497,7 +14455,7 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    ret void
 //
 //
-// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..26
+// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..31
 // CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK15-NEXT:  entry:
 // CHECK15-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -14584,11 +14542,11 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
 // CHECK15-NEXT:    store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK15-NEXT:    [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i32)* @.omp_outlined..29 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i32 [[TMP3]])
+// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i32)* @.omp_outlined..34 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i32 [[TMP3]])
 // CHECK15-NEXT:    ret void
 //
 //
-// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..29
+// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..34
 // CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK15-NEXT:  entry:
 // CHECK15-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -14640,7 +14598,7 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
 // CHECK15-NEXT:    store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK15-NEXT:    [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]])
+// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]])
 // CHECK15-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK15:       omp.inner.for.inc:
 // CHECK15-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -14655,7 +14613,7 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    ret void
 //
 //
-// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..30
+// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..35
 // CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK15-NEXT:  entry:
 // CHECK15-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -14731,7 +14689,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK15-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK15-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK15-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK15-NEXT:  entry:
 // CHECK15-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK15-NEXT:    ret void
@@ -14804,14 +14762,14 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4
 // CHECK16-NEXT:    [[TMP4:%.*]] = mul nuw i32 [[TMP0]], 4
 // CHECK16-NEXT:    [[TMP5:%.*]] = sext i32 [[TMP4]] to i64
-// CHECK16-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK16-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32*
-// CHECK16-NEXT:    store i32 [[TMP3]], i32* [[TMP7]], align 4
-// CHECK16-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK16-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i32*
-// CHECK16-NEXT:    store i32 [[TMP3]], i32* [[TMP9]], align 4
-// CHECK16-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK16-NEXT:    store i64 4, i64* [[TMP10]], align 4
+// CHECK16-NEXT:    [[TMP6:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK16-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP6]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes to i8*), i32 24, i1 false)
+// CHECK16-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK16-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i32*
+// CHECK16-NEXT:    store i32 [[TMP3]], i32* [[TMP8]], align 4
+// CHECK16-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK16-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i32*
+// CHECK16-NEXT:    store i32 [[TMP3]], i32* [[TMP10]], align 4
 // CHECK16-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
 // CHECK16-NEXT:    store i8* null, i8** [[TMP11]], align 4
 // CHECK16-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -14820,298 +14778,284 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK16-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i32*
 // CHECK16-NEXT:    store i32 [[TMP0]], i32* [[TMP15]], align 4
-// CHECK16-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK16-NEXT:    store i64 4, i64* [[TMP16]], align 4
-// CHECK16-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK16-NEXT:    store i8* null, i8** [[TMP17]], align 4
-// CHECK16-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK16-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
-// CHECK16-NEXT:    store i32* [[VLA]], i32** [[TMP19]], align 4
-// CHECK16-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK16-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32**
-// CHECK16-NEXT:    store i32* [[VLA]], i32** [[TMP21]], align 4
-// CHECK16-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK16-NEXT:    store i64 [[TMP5]], i64* [[TMP22]], align 4
-// CHECK16-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK16-NEXT:    store i8* null, i8** [[TMP23]], align 4
-// CHECK16-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK16-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK16-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK16-NEXT:    [[TMP27:%.*]] = load i32, i32* [[N]], align 4
-// CHECK16-NEXT:    store i32 [[TMP27]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK16-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK16-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP28]], 0
+// CHECK16-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK16-NEXT:    store i8* null, i8** [[TMP16]], align 4
+// CHECK16-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK16-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32**
+// CHECK16-NEXT:    store i32* [[VLA]], i32** [[TMP18]], align 4
+// CHECK16-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK16-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32**
+// CHECK16-NEXT:    store i32* [[VLA]], i32** [[TMP20]], align 4
+// CHECK16-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
+// CHECK16-NEXT:    store i64 [[TMP5]], i64* [[TMP21]], align 4
+// CHECK16-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK16-NEXT:    store i8* null, i8** [[TMP22]], align 4
+// CHECK16-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK16-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK16-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK16-NEXT:    [[TMP26:%.*]] = load i32, i32* [[N]], align 4
+// CHECK16-NEXT:    store i32 [[TMP26]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK16-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK16-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP27]], 0
 // CHECK16-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK16-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK16-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK16-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK16-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP29]], 1
-// CHECK16-NEXT:    [[TMP30:%.*]] = zext i32 [[ADD]] to i64
-// CHECK16-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP30]])
-// CHECK16-NEXT:    [[TMP31:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l148.region_id, i32 3, i8** [[TMP24]], i8** [[TMP25]], i64* [[TMP26]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK16-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
-// CHECK16-NEXT:    br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK16-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK16-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP28]], 1
+// CHECK16-NEXT:    [[TMP29:%.*]] = zext i32 [[ADD]] to i64
+// CHECK16-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP29]])
+// CHECK16-NEXT:    [[TMP30:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l148.region_id, i32 3, i8** [[TMP23]], i8** [[TMP24]], i64* [[TMP25]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK16-NEXT:    [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
+// CHECK16-NEXT:    br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK16:       omp_offload.failed:
 // CHECK16-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l148(i32 [[TMP3]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK16-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK16:       omp_offload.cont:
-// CHECK16-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N]], align 4
-// CHECK16-NEXT:    store i32 [[TMP33]], i32* [[N_CASTED3]], align 4
-// CHECK16-NEXT:    [[TMP34:%.*]] = load i32, i32* [[N_CASTED3]], align 4
-// CHECK16-NEXT:    [[TMP35:%.*]] = mul nuw i32 [[TMP0]], 4
-// CHECK16-NEXT:    [[TMP36:%.*]] = sext i32 [[TMP35]] to i64
+// CHECK16-NEXT:    [[TMP32:%.*]] = load i32, i32* [[N]], align 4
+// CHECK16-NEXT:    store i32 [[TMP32]], i32* [[N_CASTED3]], align 4
+// CHECK16-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N_CASTED3]], align 4
+// CHECK16-NEXT:    [[TMP34:%.*]] = mul nuw i32 [[TMP0]], 4
+// CHECK16-NEXT:    [[TMP35:%.*]] = sext i32 [[TMP34]] to i64
+// CHECK16-NEXT:    [[TMP36:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES7]] to i8*
+// CHECK16-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP36]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes.4 to i8*), i32 24, i1 false)
 // CHECK16-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
 // CHECK16-NEXT:    [[TMP38:%.*]] = bitcast i8** [[TMP37]] to i32*
-// CHECK16-NEXT:    store i32 [[TMP34]], i32* [[TMP38]], align 4
+// CHECK16-NEXT:    store i32 [[TMP33]], i32* [[TMP38]], align 4
 // CHECK16-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
 // CHECK16-NEXT:    [[TMP40:%.*]] = bitcast i8** [[TMP39]] to i32*
-// CHECK16-NEXT:    store i32 [[TMP34]], i32* [[TMP40]], align 4
-// CHECK16-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 0
-// CHECK16-NEXT:    store i64 4, i64* [[TMP41]], align 4
-// CHECK16-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 0
-// CHECK16-NEXT:    store i8* null, i8** [[TMP42]], align 4
-// CHECK16-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 1
-// CHECK16-NEXT:    [[TMP44:%.*]] = bitcast i8** [[TMP43]] to i32*
-// CHECK16-NEXT:    store i32 [[TMP0]], i32* [[TMP44]], align 4
-// CHECK16-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 1
-// CHECK16-NEXT:    [[TMP46:%.*]] = bitcast i8** [[TMP45]] to i32*
-// CHECK16-NEXT:    store i32 [[TMP0]], i32* [[TMP46]], align 4
-// CHECK16-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 1
-// CHECK16-NEXT:    store i64 4, i64* [[TMP47]], align 4
-// CHECK16-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 1
-// CHECK16-NEXT:    store i8* null, i8** [[TMP48]], align 4
-// CHECK16-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 2
+// CHECK16-NEXT:    store i32 [[TMP33]], i32* [[TMP40]], align 4
+// CHECK16-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 0
+// CHECK16-NEXT:    store i8* null, i8** [[TMP41]], align 4
+// CHECK16-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 1
+// CHECK16-NEXT:    [[TMP43:%.*]] = bitcast i8** [[TMP42]] to i32*
+// CHECK16-NEXT:    store i32 [[TMP0]], i32* [[TMP43]], align 4
+// CHECK16-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 1
+// CHECK16-NEXT:    [[TMP45:%.*]] = bitcast i8** [[TMP44]] to i32*
+// CHECK16-NEXT:    store i32 [[TMP0]], i32* [[TMP45]], align 4
+// CHECK16-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 1
+// CHECK16-NEXT:    store i8* null, i8** [[TMP46]], align 4
+// CHECK16-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 2
+// CHECK16-NEXT:    [[TMP48:%.*]] = bitcast i8** [[TMP47]] to i32**
+// CHECK16-NEXT:    store i32* [[VLA]], i32** [[TMP48]], align 4
+// CHECK16-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 2
 // CHECK16-NEXT:    [[TMP50:%.*]] = bitcast i8** [[TMP49]] to i32**
 // CHECK16-NEXT:    store i32* [[VLA]], i32** [[TMP50]], align 4
-// CHECK16-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 2
-// CHECK16-NEXT:    [[TMP52:%.*]] = bitcast i8** [[TMP51]] to i32**
-// CHECK16-NEXT:    store i32* [[VLA]], i32** [[TMP52]], align 4
-// CHECK16-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 2
-// CHECK16-NEXT:    store i64 [[TMP36]], i64* [[TMP53]], align 4
-// CHECK16-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 2
-// CHECK16-NEXT:    store i8* null, i8** [[TMP54]], align 4
-// CHECK16-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
-// CHECK16-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
-// CHECK16-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 0
-// CHECK16-NEXT:    [[TMP58:%.*]] = load i32, i32* [[N]], align 4
-// CHECK16-NEXT:    store i32 [[TMP58]], i32* [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK16-NEXT:    [[TMP59:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK16-NEXT:    [[SUB11:%.*]] = sub nsw i32 [[TMP59]], 0
+// CHECK16-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 2
+// CHECK16-NEXT:    store i64 [[TMP35]], i64* [[TMP51]], align 4
+// CHECK16-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 2
+// CHECK16-NEXT:    store i8* null, i8** [[TMP52]], align 4
+// CHECK16-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
+// CHECK16-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
+// CHECK16-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 0
+// CHECK16-NEXT:    [[TMP56:%.*]] = load i32, i32* [[N]], align 4
+// CHECK16-NEXT:    store i32 [[TMP56]], i32* [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK16-NEXT:    [[TMP57:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK16-NEXT:    [[SUB11:%.*]] = sub nsw i32 [[TMP57]], 0
 // CHECK16-NEXT:    [[DIV12:%.*]] = sdiv i32 [[SUB11]], 1
 // CHECK16-NEXT:    [[SUB13:%.*]] = sub nsw i32 [[DIV12]], 1
 // CHECK16-NEXT:    store i32 [[SUB13]], i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK16-NEXT:    [[TMP60:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK16-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP60]], 1
-// CHECK16-NEXT:    [[TMP61:%.*]] = zext i32 [[ADD14]] to i64
-// CHECK16-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP61]])
-// CHECK16-NEXT:    [[TMP62:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l153.region_id, i32 3, i8** [[TMP55]], i8** [[TMP56]], i64* [[TMP57]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK16-NEXT:    [[TMP63:%.*]] = icmp ne i32 [[TMP62]], 0
-// CHECK16-NEXT:    br i1 [[TMP63]], label [[OMP_OFFLOAD_FAILED15:%.*]], label [[OMP_OFFLOAD_CONT16:%.*]]
+// CHECK16-NEXT:    [[TMP58:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
+// CHECK16-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP58]], 1
+// CHECK16-NEXT:    [[TMP59:%.*]] = zext i32 [[ADD14]] to i64
+// CHECK16-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP59]])
+// CHECK16-NEXT:    [[TMP60:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l153.region_id, i32 3, i8** [[TMP53]], i8** [[TMP54]], i64* [[TMP55]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.5, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK16-NEXT:    [[TMP61:%.*]] = icmp ne i32 [[TMP60]], 0
+// CHECK16-NEXT:    br i1 [[TMP61]], label [[OMP_OFFLOAD_FAILED15:%.*]], label [[OMP_OFFLOAD_CONT16:%.*]]
 // CHECK16:       omp_offload.failed15:
-// CHECK16-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l153(i32 [[TMP34]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK16-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l153(i32 [[TMP33]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK16-NEXT:    br label [[OMP_OFFLOAD_CONT16]]
 // CHECK16:       omp_offload.cont16:
-// CHECK16-NEXT:    [[TMP64:%.*]] = load i32, i32* [[M]], align 4
-// CHECK16-NEXT:    store i32 [[TMP64]], i32* [[M_CASTED]], align 4
-// CHECK16-NEXT:    [[TMP65:%.*]] = load i32, i32* [[M_CASTED]], align 4
-// CHECK16-NEXT:    [[TMP66:%.*]] = load i32, i32* [[N]], align 4
-// CHECK16-NEXT:    store i32 [[TMP66]], i32* [[N_CASTED17]], align 4
-// CHECK16-NEXT:    [[TMP67:%.*]] = load i32, i32* [[N_CASTED17]], align 4
-// CHECK16-NEXT:    [[TMP68:%.*]] = mul nuw i32 [[TMP0]], 4
-// CHECK16-NEXT:    [[TMP69:%.*]] = sext i32 [[TMP68]] to i64
-// CHECK16-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 0
-// CHECK16-NEXT:    [[TMP71:%.*]] = bitcast i8** [[TMP70]] to i32*
-// CHECK16-NEXT:    store i32 [[TMP65]], i32* [[TMP71]], align 4
-// CHECK16-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 0
-// CHECK16-NEXT:    [[TMP73:%.*]] = bitcast i8** [[TMP72]] to i32*
-// CHECK16-NEXT:    store i32 [[TMP65]], i32* [[TMP73]], align 4
-// CHECK16-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 0
-// CHECK16-NEXT:    store i64 4, i64* [[TMP74]], align 4
-// CHECK16-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 0
-// CHECK16-NEXT:    store i8* null, i8** [[TMP75]], align 4
-// CHECK16-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 1
+// CHECK16-NEXT:    [[TMP62:%.*]] = load i32, i32* [[M]], align 4
+// CHECK16-NEXT:    store i32 [[TMP62]], i32* [[M_CASTED]], align 4
+// CHECK16-NEXT:    [[TMP63:%.*]] = load i32, i32* [[M_CASTED]], align 4
+// CHECK16-NEXT:    [[TMP64:%.*]] = load i32, i32* [[N]], align 4
+// CHECK16-NEXT:    store i32 [[TMP64]], i32* [[N_CASTED17]], align 4
+// CHECK16-NEXT:    [[TMP65:%.*]] = load i32, i32* [[N_CASTED17]], align 4
+// CHECK16-NEXT:    [[TMP66:%.*]] = mul nuw i32 [[TMP0]], 4
+// CHECK16-NEXT:    [[TMP67:%.*]] = sext i32 [[TMP66]] to i64
+// CHECK16-NEXT:    [[TMP68:%.*]] = bitcast [4 x i64]* [[DOTOFFLOAD_SIZES21]] to i8*
+// CHECK16-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP68]], i8* align 4 bitcast ([4 x i64]* @.offload_sizes.8 to i8*), i32 32, i1 false)
+// CHECK16-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 0
+// CHECK16-NEXT:    [[TMP70:%.*]] = bitcast i8** [[TMP69]] to i32*
+// CHECK16-NEXT:    store i32 [[TMP63]], i32* [[TMP70]], align 4
+// CHECK16-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 0
+// CHECK16-NEXT:    [[TMP72:%.*]] = bitcast i8** [[TMP71]] to i32*
+// CHECK16-NEXT:    store i32 [[TMP63]], i32* [[TMP72]], align 4
+// CHECK16-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 0
+// CHECK16-NEXT:    store i8* null, i8** [[TMP73]], align 4
+// CHECK16-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 1
+// CHECK16-NEXT:    [[TMP75:%.*]] = bitcast i8** [[TMP74]] to i32*
+// CHECK16-NEXT:    store i32 [[TMP65]], i32* [[TMP75]], align 4
+// CHECK16-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 1
 // CHECK16-NEXT:    [[TMP77:%.*]] = bitcast i8** [[TMP76]] to i32*
-// CHECK16-NEXT:    store i32 [[TMP67]], i32* [[TMP77]], align 4
-// CHECK16-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 1
-// CHECK16-NEXT:    [[TMP79:%.*]] = bitcast i8** [[TMP78]] to i32*
-// CHECK16-NEXT:    store i32 [[TMP67]], i32* [[TMP79]], align 4
-// CHECK16-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 1
-// CHECK16-NEXT:    store i64 4, i64* [[TMP80]], align 4
-// CHECK16-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 1
-// CHECK16-NEXT:    store i8* null, i8** [[TMP81]], align 4
-// CHECK16-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 2
-// CHECK16-NEXT:    [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i32*
-// CHECK16-NEXT:    store i32 [[TMP0]], i32* [[TMP83]], align 4
-// CHECK16-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 2
-// CHECK16-NEXT:    [[TMP85:%.*]] = bitcast i8** [[TMP84]] to i32*
-// CHECK16-NEXT:    store i32 [[TMP0]], i32* [[TMP85]], align 4
-// CHECK16-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 2
-// CHECK16-NEXT:    store i64 4, i64* [[TMP86]], align 4
-// CHECK16-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 2
-// CHECK16-NEXT:    store i8* null, i8** [[TMP87]], align 4
-// CHECK16-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 3
-// CHECK16-NEXT:    [[TMP89:%.*]] = bitcast i8** [[TMP88]] to i32**
-// CHECK16-NEXT:    store i32* [[VLA]], i32** [[TMP89]], align 4
-// CHECK16-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 3
-// CHECK16-NEXT:    [[TMP91:%.*]] = bitcast i8** [[TMP90]] to i32**
-// CHECK16-NEXT:    store i32* [[VLA]], i32** [[TMP91]], align 4
-// CHECK16-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 3
-// CHECK16-NEXT:    store i64 [[TMP69]], i64* [[TMP92]], align 4
-// CHECK16-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 3
-// CHECK16-NEXT:    store i8* null, i8** [[TMP93]], align 4
-// CHECK16-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 0
-// CHECK16-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 0
-// CHECK16-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 0
-// CHECK16-NEXT:    [[TMP97:%.*]] = load i32, i32* [[N]], align 4
-// CHECK16-NEXT:    store i32 [[TMP97]], i32* [[DOTCAPTURE_EXPR_23]], align 4
-// CHECK16-NEXT:    [[TMP98:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_23]], align 4
-// CHECK16-NEXT:    [[SUB25:%.*]] = sub nsw i32 [[TMP98]], 0
+// CHECK16-NEXT:    store i32 [[TMP65]], i32* [[TMP77]], align 4
+// CHECK16-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 1
+// CHECK16-NEXT:    store i8* null, i8** [[TMP78]], align 4
+// CHECK16-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 2
+// CHECK16-NEXT:    [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i32*
+// CHECK16-NEXT:    store i32 [[TMP0]], i32* [[TMP80]], align 4
+// CHECK16-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 2
+// CHECK16-NEXT:    [[TMP82:%.*]] = bitcast i8** [[TMP81]] to i32*
+// CHECK16-NEXT:    store i32 [[TMP0]], i32* [[TMP82]], align 4
+// CHECK16-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 2
+// CHECK16-NEXT:    store i8* null, i8** [[TMP83]], align 4
+// CHECK16-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 3
+// CHECK16-NEXT:    [[TMP85:%.*]] = bitcast i8** [[TMP84]] to i32**
+// CHECK16-NEXT:    store i32* [[VLA]], i32** [[TMP85]], align 4
+// CHECK16-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 3
+// CHECK16-NEXT:    [[TMP87:%.*]] = bitcast i8** [[TMP86]] to i32**
+// CHECK16-NEXT:    store i32* [[VLA]], i32** [[TMP87]], align 4
+// CHECK16-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 3
+// CHECK16-NEXT:    store i64 [[TMP67]], i64* [[TMP88]], align 4
+// CHECK16-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 3
+// CHECK16-NEXT:    store i8* null, i8** [[TMP89]], align 4
+// CHECK16-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 0
+// CHECK16-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 0
+// CHECK16-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 0
+// CHECK16-NEXT:    [[TMP93:%.*]] = load i32, i32* [[N]], align 4
+// CHECK16-NEXT:    store i32 [[TMP93]], i32* [[DOTCAPTURE_EXPR_23]], align 4
+// CHECK16-NEXT:    [[TMP94:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_23]], align 4
+// CHECK16-NEXT:    [[SUB25:%.*]] = sub nsw i32 [[TMP94]], 0
 // CHECK16-NEXT:    [[DIV26:%.*]] = sdiv i32 [[SUB25]], 1
 // CHECK16-NEXT:    [[SUB27:%.*]] = sub nsw i32 [[DIV26]], 1
 // CHECK16-NEXT:    store i32 [[SUB27]], i32* [[DOTCAPTURE_EXPR_24]], align 4
-// CHECK16-NEXT:    [[TMP99:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_24]], align 4
-// CHECK16-NEXT:    [[ADD28:%.*]] = add nsw i32 [[TMP99]], 1
-// CHECK16-NEXT:    [[TMP100:%.*]] = zext i32 [[ADD28]] to i64
-// CHECK16-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP100]])
-// CHECK16-NEXT:    [[TMP101:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l158.region_id, i32 4, i8** [[TMP94]], i8** [[TMP95]], i64* [[TMP96]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.7, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK16-NEXT:    [[TMP102:%.*]] = icmp ne i32 [[TMP101]], 0
-// CHECK16-NEXT:    br i1 [[TMP102]], label [[OMP_OFFLOAD_FAILED29:%.*]], label [[OMP_OFFLOAD_CONT30:%.*]]
+// CHECK16-NEXT:    [[TMP95:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_24]], align 4
+// CHECK16-NEXT:    [[ADD28:%.*]] = add nsw i32 [[TMP95]], 1
+// CHECK16-NEXT:    [[TMP96:%.*]] = zext i32 [[ADD28]] to i64
+// CHECK16-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP96]])
+// CHECK16-NEXT:    [[TMP97:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l158.region_id, i32 4, i8** [[TMP90]], i8** [[TMP91]], i64* [[TMP92]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK16-NEXT:    [[TMP98:%.*]] = icmp ne i32 [[TMP97]], 0
+// CHECK16-NEXT:    br i1 [[TMP98]], label [[OMP_OFFLOAD_FAILED29:%.*]], label [[OMP_OFFLOAD_CONT30:%.*]]
 // CHECK16:       omp_offload.failed29:
-// CHECK16-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l158(i32 [[TMP65]], i32 [[TMP67]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK16-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l158(i32 [[TMP63]], i32 [[TMP65]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK16-NEXT:    br label [[OMP_OFFLOAD_CONT30]]
 // CHECK16:       omp_offload.cont30:
-// CHECK16-NEXT:    [[TMP103:%.*]] = load i32, i32* [[N]], align 4
-// CHECK16-NEXT:    store i32 [[TMP103]], i32* [[N_CASTED31]], align 4
-// CHECK16-NEXT:    [[TMP104:%.*]] = load i32, i32* [[N_CASTED31]], align 4
-// CHECK16-NEXT:    [[TMP105:%.*]] = mul nuw i32 [[TMP0]], 4
-// CHECK16-NEXT:    [[TMP106:%.*]] = sext i32 [[TMP105]] to i64
-// CHECK16-NEXT:    [[TMP107:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS32]], i32 0, i32 0
-// CHECK16-NEXT:    [[TMP108:%.*]] = bitcast i8** [[TMP107]] to i32*
-// CHECK16-NEXT:    store i32 [[TMP104]], i32* [[TMP108]], align 4
-// CHECK16-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS33]], i32 0, i32 0
+// CHECK16-NEXT:    [[TMP99:%.*]] = load i32, i32* [[N]], align 4
+// CHECK16-NEXT:    store i32 [[TMP99]], i32* [[N_CASTED31]], align 4
+// CHECK16-NEXT:    [[TMP100:%.*]] = load i32, i32* [[N_CASTED31]], align 4
+// CHECK16-NEXT:    [[TMP101:%.*]] = mul nuw i32 [[TMP0]], 4
+// CHECK16-NEXT:    [[TMP102:%.*]] = sext i32 [[TMP101]] to i64
+// CHECK16-NEXT:    [[TMP103:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES35]] to i8*
+// CHECK16-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP103]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes.12 to i8*), i32 24, i1 false)
+// CHECK16-NEXT:    [[TMP104:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS32]], i32 0, i32 0
+// CHECK16-NEXT:    [[TMP105:%.*]] = bitcast i8** [[TMP104]] to i32*
+// CHECK16-NEXT:    store i32 [[TMP100]], i32* [[TMP105]], align 4
+// CHECK16-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS33]], i32 0, i32 0
+// CHECK16-NEXT:    [[TMP107:%.*]] = bitcast i8** [[TMP106]] to i32*
+// CHECK16-NEXT:    store i32 [[TMP100]], i32* [[TMP107]], align 4
+// CHECK16-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS34]], i32 0, i32 0
+// CHECK16-NEXT:    store i8* null, i8** [[TMP108]], align 4
+// CHECK16-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS32]], i32 0, i32 1
 // CHECK16-NEXT:    [[TMP110:%.*]] = bitcast i8** [[TMP109]] to i32*
-// CHECK16-NEXT:    store i32 [[TMP104]], i32* [[TMP110]], align 4
-// CHECK16-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES35]], i32 0, i32 0
-// CHECK16-NEXT:    store i64 4, i64* [[TMP111]], align 4
-// CHECK16-NEXT:    [[TMP112:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS34]], i32 0, i32 0
-// CHECK16-NEXT:    store i8* null, i8** [[TMP112]], align 4
-// CHECK16-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS32]], i32 0, i32 1
-// CHECK16-NEXT:    [[TMP114:%.*]] = bitcast i8** [[TMP113]] to i32*
-// CHECK16-NEXT:    store i32 [[TMP0]], i32* [[TMP114]], align 4
-// CHECK16-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS33]], i32 0, i32 1
-// CHECK16-NEXT:    [[TMP116:%.*]] = bitcast i8** [[TMP115]] to i32*
-// CHECK16-NEXT:    store i32 [[TMP0]], i32* [[TMP116]], align 4
-// CHECK16-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES35]], i32 0, i32 1
-// CHECK16-NEXT:    store i64 4, i64* [[TMP117]], align 4
-// CHECK16-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS34]], i32 0, i32 1
-// CHECK16-NEXT:    store i8* null, i8** [[TMP118]], align 4
-// CHECK16-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS32]], i32 0, i32 2
-// CHECK16-NEXT:    [[TMP120:%.*]] = bitcast i8** [[TMP119]] to i32**
-// CHECK16-NEXT:    store i32* [[VLA]], i32** [[TMP120]], align 4
-// CHECK16-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS33]], i32 0, i32 2
-// CHECK16-NEXT:    [[TMP122:%.*]] = bitcast i8** [[TMP121]] to i32**
-// CHECK16-NEXT:    store i32* [[VLA]], i32** [[TMP122]], align 4
-// CHECK16-NEXT:    [[TMP123:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES35]], i32 0, i32 2
-// CHECK16-NEXT:    store i64 [[TMP106]], i64* [[TMP123]], align 4
-// CHECK16-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS34]], i32 0, i32 2
-// CHECK16-NEXT:    store i8* null, i8** [[TMP124]], align 4
-// CHECK16-NEXT:    [[TMP125:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS32]], i32 0, i32 0
-// CHECK16-NEXT:    [[TMP126:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS33]], i32 0, i32 0
-// CHECK16-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES35]], i32 0, i32 0
-// CHECK16-NEXT:    [[TMP128:%.*]] = load i32, i32* [[N]], align 4
-// CHECK16-NEXT:    store i32 [[TMP128]], i32* [[DOTCAPTURE_EXPR_37]], align 4
-// CHECK16-NEXT:    [[TMP129:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_37]], align 4
-// CHECK16-NEXT:    [[SUB39:%.*]] = sub nsw i32 [[TMP129]], 0
+// CHECK16-NEXT:    store i32 [[TMP0]], i32* [[TMP110]], align 4
+// CHECK16-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS33]], i32 0, i32 1
+// CHECK16-NEXT:    [[TMP112:%.*]] = bitcast i8** [[TMP111]] to i32*
+// CHECK16-NEXT:    store i32 [[TMP0]], i32* [[TMP112]], align 4
+// CHECK16-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS34]], i32 0, i32 1
+// CHECK16-NEXT:    store i8* null, i8** [[TMP113]], align 4
+// CHECK16-NEXT:    [[TMP114:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS32]], i32 0, i32 2
+// CHECK16-NEXT:    [[TMP115:%.*]] = bitcast i8** [[TMP114]] to i32**
+// CHECK16-NEXT:    store i32* [[VLA]], i32** [[TMP115]], align 4
+// CHECK16-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS33]], i32 0, i32 2
+// CHECK16-NEXT:    [[TMP117:%.*]] = bitcast i8** [[TMP116]] to i32**
+// CHECK16-NEXT:    store i32* [[VLA]], i32** [[TMP117]], align 4
+// CHECK16-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES35]], i32 0, i32 2
+// CHECK16-NEXT:    store i64 [[TMP102]], i64* [[TMP118]], align 4
+// CHECK16-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS34]], i32 0, i32 2
+// CHECK16-NEXT:    store i8* null, i8** [[TMP119]], align 4
+// CHECK16-NEXT:    [[TMP120:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS32]], i32 0, i32 0
+// CHECK16-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS33]], i32 0, i32 0
+// CHECK16-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES35]], i32 0, i32 0
+// CHECK16-NEXT:    [[TMP123:%.*]] = load i32, i32* [[N]], align 4
+// CHECK16-NEXT:    store i32 [[TMP123]], i32* [[DOTCAPTURE_EXPR_37]], align 4
+// CHECK16-NEXT:    [[TMP124:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_37]], align 4
+// CHECK16-NEXT:    [[SUB39:%.*]] = sub nsw i32 [[TMP124]], 0
 // CHECK16-NEXT:    [[DIV40:%.*]] = sdiv i32 [[SUB39]], 1
 // CHECK16-NEXT:    [[SUB41:%.*]] = sub nsw i32 [[DIV40]], 1
 // CHECK16-NEXT:    store i32 [[SUB41]], i32* [[DOTCAPTURE_EXPR_38]], align 4
-// CHECK16-NEXT:    [[TMP130:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_38]], align 4
-// CHECK16-NEXT:    [[ADD42:%.*]] = add nsw i32 [[TMP130]], 1
-// CHECK16-NEXT:    [[TMP131:%.*]] = zext i32 [[ADD42]] to i64
-// CHECK16-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP131]])
-// CHECK16-NEXT:    [[TMP132:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l163.region_id, i32 3, i8** [[TMP125]], i8** [[TMP126]], i64* [[TMP127]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.10, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK16-NEXT:    [[TMP133:%.*]] = icmp ne i32 [[TMP132]], 0
-// CHECK16-NEXT:    br i1 [[TMP133]], label [[OMP_OFFLOAD_FAILED43:%.*]], label [[OMP_OFFLOAD_CONT44:%.*]]
+// CHECK16-NEXT:    [[TMP125:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_38]], align 4
+// CHECK16-NEXT:    [[ADD42:%.*]] = add nsw i32 [[TMP125]], 1
+// CHECK16-NEXT:    [[TMP126:%.*]] = zext i32 [[ADD42]] to i64
+// CHECK16-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP126]])
+// CHECK16-NEXT:    [[TMP127:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l163.region_id, i32 3, i8** [[TMP120]], i8** [[TMP121]], i64* [[TMP122]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK16-NEXT:    [[TMP128:%.*]] = icmp ne i32 [[TMP127]], 0
+// CHECK16-NEXT:    br i1 [[TMP128]], label [[OMP_OFFLOAD_FAILED43:%.*]], label [[OMP_OFFLOAD_CONT44:%.*]]
 // CHECK16:       omp_offload.failed43:
-// CHECK16-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l163(i32 [[TMP104]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK16-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l163(i32 [[TMP100]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK16-NEXT:    br label [[OMP_OFFLOAD_CONT44]]
 // CHECK16:       omp_offload.cont44:
-// CHECK16-NEXT:    [[TMP134:%.*]] = load i32, i32* [[M]], align 4
-// CHECK16-NEXT:    store i32 [[TMP134]], i32* [[M_CASTED45]], align 4
-// CHECK16-NEXT:    [[TMP135:%.*]] = load i32, i32* [[M_CASTED45]], align 4
-// CHECK16-NEXT:    [[TMP136:%.*]] = load i32, i32* [[N]], align 4
-// CHECK16-NEXT:    store i32 [[TMP136]], i32* [[N_CASTED46]], align 4
-// CHECK16-NEXT:    [[TMP137:%.*]] = load i32, i32* [[N_CASTED46]], align 4
-// CHECK16-NEXT:    [[TMP138:%.*]] = mul nuw i32 [[TMP0]], 4
-// CHECK16-NEXT:    [[TMP139:%.*]] = sext i32 [[TMP138]] to i64
-// CHECK16-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 0
-// CHECK16-NEXT:    [[TMP141:%.*]] = bitcast i8** [[TMP140]] to i32*
-// CHECK16-NEXT:    store i32 [[TMP135]], i32* [[TMP141]], align 4
-// CHECK16-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 0
-// CHECK16-NEXT:    [[TMP143:%.*]] = bitcast i8** [[TMP142]] to i32*
-// CHECK16-NEXT:    store i32 [[TMP135]], i32* [[TMP143]], align 4
-// CHECK16-NEXT:    [[TMP144:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES50]], i32 0, i32 0
-// CHECK16-NEXT:    store i64 4, i64* [[TMP144]], align 4
-// CHECK16-NEXT:    [[TMP145:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS49]], i32 0, i32 0
+// CHECK16-NEXT:    [[TMP129:%.*]] = load i32, i32* [[M]], align 4
+// CHECK16-NEXT:    store i32 [[TMP129]], i32* [[M_CASTED45]], align 4
+// CHECK16-NEXT:    [[TMP130:%.*]] = load i32, i32* [[M_CASTED45]], align 4
+// CHECK16-NEXT:    [[TMP131:%.*]] = load i32, i32* [[N]], align 4
+// CHECK16-NEXT:    store i32 [[TMP131]], i32* [[N_CASTED46]], align 4
+// CHECK16-NEXT:    [[TMP132:%.*]] = load i32, i32* [[N_CASTED46]], align 4
+// CHECK16-NEXT:    [[TMP133:%.*]] = mul nuw i32 [[TMP0]], 4
+// CHECK16-NEXT:    [[TMP134:%.*]] = sext i32 [[TMP133]] to i64
+// CHECK16-NEXT:    [[TMP135:%.*]] = bitcast [4 x i64]* [[DOTOFFLOAD_SIZES50]] to i8*
+// CHECK16-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP135]], i8* align 4 bitcast ([4 x i64]* @.offload_sizes.16 to i8*), i32 32, i1 false)
+// CHECK16-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 0
+// CHECK16-NEXT:    [[TMP137:%.*]] = bitcast i8** [[TMP136]] to i32*
+// CHECK16-NEXT:    store i32 [[TMP130]], i32* [[TMP137]], align 4
+// CHECK16-NEXT:    [[TMP138:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 0
+// CHECK16-NEXT:    [[TMP139:%.*]] = bitcast i8** [[TMP138]] to i32*
+// CHECK16-NEXT:    store i32 [[TMP130]], i32* [[TMP139]], align 4
+// CHECK16-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS49]], i32 0, i32 0
+// CHECK16-NEXT:    store i8* null, i8** [[TMP140]], align 4
+// CHECK16-NEXT:    [[TMP141:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 1
+// CHECK16-NEXT:    [[TMP142:%.*]] = bitcast i8** [[TMP141]] to i32*
+// CHECK16-NEXT:    store i32 [[TMP132]], i32* [[TMP142]], align 4
+// CHECK16-NEXT:    [[TMP143:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 1
+// CHECK16-NEXT:    [[TMP144:%.*]] = bitcast i8** [[TMP143]] to i32*
+// CHECK16-NEXT:    store i32 [[TMP132]], i32* [[TMP144]], align 4
+// CHECK16-NEXT:    [[TMP145:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS49]], i32 0, i32 1
 // CHECK16-NEXT:    store i8* null, i8** [[TMP145]], align 4
-// CHECK16-NEXT:    [[TMP146:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 1
+// CHECK16-NEXT:    [[TMP146:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 2
 // CHECK16-NEXT:    [[TMP147:%.*]] = bitcast i8** [[TMP146]] to i32*
-// CHECK16-NEXT:    store i32 [[TMP137]], i32* [[TMP147]], align 4
-// CHECK16-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 1
+// CHECK16-NEXT:    store i32 [[TMP0]], i32* [[TMP147]], align 4
+// CHECK16-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 2
 // CHECK16-NEXT:    [[TMP149:%.*]] = bitcast i8** [[TMP148]] to i32*
-// CHECK16-NEXT:    store i32 [[TMP137]], i32* [[TMP149]], align 4
-// CHECK16-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES50]], i32 0, i32 1
-// CHECK16-NEXT:    store i64 4, i64* [[TMP150]], align 4
-// CHECK16-NEXT:    [[TMP151:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS49]], i32 0, i32 1
-// CHECK16-NEXT:    store i8* null, i8** [[TMP151]], align 4
-// CHECK16-NEXT:    [[TMP152:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 2
-// CHECK16-NEXT:    [[TMP153:%.*]] = bitcast i8** [[TMP152]] to i32*
-// CHECK16-NEXT:    store i32 [[TMP0]], i32* [[TMP153]], align 4
-// CHECK16-NEXT:    [[TMP154:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 2
-// CHECK16-NEXT:    [[TMP155:%.*]] = bitcast i8** [[TMP154]] to i32*
-// CHECK16-NEXT:    store i32 [[TMP0]], i32* [[TMP155]], align 4
-// CHECK16-NEXT:    [[TMP156:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES50]], i32 0, i32 2
-// CHECK16-NEXT:    store i64 4, i64* [[TMP156]], align 4
-// CHECK16-NEXT:    [[TMP157:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS49]], i32 0, i32 2
-// CHECK16-NEXT:    store i8* null, i8** [[TMP157]], align 4
-// CHECK16-NEXT:    [[TMP158:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 3
-// CHECK16-NEXT:    [[TMP159:%.*]] = bitcast i8** [[TMP158]] to i32**
-// CHECK16-NEXT:    store i32* [[VLA]], i32** [[TMP159]], align 4
-// CHECK16-NEXT:    [[TMP160:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 3
-// CHECK16-NEXT:    [[TMP161:%.*]] = bitcast i8** [[TMP160]] to i32**
-// CHECK16-NEXT:    store i32* [[VLA]], i32** [[TMP161]], align 4
-// CHECK16-NEXT:    [[TMP162:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES50]], i32 0, i32 3
-// CHECK16-NEXT:    store i64 [[TMP139]], i64* [[TMP162]], align 4
-// CHECK16-NEXT:    [[TMP163:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS49]], i32 0, i32 3
-// CHECK16-NEXT:    store i8* null, i8** [[TMP163]], align 4
-// CHECK16-NEXT:    [[TMP164:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 0
-// CHECK16-NEXT:    [[TMP165:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 0
-// CHECK16-NEXT:    [[TMP166:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES50]], i32 0, i32 0
-// CHECK16-NEXT:    [[TMP167:%.*]] = load i32, i32* [[N]], align 4
-// CHECK16-NEXT:    store i32 [[TMP167]], i32* [[DOTCAPTURE_EXPR_52]], align 4
-// CHECK16-NEXT:    [[TMP168:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_52]], align 4
-// CHECK16-NEXT:    [[SUB54:%.*]] = sub nsw i32 [[TMP168]], 0
+// CHECK16-NEXT:    store i32 [[TMP0]], i32* [[TMP149]], align 4
+// CHECK16-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS49]], i32 0, i32 2
+// CHECK16-NEXT:    store i8* null, i8** [[TMP150]], align 4
+// CHECK16-NEXT:    [[TMP151:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 3
+// CHECK16-NEXT:    [[TMP152:%.*]] = bitcast i8** [[TMP151]] to i32**
+// CHECK16-NEXT:    store i32* [[VLA]], i32** [[TMP152]], align 4
+// CHECK16-NEXT:    [[TMP153:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 3
+// CHECK16-NEXT:    [[TMP154:%.*]] = bitcast i8** [[TMP153]] to i32**
+// CHECK16-NEXT:    store i32* [[VLA]], i32** [[TMP154]], align 4
+// CHECK16-NEXT:    [[TMP155:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES50]], i32 0, i32 3
+// CHECK16-NEXT:    store i64 [[TMP134]], i64* [[TMP155]], align 4
+// CHECK16-NEXT:    [[TMP156:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS49]], i32 0, i32 3
+// CHECK16-NEXT:    store i8* null, i8** [[TMP156]], align 4
+// CHECK16-NEXT:    [[TMP157:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 0
+// CHECK16-NEXT:    [[TMP158:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 0
+// CHECK16-NEXT:    [[TMP159:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES50]], i32 0, i32 0
+// CHECK16-NEXT:    [[TMP160:%.*]] = load i32, i32* [[N]], align 4
+// CHECK16-NEXT:    store i32 [[TMP160]], i32* [[DOTCAPTURE_EXPR_52]], align 4
+// CHECK16-NEXT:    [[TMP161:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_52]], align 4
+// CHECK16-NEXT:    [[SUB54:%.*]] = sub nsw i32 [[TMP161]], 0
 // CHECK16-NEXT:    [[DIV55:%.*]] = sdiv i32 [[SUB54]], 1
 // CHECK16-NEXT:    [[SUB56:%.*]] = sub nsw i32 [[DIV55]], 1
 // CHECK16-NEXT:    store i32 [[SUB56]], i32* [[DOTCAPTURE_EXPR_53]], align 4
-// CHECK16-NEXT:    [[TMP169:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_53]], align 4
-// CHECK16-NEXT:    [[ADD57:%.*]] = add nsw i32 [[TMP169]], 1
-// CHECK16-NEXT:    [[TMP170:%.*]] = zext i32 [[ADD57]] to i64
-// CHECK16-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP170]])
-// CHECK16-NEXT:    [[TMP171:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l168.region_id, i32 4, i8** [[TMP164]], i8** [[TMP165]], i64* [[TMP166]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK16-NEXT:    [[TMP172:%.*]] = icmp ne i32 [[TMP171]], 0
-// CHECK16-NEXT:    br i1 [[TMP172]], label [[OMP_OFFLOAD_FAILED58:%.*]], label [[OMP_OFFLOAD_CONT59:%.*]]
+// CHECK16-NEXT:    [[TMP162:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_53]], align 4
+// CHECK16-NEXT:    [[ADD57:%.*]] = add nsw i32 [[TMP162]], 1
+// CHECK16-NEXT:    [[TMP163:%.*]] = zext i32 [[ADD57]] to i64
+// CHECK16-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP163]])
+// CHECK16-NEXT:    [[TMP164:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l168.region_id, i32 4, i8** [[TMP157]], i8** [[TMP158]], i64* [[TMP159]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.17, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK16-NEXT:    [[TMP165:%.*]] = icmp ne i32 [[TMP164]], 0
+// CHECK16-NEXT:    br i1 [[TMP165]], label [[OMP_OFFLOAD_FAILED58:%.*]], label [[OMP_OFFLOAD_CONT59:%.*]]
 // CHECK16:       omp_offload.failed58:
-// CHECK16-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l168(i32 [[TMP135]], i32 [[TMP137]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK16-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l168(i32 [[TMP130]], i32 [[TMP132]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK16-NEXT:    br label [[OMP_OFFLOAD_CONT59]]
 // CHECK16:       omp_offload.cont59:
-// CHECK16-NEXT:    [[TMP173:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK16-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10EEiT_(i32 noundef [[TMP173]])
+// CHECK16-NEXT:    [[TMP166:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK16-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10EEiT_(i32 noundef [[TMP166]])
 // CHECK16-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK16-NEXT:    [[TMP174:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK16-NEXT:    call void @llvm.stackrestore(i8* [[TMP174]])
-// CHECK16-NEXT:    [[TMP175:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK16-NEXT:    ret i32 [[TMP175]]
+// CHECK16-NEXT:    [[TMP167:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK16-NEXT:    call void @llvm.stackrestore(i8* [[TMP167]])
+// CHECK16-NEXT:    [[TMP168:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK16-NEXT:    ret i32 [[TMP168]]
 //
 //
 // CHECK16-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l148
@@ -15546,11 +15490,11 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
 // CHECK16-NEXT:    store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK16-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*, i32)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP4]])
+// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*, i32)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP4]])
 // CHECK16-NEXT:    ret void
 //
 //
-// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..5
+// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..6
 // CHECK16-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK16-NEXT:  entry:
 // CHECK16-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -15628,7 +15572,7 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
 // CHECK16-NEXT:    store i32 [[TMP19]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK16-NEXT:    [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*, i32)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]], i32 [[TMP20]])
+// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*, i32)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]], i32 [[TMP20]])
 // CHECK16-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK16:       omp.inner.for.inc:
 // CHECK16-NEXT:    [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -15670,7 +15614,7 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    ret void
 //
 //
-// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..6
+// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..7
 // CHECK16-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK16-NEXT:  entry:
 // CHECK16-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -15785,11 +15729,11 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    store i32* [[A]], i32** [[A_ADDR]], align 4
 // CHECK16-NEXT:    [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4
 // CHECK16-NEXT:    [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4
-// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]])
+// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]])
 // CHECK16-NEXT:    ret void
 //
 //
-// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..8
+// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..10
 // CHECK16-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] {
 // CHECK16-NEXT:  entry:
 // CHECK16-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -15859,7 +15803,7 @@ int main (int argc, char **argv) {
 // CHECK16:       omp.inner.for.body:
 // CHECK16-NEXT:    [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
 // CHECK16-NEXT:    [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
-// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]])
+// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]])
 // CHECK16-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK16:       omp.inner.for.inc:
 // CHECK16-NEXT:    [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -15878,7 +15822,7 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    ret void
 //
 //
-// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..9
+// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..11
 // CHECK16-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] {
 // CHECK16-NEXT:  entry:
 // CHECK16-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -15996,11 +15940,11 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
 // CHECK16-NEXT:    store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK16-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*, i32)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP4]])
+// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*, i32)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP4]])
 // CHECK16-NEXT:    ret void
 //
 //
-// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..11
+// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..14
 // CHECK16-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK16-NEXT:  entry:
 // CHECK16-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -16076,7 +16020,7 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
 // CHECK16-NEXT:    store i32 [[TMP18]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK16-NEXT:    [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*, i32)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]], i32 [[TMP19]])
+// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*, i32)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]], i32 [[TMP19]])
 // CHECK16-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK16:       omp.inner.for.inc:
 // CHECK16-NEXT:    [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -16095,7 +16039,7 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    ret void
 //
 //
-// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..12
+// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..15
 // CHECK16-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK16-NEXT:  entry:
 // CHECK16-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -16197,7 +16141,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK16-LABEL: define {{[^@]+}}@_Z5tmainIiLi10EEiT_
-// CHECK16-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK16-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK16-NEXT:  entry:
 // CHECK16-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK16-NEXT:    [[A:%.*]] = alloca [10 x i32], align 4
@@ -16237,7 +16181,7 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK16-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK16-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK16-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l116.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.16, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK16-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l116.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.20, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.21, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK16-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK16-NEXT:    br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK16:       omp_offload.failed:
@@ -16255,7 +16199,7 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK16-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
 // CHECK16-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK16-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l121.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.19, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.20, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK16-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l121.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.24, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.25, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK16-NEXT:    [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
 // CHECK16-NEXT:    br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]]
 // CHECK16:       omp_offload.failed5:
@@ -16284,7 +16228,7 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0
 // CHECK16-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS8]], i32 0, i32 0
 // CHECK16-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK16-NEXT:    [[TMP32:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l126.region_id, i32 2, i8** [[TMP30]], i8** [[TMP31]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.23, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.24, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK16-NEXT:    [[TMP32:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l126.region_id, i32 2, i8** [[TMP30]], i8** [[TMP31]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.28, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.29, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK16-NEXT:    [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0
 // CHECK16-NEXT:    br i1 [[TMP33]], label [[OMP_OFFLOAD_FAILED11:%.*]], label [[OMP_OFFLOAD_CONT12:%.*]]
 // CHECK16:       omp_offload.failed11:
@@ -16302,7 +16246,7 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0
 // CHECK16-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 0
 // CHECK16-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK16-NEXT:    [[TMP41:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l131.region_id, i32 1, i8** [[TMP39]], i8** [[TMP40]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.27, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.28, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK16-NEXT:    [[TMP41:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l131.region_id, i32 1, i8** [[TMP39]], i8** [[TMP40]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.32, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.33, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK16-NEXT:    [[TMP42:%.*]] = icmp ne i32 [[TMP41]], 0
 // CHECK16-NEXT:    br i1 [[TMP42]], label [[OMP_OFFLOAD_FAILED17:%.*]], label [[OMP_OFFLOAD_CONT18:%.*]]
 // CHECK16:       omp_offload.failed17:
@@ -16331,7 +16275,7 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0
 // CHECK16-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 0
 // CHECK16-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK16-NEXT:    [[TMP57:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l136.region_id, i32 2, i8** [[TMP55]], i8** [[TMP56]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.31, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.32, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK16-NEXT:    [[TMP57:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l136.region_id, i32 2, i8** [[TMP55]], i8** [[TMP56]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.36, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.37, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK16-NEXT:    [[TMP58:%.*]] = icmp ne i32 [[TMP57]], 0
 // CHECK16-NEXT:    br i1 [[TMP58]], label [[OMP_OFFLOAD_FAILED24:%.*]], label [[OMP_OFFLOAD_CONT25:%.*]]
 // CHECK16:       omp_offload.failed24:
@@ -16347,11 +16291,11 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4
 // CHECK16-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4
 // CHECK16-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4
-// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK16-NEXT:    ret void
 //
 //
-// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..14
+// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..18
 // CHECK16-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK16-NEXT:  entry:
 // CHECK16-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -16397,7 +16341,7 @@ int main (int argc, char **argv) {
 // CHECK16:       omp.inner.for.body:
 // CHECK16-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
 // CHECK16-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
-// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]])
+// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]])
 // CHECK16-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK16:       omp.inner.for.inc:
 // CHECK16-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -16412,7 +16356,7 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    ret void
 //
 //
-// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..15
+// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..19
 // CHECK16-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK16-NEXT:  entry:
 // CHECK16-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -16492,11 +16436,11 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4
 // CHECK16-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4
 // CHECK16-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4
-// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..17 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..22 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK16-NEXT:    ret void
 //
 //
-// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..17
+// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..22
 // CHECK16-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK16-NEXT:  entry:
 // CHECK16-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -16542,7 +16486,7 @@ int main (int argc, char **argv) {
 // CHECK16:       omp.inner.for.body:
 // CHECK16-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
 // CHECK16-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
-// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]])
+// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..23 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]])
 // CHECK16-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK16:       omp.inner.for.inc:
 // CHECK16-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -16557,7 +16501,7 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    ret void
 //
 //
-// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..18
+// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..23
 // CHECK16-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK16-NEXT:  entry:
 // CHECK16-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -16646,11 +16590,11 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
 // CHECK16-NEXT:    store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK16-NEXT:    [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i32)* @.omp_outlined..21 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i32 [[TMP3]])
+// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i32)* @.omp_outlined..26 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i32 [[TMP3]])
 // CHECK16-NEXT:    ret void
 //
 //
-// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..21
+// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..26
 // CHECK16-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK16-NEXT:  entry:
 // CHECK16-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -16702,7 +16646,7 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
 // CHECK16-NEXT:    store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK16-NEXT:    [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..22 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]])
+// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..27 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]])
 // CHECK16-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK16:       omp.inner.for.inc:
 // CHECK16-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -16717,7 +16661,7 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    ret void
 //
 //
-// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..22
+// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..27
 // CHECK16-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK16-NEXT:  entry:
 // CHECK16-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -16819,11 +16763,11 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4
 // CHECK16-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4
 // CHECK16-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4
-// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..25 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK16-NEXT:    ret void
 //
 //
-// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..25
+// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..30
 // CHECK16-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK16-NEXT:  entry:
 // CHECK16-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -16869,7 +16813,7 @@ int main (int argc, char **argv) {
 // CHECK16:       omp.inner.for.body:
 // CHECK16-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
 // CHECK16-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
-// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]])
+// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]])
 // CHECK16-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK16:       omp.inner.for.inc:
 // CHECK16-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -16884,7 +16828,7 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    ret void
 //
 //
-// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..26
+// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..31
 // CHECK16-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK16-NEXT:  entry:
 // CHECK16-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -16971,11 +16915,11 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
 // CHECK16-NEXT:    store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK16-NEXT:    [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i32)* @.omp_outlined..29 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i32 [[TMP3]])
+// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i32)* @.omp_outlined..34 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i32 [[TMP3]])
 // CHECK16-NEXT:    ret void
 //
 //
-// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..29
+// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..34
 // CHECK16-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK16-NEXT:  entry:
 // CHECK16-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -17027,7 +16971,7 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
 // CHECK16-NEXT:    store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK16-NEXT:    [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]])
+// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]])
 // CHECK16-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK16:       omp.inner.for.inc:
 // CHECK16-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -17042,7 +16986,7 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    ret void
 //
 //
-// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..30
+// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..35
 // CHECK16-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK16-NEXT:  entry:
 // CHECK16-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -17118,7 +17062,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK16-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK16-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK16-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK16-NEXT:  entry:
 // CHECK16-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK16-NEXT:    ret void
@@ -17192,14 +17136,14 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    store i32 [[TMP3]], i32* [[CONV]], align 4
 // CHECK17-NEXT:    [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8
 // CHECK17-NEXT:    [[TMP5:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK17-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP4]], i64* [[TMP7]], align 8
-// CHECK17-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP4]], i64* [[TMP9]], align 8
-// CHECK17-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK17-NEXT:    store i64 4, i64* [[TMP10]], align 8
+// CHECK17-NEXT:    [[TMP6:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK17-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP6]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes to i8*), i64 24, i1 false)
+// CHECK17-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i64*
+// CHECK17-NEXT:    store i64 [[TMP4]], i64* [[TMP8]], align 8
+// CHECK17-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i64*
+// CHECK17-NEXT:    store i64 [[TMP4]], i64* [[TMP10]], align 8
 // CHECK17-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
 // CHECK17-NEXT:    store i8* null, i8** [[TMP11]], align 8
 // CHECK17-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -17208,300 +17152,286 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK17-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i64*
 // CHECK17-NEXT:    store i64 [[TMP1]], i64* [[TMP15]], align 8
-// CHECK17-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK17-NEXT:    store i64 8, i64* [[TMP16]], align 8
-// CHECK17-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK17-NEXT:    store i8* null, i8** [[TMP17]], align 8
-// CHECK17-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK17-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
-// CHECK17-NEXT:    store i32* [[VLA]], i32** [[TMP19]], align 8
-// CHECK17-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK17-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32**
-// CHECK17-NEXT:    store i32* [[VLA]], i32** [[TMP21]], align 8
-// CHECK17-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK17-NEXT:    store i64 [[TMP5]], i64* [[TMP22]], align 8
-// CHECK17-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK17-NEXT:    store i8* null, i8** [[TMP23]], align 8
-// CHECK17-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP27:%.*]] = load i32, i32* [[N]], align 4
-// CHECK17-NEXT:    store i32 [[TMP27]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK17-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK17-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP28]], 0
+// CHECK17-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK17-NEXT:    store i8* null, i8** [[TMP16]], align 8
+// CHECK17-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK17-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32**
+// CHECK17-NEXT:    store i32* [[VLA]], i32** [[TMP18]], align 8
+// CHECK17-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK17-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32**
+// CHECK17-NEXT:    store i32* [[VLA]], i32** [[TMP20]], align 8
+// CHECK17-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
+// CHECK17-NEXT:    store i64 [[TMP5]], i64* [[TMP21]], align 8
+// CHECK17-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK17-NEXT:    store i8* null, i8** [[TMP22]], align 8
+// CHECK17-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP26:%.*]] = load i32, i32* [[N]], align 4
+// CHECK17-NEXT:    store i32 [[TMP26]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK17-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK17-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP27]], 0
 // CHECK17-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK17-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK17-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK17-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK17-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP29]], 1
-// CHECK17-NEXT:    [[TMP30:%.*]] = zext i32 [[ADD]] to i64
-// CHECK17-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP30]])
-// CHECK17-NEXT:    [[TMP31:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l148.region_id, i32 3, i8** [[TMP24]], i8** [[TMP25]], i64* [[TMP26]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK17-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
-// CHECK17-NEXT:    br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK17-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK17-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP28]], 1
+// CHECK17-NEXT:    [[TMP29:%.*]] = zext i32 [[ADD]] to i64
+// CHECK17-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP29]])
+// CHECK17-NEXT:    [[TMP30:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l148.region_id, i32 3, i8** [[TMP23]], i8** [[TMP24]], i64* [[TMP25]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK17-NEXT:    [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
+// CHECK17-NEXT:    br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK17:       omp_offload.failed:
 // CHECK17-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l148(i64 [[TMP4]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK17-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK17:       omp_offload.cont:
-// CHECK17-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N]], align 4
+// CHECK17-NEXT:    [[TMP32:%.*]] = load i32, i32* [[N]], align 4
 // CHECK17-NEXT:    [[CONV4:%.*]] = bitcast i64* [[N_CASTED3]] to i32*
-// CHECK17-NEXT:    store i32 [[TMP33]], i32* [[CONV4]], align 4
-// CHECK17-NEXT:    [[TMP34:%.*]] = load i64, i64* [[N_CASTED3]], align 8
-// CHECK17-NEXT:    [[TMP35:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK17-NEXT:    store i32 [[TMP32]], i32* [[CONV4]], align 4
+// CHECK17-NEXT:    [[TMP33:%.*]] = load i64, i64* [[N_CASTED3]], align 8
+// CHECK17-NEXT:    [[TMP34:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK17-NEXT:    [[TMP35:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES8]] to i8*
+// CHECK17-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP35]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes.4 to i8*), i64 24, i1 false)
 // CHECK17-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
 // CHECK17-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP34]], i64* [[TMP37]], align 8
+// CHECK17-NEXT:    store i64 [[TMP33]], i64* [[TMP37]], align 8
 // CHECK17-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
 // CHECK17-NEXT:    [[TMP39:%.*]] = bitcast i8** [[TMP38]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP34]], i64* [[TMP39]], align 8
-// CHECK17-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 0
-// CHECK17-NEXT:    store i64 4, i64* [[TMP40]], align 8
-// CHECK17-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 0
-// CHECK17-NEXT:    store i8* null, i8** [[TMP41]], align 8
-// CHECK17-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 1
-// CHECK17-NEXT:    [[TMP43:%.*]] = bitcast i8** [[TMP42]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP1]], i64* [[TMP43]], align 8
-// CHECK17-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 1
-// CHECK17-NEXT:    [[TMP45:%.*]] = bitcast i8** [[TMP44]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP1]], i64* [[TMP45]], align 8
-// CHECK17-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 1
-// CHECK17-NEXT:    store i64 8, i64* [[TMP46]], align 8
-// CHECK17-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 1
-// CHECK17-NEXT:    store i8* null, i8** [[TMP47]], align 8
-// CHECK17-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 2
+// CHECK17-NEXT:    store i64 [[TMP33]], i64* [[TMP39]], align 8
+// CHECK17-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 0
+// CHECK17-NEXT:    store i8* null, i8** [[TMP40]], align 8
+// CHECK17-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 1
+// CHECK17-NEXT:    [[TMP42:%.*]] = bitcast i8** [[TMP41]] to i64*
+// CHECK17-NEXT:    store i64 [[TMP1]], i64* [[TMP42]], align 8
+// CHECK17-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 1
+// CHECK17-NEXT:    [[TMP44:%.*]] = bitcast i8** [[TMP43]] to i64*
+// CHECK17-NEXT:    store i64 [[TMP1]], i64* [[TMP44]], align 8
+// CHECK17-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 1
+// CHECK17-NEXT:    store i8* null, i8** [[TMP45]], align 8
+// CHECK17-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 2
+// CHECK17-NEXT:    [[TMP47:%.*]] = bitcast i8** [[TMP46]] to i32**
+// CHECK17-NEXT:    store i32* [[VLA]], i32** [[TMP47]], align 8
+// CHECK17-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 2
 // CHECK17-NEXT:    [[TMP49:%.*]] = bitcast i8** [[TMP48]] to i32**
 // CHECK17-NEXT:    store i32* [[VLA]], i32** [[TMP49]], align 8
-// CHECK17-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 2
-// CHECK17-NEXT:    [[TMP51:%.*]] = bitcast i8** [[TMP50]] to i32**
-// CHECK17-NEXT:    store i32* [[VLA]], i32** [[TMP51]], align 8
-// CHECK17-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 2
-// CHECK17-NEXT:    store i64 [[TMP35]], i64* [[TMP52]], align 8
-// CHECK17-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 2
-// CHECK17-NEXT:    store i8* null, i8** [[TMP53]], align 8
-// CHECK17-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP57:%.*]] = load i32, i32* [[N]], align 4
-// CHECK17-NEXT:    store i32 [[TMP57]], i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK17-NEXT:    [[TMP58:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK17-NEXT:    [[SUB12:%.*]] = sub nsw i32 [[TMP58]], 0
+// CHECK17-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 2
+// CHECK17-NEXT:    store i64 [[TMP34]], i64* [[TMP50]], align 8
+// CHECK17-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 2
+// CHECK17-NEXT:    store i8* null, i8** [[TMP51]], align 8
+// CHECK17-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP55:%.*]] = load i32, i32* [[N]], align 4
+// CHECK17-NEXT:    store i32 [[TMP55]], i32* [[DOTCAPTURE_EXPR_10]], align 4
+// CHECK17-NEXT:    [[TMP56:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
+// CHECK17-NEXT:    [[SUB12:%.*]] = sub nsw i32 [[TMP56]], 0
 // CHECK17-NEXT:    [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1
 // CHECK17-NEXT:    [[SUB14:%.*]] = sub nsw i32 [[DIV13]], 1
 // CHECK17-NEXT:    store i32 [[SUB14]], i32* [[DOTCAPTURE_EXPR_11]], align 4
-// CHECK17-NEXT:    [[TMP59:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_11]], align 4
-// CHECK17-NEXT:    [[ADD15:%.*]] = add nsw i32 [[TMP59]], 1
-// CHECK17-NEXT:    [[TMP60:%.*]] = zext i32 [[ADD15]] to i64
-// CHECK17-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP60]])
-// CHECK17-NEXT:    [[TMP61:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l153.region_id, i32 3, i8** [[TMP54]], i8** [[TMP55]], i64* [[TMP56]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK17-NEXT:    [[TMP62:%.*]] = icmp ne i32 [[TMP61]], 0
-// CHECK17-NEXT:    br i1 [[TMP62]], label [[OMP_OFFLOAD_FAILED16:%.*]], label [[OMP_OFFLOAD_CONT17:%.*]]
+// CHECK17-NEXT:    [[TMP57:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_11]], align 4
+// CHECK17-NEXT:    [[ADD15:%.*]] = add nsw i32 [[TMP57]], 1
+// CHECK17-NEXT:    [[TMP58:%.*]] = zext i32 [[ADD15]] to i64
+// CHECK17-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP58]])
+// CHECK17-NEXT:    [[TMP59:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l153.region_id, i32 3, i8** [[TMP52]], i8** [[TMP53]], i64* [[TMP54]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.5, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK17-NEXT:    [[TMP60:%.*]] = icmp ne i32 [[TMP59]], 0
+// CHECK17-NEXT:    br i1 [[TMP60]], label [[OMP_OFFLOAD_FAILED16:%.*]], label [[OMP_OFFLOAD_CONT17:%.*]]
 // CHECK17:       omp_offload.failed16:
-// CHECK17-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l153(i64 [[TMP34]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK17-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l153(i64 [[TMP33]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK17-NEXT:    br label [[OMP_OFFLOAD_CONT17]]
 // CHECK17:       omp_offload.cont17:
-// CHECK17-NEXT:    [[TMP63:%.*]] = load i32, i32* [[M]], align 4
+// CHECK17-NEXT:    [[TMP61:%.*]] = load i32, i32* [[M]], align 4
 // CHECK17-NEXT:    [[CONV18:%.*]] = bitcast i64* [[M_CASTED]] to i32*
-// CHECK17-NEXT:    store i32 [[TMP63]], i32* [[CONV18]], align 4
-// CHECK17-NEXT:    [[TMP64:%.*]] = load i64, i64* [[M_CASTED]], align 8
-// CHECK17-NEXT:    [[TMP65:%.*]] = load i32, i32* [[N]], align 4
+// CHECK17-NEXT:    store i32 [[TMP61]], i32* [[CONV18]], align 4
+// CHECK17-NEXT:    [[TMP62:%.*]] = load i64, i64* [[M_CASTED]], align 8
+// CHECK17-NEXT:    [[TMP63:%.*]] = load i32, i32* [[N]], align 4
 // CHECK17-NEXT:    [[CONV20:%.*]] = bitcast i64* [[N_CASTED19]] to i32*
-// CHECK17-NEXT:    store i32 [[TMP65]], i32* [[CONV20]], align 4
-// CHECK17-NEXT:    [[TMP66:%.*]] = load i64, i64* [[N_CASTED19]], align 8
-// CHECK17-NEXT:    [[TMP67:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK17-NEXT:    [[TMP68:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP69:%.*]] = bitcast i8** [[TMP68]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP64]], i64* [[TMP69]], align 8
-// CHECK17-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP71:%.*]] = bitcast i8** [[TMP70]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP64]], i64* [[TMP71]], align 8
-// CHECK17-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES24]], i32 0, i32 0
-// CHECK17-NEXT:    store i64 4, i64* [[TMP72]], align 8
-// CHECK17-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 0
-// CHECK17-NEXT:    store i8* null, i8** [[TMP73]], align 8
-// CHECK17-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 1
+// CHECK17-NEXT:    store i32 [[TMP63]], i32* [[CONV20]], align 4
+// CHECK17-NEXT:    [[TMP64:%.*]] = load i64, i64* [[N_CASTED19]], align 8
+// CHECK17-NEXT:    [[TMP65:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK17-NEXT:    [[TMP66:%.*]] = bitcast [4 x i64]* [[DOTOFFLOAD_SIZES24]] to i8*
+// CHECK17-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP66]], i8* align 8 bitcast ([4 x i64]* @.offload_sizes.8 to i8*), i64 32, i1 false)
+// CHECK17-NEXT:    [[TMP67:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP68:%.*]] = bitcast i8** [[TMP67]] to i64*
+// CHECK17-NEXT:    store i64 [[TMP62]], i64* [[TMP68]], align 8
+// CHECK17-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP70:%.*]] = bitcast i8** [[TMP69]] to i64*
+// CHECK17-NEXT:    store i64 [[TMP62]], i64* [[TMP70]], align 8
+// CHECK17-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 0
+// CHECK17-NEXT:    store i8* null, i8** [[TMP71]], align 8
+// CHECK17-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 1
+// CHECK17-NEXT:    [[TMP73:%.*]] = bitcast i8** [[TMP72]] to i64*
+// CHECK17-NEXT:    store i64 [[TMP64]], i64* [[TMP73]], align 8
+// CHECK17-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 1
 // CHECK17-NEXT:    [[TMP75:%.*]] = bitcast i8** [[TMP74]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP66]], i64* [[TMP75]], align 8
-// CHECK17-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 1
-// CHECK17-NEXT:    [[TMP77:%.*]] = bitcast i8** [[TMP76]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP66]], i64* [[TMP77]], align 8
-// CHECK17-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES24]], i32 0, i32 1
-// CHECK17-NEXT:    store i64 4, i64* [[TMP78]], align 8
-// CHECK17-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 1
-// CHECK17-NEXT:    store i8* null, i8** [[TMP79]], align 8
-// CHECK17-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 2
-// CHECK17-NEXT:    [[TMP81:%.*]] = bitcast i8** [[TMP80]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP1]], i64* [[TMP81]], align 8
-// CHECK17-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 2
-// CHECK17-NEXT:    [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP1]], i64* [[TMP83]], align 8
-// CHECK17-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES24]], i32 0, i32 2
-// CHECK17-NEXT:    store i64 8, i64* [[TMP84]], align 8
-// CHECK17-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 2
-// CHECK17-NEXT:    store i8* null, i8** [[TMP85]], align 8
-// CHECK17-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 3
-// CHECK17-NEXT:    [[TMP87:%.*]] = bitcast i8** [[TMP86]] to i32**
-// CHECK17-NEXT:    store i32* [[VLA]], i32** [[TMP87]], align 8
-// CHECK17-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 3
-// CHECK17-NEXT:    [[TMP89:%.*]] = bitcast i8** [[TMP88]] to i32**
-// CHECK17-NEXT:    store i32* [[VLA]], i32** [[TMP89]], align 8
-// CHECK17-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES24]], i32 0, i32 3
-// CHECK17-NEXT:    store i64 [[TMP67]], i64* [[TMP90]], align 8
-// CHECK17-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 3
-// CHECK17-NEXT:    store i8* null, i8** [[TMP91]], align 8
-// CHECK17-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES24]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP95:%.*]] = load i32, i32* [[N]], align 4
-// CHECK17-NEXT:    store i32 [[TMP95]], i32* [[DOTCAPTURE_EXPR_26]], align 4
-// CHECK17-NEXT:    [[TMP96:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_26]], align 4
-// CHECK17-NEXT:    [[SUB28:%.*]] = sub nsw i32 [[TMP96]], 0
+// CHECK17-NEXT:    store i64 [[TMP64]], i64* [[TMP75]], align 8
+// CHECK17-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 1
+// CHECK17-NEXT:    store i8* null, i8** [[TMP76]], align 8
+// CHECK17-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 2
+// CHECK17-NEXT:    [[TMP78:%.*]] = bitcast i8** [[TMP77]] to i64*
+// CHECK17-NEXT:    store i64 [[TMP1]], i64* [[TMP78]], align 8
+// CHECK17-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 2
+// CHECK17-NEXT:    [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i64*
+// CHECK17-NEXT:    store i64 [[TMP1]], i64* [[TMP80]], align 8
+// CHECK17-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 2
+// CHECK17-NEXT:    store i8* null, i8** [[TMP81]], align 8
+// CHECK17-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 3
+// CHECK17-NEXT:    [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i32**
+// CHECK17-NEXT:    store i32* [[VLA]], i32** [[TMP83]], align 8
+// CHECK17-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 3
+// CHECK17-NEXT:    [[TMP85:%.*]] = bitcast i8** [[TMP84]] to i32**
+// CHECK17-NEXT:    store i32* [[VLA]], i32** [[TMP85]], align 8
+// CHECK17-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES24]], i32 0, i32 3
+// CHECK17-NEXT:    store i64 [[TMP65]], i64* [[TMP86]], align 8
+// CHECK17-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 3
+// CHECK17-NEXT:    store i8* null, i8** [[TMP87]], align 8
+// CHECK17-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES24]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP91:%.*]] = load i32, i32* [[N]], align 4
+// CHECK17-NEXT:    store i32 [[TMP91]], i32* [[DOTCAPTURE_EXPR_26]], align 4
+// CHECK17-NEXT:    [[TMP92:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_26]], align 4
+// CHECK17-NEXT:    [[SUB28:%.*]] = sub nsw i32 [[TMP92]], 0
 // CHECK17-NEXT:    [[DIV29:%.*]] = sdiv i32 [[SUB28]], 1
 // CHECK17-NEXT:    [[SUB30:%.*]] = sub nsw i32 [[DIV29]], 1
 // CHECK17-NEXT:    store i32 [[SUB30]], i32* [[DOTCAPTURE_EXPR_27]], align 4
-// CHECK17-NEXT:    [[TMP97:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_27]], align 4
-// CHECK17-NEXT:    [[ADD31:%.*]] = add nsw i32 [[TMP97]], 1
-// CHECK17-NEXT:    [[TMP98:%.*]] = zext i32 [[ADD31]] to i64
-// CHECK17-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP98]])
-// CHECK17-NEXT:    [[TMP99:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l158.region_id, i32 4, i8** [[TMP92]], i8** [[TMP93]], i64* [[TMP94]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.7, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK17-NEXT:    [[TMP100:%.*]] = icmp ne i32 [[TMP99]], 0
-// CHECK17-NEXT:    br i1 [[TMP100]], label [[OMP_OFFLOAD_FAILED32:%.*]], label [[OMP_OFFLOAD_CONT33:%.*]]
+// CHECK17-NEXT:    [[TMP93:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_27]], align 4
+// CHECK17-NEXT:    [[ADD31:%.*]] = add nsw i32 [[TMP93]], 1
+// CHECK17-NEXT:    [[TMP94:%.*]] = zext i32 [[ADD31]] to i64
+// CHECK17-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP94]])
+// CHECK17-NEXT:    [[TMP95:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l158.region_id, i32 4, i8** [[TMP88]], i8** [[TMP89]], i64* [[TMP90]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK17-NEXT:    [[TMP96:%.*]] = icmp ne i32 [[TMP95]], 0
+// CHECK17-NEXT:    br i1 [[TMP96]], label [[OMP_OFFLOAD_FAILED32:%.*]], label [[OMP_OFFLOAD_CONT33:%.*]]
 // CHECK17:       omp_offload.failed32:
-// CHECK17-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l158(i64 [[TMP64]], i64 [[TMP66]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK17-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l158(i64 [[TMP62]], i64 [[TMP64]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK17-NEXT:    br label [[OMP_OFFLOAD_CONT33]]
 // CHECK17:       omp_offload.cont33:
-// CHECK17-NEXT:    [[TMP101:%.*]] = load i32, i32* [[N]], align 4
+// CHECK17-NEXT:    [[TMP97:%.*]] = load i32, i32* [[N]], align 4
 // CHECK17-NEXT:    [[CONV35:%.*]] = bitcast i64* [[N_CASTED34]] to i32*
-// CHECK17-NEXT:    store i32 [[TMP101]], i32* [[CONV35]], align 4
-// CHECK17-NEXT:    [[TMP102:%.*]] = load i64, i64* [[N_CASTED34]], align 8
-// CHECK17-NEXT:    [[TMP103:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK17-NEXT:    [[TMP104:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS36]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP105:%.*]] = bitcast i8** [[TMP104]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP102]], i64* [[TMP105]], align 8
-// CHECK17-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS37]], i32 0, i32 0
+// CHECK17-NEXT:    store i32 [[TMP97]], i32* [[CONV35]], align 4
+// CHECK17-NEXT:    [[TMP98:%.*]] = load i64, i64* [[N_CASTED34]], align 8
+// CHECK17-NEXT:    [[TMP99:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK17-NEXT:    [[TMP100:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES39]] to i8*
+// CHECK17-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP100]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes.12 to i8*), i64 24, i1 false)
+// CHECK17-NEXT:    [[TMP101:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS36]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP102:%.*]] = bitcast i8** [[TMP101]] to i64*
+// CHECK17-NEXT:    store i64 [[TMP98]], i64* [[TMP102]], align 8
+// CHECK17-NEXT:    [[TMP103:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS37]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP104:%.*]] = bitcast i8** [[TMP103]] to i64*
+// CHECK17-NEXT:    store i64 [[TMP98]], i64* [[TMP104]], align 8
+// CHECK17-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS38]], i64 0, i64 0
+// CHECK17-NEXT:    store i8* null, i8** [[TMP105]], align 8
+// CHECK17-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS36]], i32 0, i32 1
 // CHECK17-NEXT:    [[TMP107:%.*]] = bitcast i8** [[TMP106]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP102]], i64* [[TMP107]], align 8
-// CHECK17-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES39]], i32 0, i32 0
-// CHECK17-NEXT:    store i64 4, i64* [[TMP108]], align 8
-// CHECK17-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS38]], i64 0, i64 0
-// CHECK17-NEXT:    store i8* null, i8** [[TMP109]], align 8
-// CHECK17-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS36]], i32 0, i32 1
-// CHECK17-NEXT:    [[TMP111:%.*]] = bitcast i8** [[TMP110]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP1]], i64* [[TMP111]], align 8
-// CHECK17-NEXT:    [[TMP112:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS37]], i32 0, i32 1
-// CHECK17-NEXT:    [[TMP113:%.*]] = bitcast i8** [[TMP112]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP1]], i64* [[TMP113]], align 8
-// CHECK17-NEXT:    [[TMP114:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES39]], i32 0, i32 1
-// CHECK17-NEXT:    store i64 8, i64* [[TMP114]], align 8
-// CHECK17-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS38]], i64 0, i64 1
-// CHECK17-NEXT:    store i8* null, i8** [[TMP115]], align 8
-// CHECK17-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS36]], i32 0, i32 2
-// CHECK17-NEXT:    [[TMP117:%.*]] = bitcast i8** [[TMP116]] to i32**
-// CHECK17-NEXT:    store i32* [[VLA]], i32** [[TMP117]], align 8
-// CHECK17-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS37]], i32 0, i32 2
-// CHECK17-NEXT:    [[TMP119:%.*]] = bitcast i8** [[TMP118]] to i32**
-// CHECK17-NEXT:    store i32* [[VLA]], i32** [[TMP119]], align 8
-// CHECK17-NEXT:    [[TMP120:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES39]], i32 0, i32 2
-// CHECK17-NEXT:    store i64 [[TMP103]], i64* [[TMP120]], align 8
-// CHECK17-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS38]], i64 0, i64 2
-// CHECK17-NEXT:    store i8* null, i8** [[TMP121]], align 8
-// CHECK17-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS36]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP123:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS37]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES39]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP125:%.*]] = load i32, i32* [[N]], align 4
-// CHECK17-NEXT:    store i32 [[TMP125]], i32* [[DOTCAPTURE_EXPR_41]], align 4
-// CHECK17-NEXT:    [[TMP126:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_41]], align 4
-// CHECK17-NEXT:    [[SUB43:%.*]] = sub nsw i32 [[TMP126]], 0
+// CHECK17-NEXT:    store i64 [[TMP1]], i64* [[TMP107]], align 8
+// CHECK17-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS37]], i32 0, i32 1
+// CHECK17-NEXT:    [[TMP109:%.*]] = bitcast i8** [[TMP108]] to i64*
+// CHECK17-NEXT:    store i64 [[TMP1]], i64* [[TMP109]], align 8
+// CHECK17-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS38]], i64 0, i64 1
+// CHECK17-NEXT:    store i8* null, i8** [[TMP110]], align 8
+// CHECK17-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS36]], i32 0, i32 2
+// CHECK17-NEXT:    [[TMP112:%.*]] = bitcast i8** [[TMP111]] to i32**
+// CHECK17-NEXT:    store i32* [[VLA]], i32** [[TMP112]], align 8
+// CHECK17-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS37]], i32 0, i32 2
+// CHECK17-NEXT:    [[TMP114:%.*]] = bitcast i8** [[TMP113]] to i32**
+// CHECK17-NEXT:    store i32* [[VLA]], i32** [[TMP114]], align 8
+// CHECK17-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES39]], i32 0, i32 2
+// CHECK17-NEXT:    store i64 [[TMP99]], i64* [[TMP115]], align 8
+// CHECK17-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS38]], i64 0, i64 2
+// CHECK17-NEXT:    store i8* null, i8** [[TMP116]], align 8
+// CHECK17-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS36]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS37]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES39]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP120:%.*]] = load i32, i32* [[N]], align 4
+// CHECK17-NEXT:    store i32 [[TMP120]], i32* [[DOTCAPTURE_EXPR_41]], align 4
+// CHECK17-NEXT:    [[TMP121:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_41]], align 4
+// CHECK17-NEXT:    [[SUB43:%.*]] = sub nsw i32 [[TMP121]], 0
 // CHECK17-NEXT:    [[DIV44:%.*]] = sdiv i32 [[SUB43]], 1
 // CHECK17-NEXT:    [[SUB45:%.*]] = sub nsw i32 [[DIV44]], 1
 // CHECK17-NEXT:    store i32 [[SUB45]], i32* [[DOTCAPTURE_EXPR_42]], align 4
-// CHECK17-NEXT:    [[TMP127:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_42]], align 4
-// CHECK17-NEXT:    [[ADD46:%.*]] = add nsw i32 [[TMP127]], 1
-// CHECK17-NEXT:    [[TMP128:%.*]] = zext i32 [[ADD46]] to i64
-// CHECK17-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP128]])
-// CHECK17-NEXT:    [[TMP129:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l163.region_id, i32 3, i8** [[TMP122]], i8** [[TMP123]], i64* [[TMP124]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.10, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK17-NEXT:    [[TMP130:%.*]] = icmp ne i32 [[TMP129]], 0
-// CHECK17-NEXT:    br i1 [[TMP130]], label [[OMP_OFFLOAD_FAILED47:%.*]], label [[OMP_OFFLOAD_CONT48:%.*]]
+// CHECK17-NEXT:    [[TMP122:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_42]], align 4
+// CHECK17-NEXT:    [[ADD46:%.*]] = add nsw i32 [[TMP122]], 1
+// CHECK17-NEXT:    [[TMP123:%.*]] = zext i32 [[ADD46]] to i64
+// CHECK17-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP123]])
+// CHECK17-NEXT:    [[TMP124:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l163.region_id, i32 3, i8** [[TMP117]], i8** [[TMP118]], i64* [[TMP119]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK17-NEXT:    [[TMP125:%.*]] = icmp ne i32 [[TMP124]], 0
+// CHECK17-NEXT:    br i1 [[TMP125]], label [[OMP_OFFLOAD_FAILED47:%.*]], label [[OMP_OFFLOAD_CONT48:%.*]]
 // CHECK17:       omp_offload.failed47:
-// CHECK17-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l163(i64 [[TMP102]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK17-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l163(i64 [[TMP98]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK17-NEXT:    br label [[OMP_OFFLOAD_CONT48]]
 // CHECK17:       omp_offload.cont48:
-// CHECK17-NEXT:    [[TMP131:%.*]] = load i32, i32* [[M]], align 4
+// CHECK17-NEXT:    [[TMP126:%.*]] = load i32, i32* [[M]], align 4
 // CHECK17-NEXT:    [[CONV50:%.*]] = bitcast i64* [[M_CASTED49]] to i32*
-// CHECK17-NEXT:    store i32 [[TMP131]], i32* [[CONV50]], align 4
-// CHECK17-NEXT:    [[TMP132:%.*]] = load i64, i64* [[M_CASTED49]], align 8
-// CHECK17-NEXT:    [[TMP133:%.*]] = load i32, i32* [[N]], align 4
+// CHECK17-NEXT:    store i32 [[TMP126]], i32* [[CONV50]], align 4
+// CHECK17-NEXT:    [[TMP127:%.*]] = load i64, i64* [[M_CASTED49]], align 8
+// CHECK17-NEXT:    [[TMP128:%.*]] = load i32, i32* [[N]], align 4
 // CHECK17-NEXT:    [[CONV52:%.*]] = bitcast i64* [[N_CASTED51]] to i32*
-// CHECK17-NEXT:    store i32 [[TMP133]], i32* [[CONV52]], align 4
-// CHECK17-NEXT:    [[TMP134:%.*]] = load i64, i64* [[N_CASTED51]], align 8
-// CHECK17-NEXT:    [[TMP135:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK17-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP137:%.*]] = bitcast i8** [[TMP136]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP132]], i64* [[TMP137]], align 8
-// CHECK17-NEXT:    [[TMP138:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP139:%.*]] = bitcast i8** [[TMP138]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP132]], i64* [[TMP139]], align 8
-// CHECK17-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES56]], i32 0, i32 0
-// CHECK17-NEXT:    store i64 4, i64* [[TMP140]], align 8
-// CHECK17-NEXT:    [[TMP141:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS55]], i64 0, i64 0
+// CHECK17-NEXT:    store i32 [[TMP128]], i32* [[CONV52]], align 4
+// CHECK17-NEXT:    [[TMP129:%.*]] = load i64, i64* [[N_CASTED51]], align 8
+// CHECK17-NEXT:    [[TMP130:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK17-NEXT:    [[TMP131:%.*]] = bitcast [4 x i64]* [[DOTOFFLOAD_SIZES56]] to i8*
+// CHECK17-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP131]], i8* align 8 bitcast ([4 x i64]* @.offload_sizes.16 to i8*), i64 32, i1 false)
+// CHECK17-NEXT:    [[TMP132:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP133:%.*]] = bitcast i8** [[TMP132]] to i64*
+// CHECK17-NEXT:    store i64 [[TMP127]], i64* [[TMP133]], align 8
+// CHECK17-NEXT:    [[TMP134:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP135:%.*]] = bitcast i8** [[TMP134]] to i64*
+// CHECK17-NEXT:    store i64 [[TMP127]], i64* [[TMP135]], align 8
+// CHECK17-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS55]], i64 0, i64 0
+// CHECK17-NEXT:    store i8* null, i8** [[TMP136]], align 8
+// CHECK17-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 1
+// CHECK17-NEXT:    [[TMP138:%.*]] = bitcast i8** [[TMP137]] to i64*
+// CHECK17-NEXT:    store i64 [[TMP129]], i64* [[TMP138]], align 8
+// CHECK17-NEXT:    [[TMP139:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 1
+// CHECK17-NEXT:    [[TMP140:%.*]] = bitcast i8** [[TMP139]] to i64*
+// CHECK17-NEXT:    store i64 [[TMP129]], i64* [[TMP140]], align 8
+// CHECK17-NEXT:    [[TMP141:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS55]], i64 0, i64 1
 // CHECK17-NEXT:    store i8* null, i8** [[TMP141]], align 8
-// CHECK17-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 1
+// CHECK17-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 2
 // CHECK17-NEXT:    [[TMP143:%.*]] = bitcast i8** [[TMP142]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP134]], i64* [[TMP143]], align 8
-// CHECK17-NEXT:    [[TMP144:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 1
+// CHECK17-NEXT:    store i64 [[TMP1]], i64* [[TMP143]], align 8
+// CHECK17-NEXT:    [[TMP144:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 2
 // CHECK17-NEXT:    [[TMP145:%.*]] = bitcast i8** [[TMP144]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP134]], i64* [[TMP145]], align 8
-// CHECK17-NEXT:    [[TMP146:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES56]], i32 0, i32 1
-// CHECK17-NEXT:    store i64 4, i64* [[TMP146]], align 8
-// CHECK17-NEXT:    [[TMP147:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS55]], i64 0, i64 1
-// CHECK17-NEXT:    store i8* null, i8** [[TMP147]], align 8
-// CHECK17-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 2
-// CHECK17-NEXT:    [[TMP149:%.*]] = bitcast i8** [[TMP148]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP1]], i64* [[TMP149]], align 8
-// CHECK17-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 2
-// CHECK17-NEXT:    [[TMP151:%.*]] = bitcast i8** [[TMP150]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP1]], i64* [[TMP151]], align 8
-// CHECK17-NEXT:    [[TMP152:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES56]], i32 0, i32 2
-// CHECK17-NEXT:    store i64 8, i64* [[TMP152]], align 8
-// CHECK17-NEXT:    [[TMP153:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS55]], i64 0, i64 2
-// CHECK17-NEXT:    store i8* null, i8** [[TMP153]], align 8
-// CHECK17-NEXT:    [[TMP154:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 3
-// CHECK17-NEXT:    [[TMP155:%.*]] = bitcast i8** [[TMP154]] to i32**
-// CHECK17-NEXT:    store i32* [[VLA]], i32** [[TMP155]], align 8
-// CHECK17-NEXT:    [[TMP156:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 3
-// CHECK17-NEXT:    [[TMP157:%.*]] = bitcast i8** [[TMP156]] to i32**
-// CHECK17-NEXT:    store i32* [[VLA]], i32** [[TMP157]], align 8
-// CHECK17-NEXT:    [[TMP158:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES56]], i32 0, i32 3
-// CHECK17-NEXT:    store i64 [[TMP135]], i64* [[TMP158]], align 8
-// CHECK17-NEXT:    [[TMP159:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS55]], i64 0, i64 3
-// CHECK17-NEXT:    store i8* null, i8** [[TMP159]], align 8
-// CHECK17-NEXT:    [[TMP160:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP161:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP162:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES56]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP163:%.*]] = load i32, i32* [[N]], align 4
-// CHECK17-NEXT:    store i32 [[TMP163]], i32* [[DOTCAPTURE_EXPR_58]], align 4
-// CHECK17-NEXT:    [[TMP164:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_58]], align 4
-// CHECK17-NEXT:    [[SUB60:%.*]] = sub nsw i32 [[TMP164]], 0
+// CHECK17-NEXT:    store i64 [[TMP1]], i64* [[TMP145]], align 8
+// CHECK17-NEXT:    [[TMP146:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS55]], i64 0, i64 2
+// CHECK17-NEXT:    store i8* null, i8** [[TMP146]], align 8
+// CHECK17-NEXT:    [[TMP147:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 3
+// CHECK17-NEXT:    [[TMP148:%.*]] = bitcast i8** [[TMP147]] to i32**
+// CHECK17-NEXT:    store i32* [[VLA]], i32** [[TMP148]], align 8
+// CHECK17-NEXT:    [[TMP149:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 3
+// CHECK17-NEXT:    [[TMP150:%.*]] = bitcast i8** [[TMP149]] to i32**
+// CHECK17-NEXT:    store i32* [[VLA]], i32** [[TMP150]], align 8
+// CHECK17-NEXT:    [[TMP151:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES56]], i32 0, i32 3
+// CHECK17-NEXT:    store i64 [[TMP130]], i64* [[TMP151]], align 8
+// CHECK17-NEXT:    [[TMP152:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS55]], i64 0, i64 3
+// CHECK17-NEXT:    store i8* null, i8** [[TMP152]], align 8
+// CHECK17-NEXT:    [[TMP153:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP154:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP155:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES56]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP156:%.*]] = load i32, i32* [[N]], align 4
+// CHECK17-NEXT:    store i32 [[TMP156]], i32* [[DOTCAPTURE_EXPR_58]], align 4
+// CHECK17-NEXT:    [[TMP157:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_58]], align 4
+// CHECK17-NEXT:    [[SUB60:%.*]] = sub nsw i32 [[TMP157]], 0
 // CHECK17-NEXT:    [[DIV61:%.*]] = sdiv i32 [[SUB60]], 1
 // CHECK17-NEXT:    [[SUB62:%.*]] = sub nsw i32 [[DIV61]], 1
 // CHECK17-NEXT:    store i32 [[SUB62]], i32* [[DOTCAPTURE_EXPR_59]], align 4
-// CHECK17-NEXT:    [[TMP165:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_59]], align 4
-// CHECK17-NEXT:    [[ADD63:%.*]] = add nsw i32 [[TMP165]], 1
-// CHECK17-NEXT:    [[TMP166:%.*]] = zext i32 [[ADD63]] to i64
-// CHECK17-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP166]])
-// CHECK17-NEXT:    [[TMP167:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l168.region_id, i32 4, i8** [[TMP160]], i8** [[TMP161]], i64* [[TMP162]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK17-NEXT:    [[TMP168:%.*]] = icmp ne i32 [[TMP167]], 0
-// CHECK17-NEXT:    br i1 [[TMP168]], label [[OMP_OFFLOAD_FAILED64:%.*]], label [[OMP_OFFLOAD_CONT65:%.*]]
+// CHECK17-NEXT:    [[TMP158:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_59]], align 4
+// CHECK17-NEXT:    [[ADD63:%.*]] = add nsw i32 [[TMP158]], 1
+// CHECK17-NEXT:    [[TMP159:%.*]] = zext i32 [[ADD63]] to i64
+// CHECK17-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP159]])
+// CHECK17-NEXT:    [[TMP160:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l168.region_id, i32 4, i8** [[TMP153]], i8** [[TMP154]], i64* [[TMP155]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.17, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK17-NEXT:    [[TMP161:%.*]] = icmp ne i32 [[TMP160]], 0
+// CHECK17-NEXT:    br i1 [[TMP161]], label [[OMP_OFFLOAD_FAILED64:%.*]], label [[OMP_OFFLOAD_CONT65:%.*]]
 // CHECK17:       omp_offload.failed64:
-// CHECK17-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l168(i64 [[TMP132]], i64 [[TMP134]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK17-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l168(i64 [[TMP127]], i64 [[TMP129]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK17-NEXT:    br label [[OMP_OFFLOAD_CONT65]]
 // CHECK17:       omp_offload.cont65:
-// CHECK17-NEXT:    [[TMP169:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK17-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10EEiT_(i32 noundef signext [[TMP169]])
+// CHECK17-NEXT:    [[TMP162:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK17-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10EEiT_(i32 noundef signext [[TMP162]])
 // CHECK17-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK17-NEXT:    [[TMP170:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK17-NEXT:    call void @llvm.stackrestore(i8* [[TMP170]])
-// CHECK17-NEXT:    [[TMP171:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK17-NEXT:    ret i32 [[TMP171]]
+// CHECK17-NEXT:    [[TMP163:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK17-NEXT:    call void @llvm.stackrestore(i8* [[TMP163]])
+// CHECK17-NEXT:    [[TMP164:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK17-NEXT:    ret i32 [[TMP164]]
 //
 //
 // CHECK17-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l148
@@ -17951,11 +17881,11 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK17-NEXT:    store i32 [[TMP3]], i32* [[CONV2]], align 4
 // CHECK17-NEXT:    [[TMP4:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32* [[CONV1]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP4]])
+// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* [[CONV1]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP4]])
 // CHECK17-NEXT:    ret void
 //
 //
-// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..5
+// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..6
 // CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK17-NEXT:  entry:
 // CHECK17-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -18037,7 +17967,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK17-NEXT:    store i32 [[TMP21]], i32* [[CONV7]], align 4
 // CHECK17-NEXT:    [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]], i64 [[TMP22]])
+// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*, i64)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]], i64 [[TMP22]])
 // CHECK17-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK17:       omp.inner.for.inc:
 // CHECK17-NEXT:    [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -18079,7 +18009,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    ret void
 //
 //
-// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..6
+// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..7
 // CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK17-NEXT:  entry:
 // CHECK17-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -18199,11 +18129,11 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32*
 // CHECK17-NEXT:    [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8
 // CHECK17-NEXT:    [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8
-// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), i32* [[CONV]], i64 [[TMP0]], i32* [[TMP1]])
+// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), i32* [[CONV]], i64 [[TMP0]], i32* [[TMP1]])
 // CHECK17-NEXT:    ret void
 //
 //
-// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..8
+// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..10
 // CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] {
 // CHECK17-NEXT:  entry:
 // CHECK17-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -18275,7 +18205,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[TMP17:%.*]] = zext i32 [[TMP16]] to i64
 // CHECK17-NEXT:    [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
 // CHECK17-NEXT:    [[TMP19:%.*]] = zext i32 [[TMP18]] to i64
-// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]])
+// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]])
 // CHECK17-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK17:       omp.inner.for.inc:
 // CHECK17-NEXT:    [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -18294,7 +18224,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    ret void
 //
 //
-// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..9
+// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..11
 // CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] {
 // CHECK17-NEXT:  entry:
 // CHECK17-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -18418,11 +18348,11 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK17-NEXT:    store i32 [[TMP3]], i32* [[CONV2]], align 4
 // CHECK17-NEXT:    [[TMP4:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32* [[CONV1]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP4]])
+// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*, i64)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i32* [[CONV1]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP4]])
 // CHECK17-NEXT:    ret void
 //
 //
-// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..11
+// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..14
 // CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK17-NEXT:  entry:
 // CHECK17-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -18502,7 +18432,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK17-NEXT:    store i32 [[TMP20]], i32* [[CONV7]], align 4
 // CHECK17-NEXT:    [[TMP21:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*, i64)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]], i64 [[TMP21]])
+// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]], i64 [[TMP21]])
 // CHECK17-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK17:       omp.inner.for.inc:
 // CHECK17-NEXT:    [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -18521,7 +18451,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    ret void
 //
 //
-// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..12
+// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..15
 // CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK17-NEXT:  entry:
 // CHECK17-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -18627,7 +18557,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK17-LABEL: define {{[^@]+}}@_Z5tmainIiLi10EEiT_
-// CHECK17-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK17-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK17-NEXT:  entry:
 // CHECK17-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK17-NEXT:    [[A:%.*]] = alloca [10 x i32], align 4
@@ -18667,7 +18597,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK17-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK17-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK17-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l116.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.16, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK17-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l116.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.20, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.21, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK17-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK17-NEXT:    br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK17:       omp_offload.failed:
@@ -18685,7 +18615,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK17-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
 // CHECK17-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK17-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l121.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.19, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.20, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK17-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l121.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.24, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.25, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK17-NEXT:    [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
 // CHECK17-NEXT:    br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]]
 // CHECK17:       omp_offload.failed5:
@@ -18715,7 +18645,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0
 // CHECK17-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS8]], i32 0, i32 0
 // CHECK17-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK17-NEXT:    [[TMP32:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l126.region_id, i32 2, i8** [[TMP30]], i8** [[TMP31]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.23, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.24, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK17-NEXT:    [[TMP32:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l126.region_id, i32 2, i8** [[TMP30]], i8** [[TMP31]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.28, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.29, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK17-NEXT:    [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0
 // CHECK17-NEXT:    br i1 [[TMP33]], label [[OMP_OFFLOAD_FAILED11:%.*]], label [[OMP_OFFLOAD_CONT12:%.*]]
 // CHECK17:       omp_offload.failed11:
@@ -18733,7 +18663,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0
 // CHECK17-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 0
 // CHECK17-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK17-NEXT:    [[TMP41:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l131.region_id, i32 1, i8** [[TMP39]], i8** [[TMP40]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.27, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.28, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK17-NEXT:    [[TMP41:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l131.region_id, i32 1, i8** [[TMP39]], i8** [[TMP40]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.32, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.33, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK17-NEXT:    [[TMP42:%.*]] = icmp ne i32 [[TMP41]], 0
 // CHECK17-NEXT:    br i1 [[TMP42]], label [[OMP_OFFLOAD_FAILED17:%.*]], label [[OMP_OFFLOAD_CONT18:%.*]]
 // CHECK17:       omp_offload.failed17:
@@ -18763,7 +18693,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 0
 // CHECK17-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 0
 // CHECK17-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK17-NEXT:    [[TMP57:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l136.region_id, i32 2, i8** [[TMP55]], i8** [[TMP56]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.31, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.32, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK17-NEXT:    [[TMP57:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l136.region_id, i32 2, i8** [[TMP55]], i8** [[TMP56]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.36, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.37, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK17-NEXT:    [[TMP58:%.*]] = icmp ne i32 [[TMP57]], 0
 // CHECK17-NEXT:    br i1 [[TMP58]], label [[OMP_OFFLOAD_FAILED25:%.*]], label [[OMP_OFFLOAD_CONT26:%.*]]
 // CHECK17:       omp_offload.failed25:
@@ -18779,11 +18709,11 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8
 // CHECK17-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8
 // CHECK17-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8
-// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK17-NEXT:    ret void
 //
 //
-// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..14
+// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..18
 // CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK17-NEXT:  entry:
 // CHECK17-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -18831,7 +18761,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
 // CHECK17-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
 // CHECK17-NEXT:    [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
-// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]])
+// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]])
 // CHECK17-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK17:       omp.inner.for.inc:
 // CHECK17-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -18846,7 +18776,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    ret void
 //
 //
-// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..15
+// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..19
 // CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK17-NEXT:  entry:
 // CHECK17-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -18929,11 +18859,11 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8
 // CHECK17-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8
 // CHECK17-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8
-// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..17 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..22 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK17-NEXT:    ret void
 //
 //
-// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..17
+// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..22
 // CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK17-NEXT:  entry:
 // CHECK17-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -18981,7 +18911,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
 // CHECK17-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
 // CHECK17-NEXT:    [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
-// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]])
+// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..23 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]])
 // CHECK17-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK17:       omp.inner.for.inc:
 // CHECK17-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -18996,7 +18926,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    ret void
 //
 //
-// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..18
+// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..23
 // CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK17-NEXT:  entry:
 // CHECK17-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -19090,11 +19020,11 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK17-NEXT:    store i32 [[TMP2]], i32* [[CONV1]], align 4
 // CHECK17-NEXT:    [[TMP3:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i64)* @.omp_outlined..21 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i64 [[TMP3]])
+// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i64)* @.omp_outlined..26 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i64 [[TMP3]])
 // CHECK17-NEXT:    ret void
 //
 //
-// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..21
+// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..26
 // CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK17-NEXT:  entry:
 // CHECK17-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -19150,7 +19080,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK17-NEXT:    store i32 [[TMP12]], i32* [[CONV2]], align 4
 // CHECK17-NEXT:    [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..22 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]])
+// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..27 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]])
 // CHECK17-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK17:       omp.inner.for.inc:
 // CHECK17-NEXT:    [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -19165,7 +19095,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    ret void
 //
 //
-// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..22
+// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..27
 // CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK17-NEXT:  entry:
 // CHECK17-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -19273,11 +19203,11 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8
 // CHECK17-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8
 // CHECK17-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8
-// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..25 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK17-NEXT:    ret void
 //
 //
-// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..25
+// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..30
 // CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK17-NEXT:  entry:
 // CHECK17-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -19325,7 +19255,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
 // CHECK17-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
 // CHECK17-NEXT:    [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
-// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]])
+// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]])
 // CHECK17-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK17:       omp.inner.for.inc:
 // CHECK17-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -19340,7 +19270,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    ret void
 //
 //
-// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..26
+// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..31
 // CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK17-NEXT:  entry:
 // CHECK17-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -19432,11 +19362,11 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK17-NEXT:    store i32 [[TMP2]], i32* [[CONV1]], align 4
 // CHECK17-NEXT:    [[TMP3:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i64)* @.omp_outlined..29 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i64 [[TMP3]])
+// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i64)* @.omp_outlined..34 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i64 [[TMP3]])
 // CHECK17-NEXT:    ret void
 //
 //
-// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..29
+// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..34
 // CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK17-NEXT:  entry:
 // CHECK17-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -19492,7 +19422,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK17-NEXT:    store i32 [[TMP12]], i32* [[CONV2]], align 4
 // CHECK17-NEXT:    [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]])
+// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]])
 // CHECK17-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK17:       omp.inner.for.inc:
 // CHECK17-NEXT:    [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -19507,7 +19437,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    ret void
 //
 //
-// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..30
+// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..35
 // CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK17-NEXT:  entry:
 // CHECK17-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -19587,7 +19517,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK17-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK17-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK17-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK17-NEXT:  entry:
 // CHECK17-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK17-NEXT:    ret void
@@ -19661,14 +19591,14 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    store i32 [[TMP3]], i32* [[CONV]], align 4
 // CHECK18-NEXT:    [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8
 // CHECK18-NEXT:    [[TMP5:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK18-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP4]], i64* [[TMP7]], align 8
-// CHECK18-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP4]], i64* [[TMP9]], align 8
-// CHECK18-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK18-NEXT:    store i64 4, i64* [[TMP10]], align 8
+// CHECK18-NEXT:    [[TMP6:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK18-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP6]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes to i8*), i64 24, i1 false)
+// CHECK18-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i64*
+// CHECK18-NEXT:    store i64 [[TMP4]], i64* [[TMP8]], align 8
+// CHECK18-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i64*
+// CHECK18-NEXT:    store i64 [[TMP4]], i64* [[TMP10]], align 8
 // CHECK18-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
 // CHECK18-NEXT:    store i8* null, i8** [[TMP11]], align 8
 // CHECK18-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -19677,300 +19607,286 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK18-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i64*
 // CHECK18-NEXT:    store i64 [[TMP1]], i64* [[TMP15]], align 8
-// CHECK18-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK18-NEXT:    store i64 8, i64* [[TMP16]], align 8
-// CHECK18-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK18-NEXT:    store i8* null, i8** [[TMP17]], align 8
-// CHECK18-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK18-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
-// CHECK18-NEXT:    store i32* [[VLA]], i32** [[TMP19]], align 8
-// CHECK18-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK18-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32**
-// CHECK18-NEXT:    store i32* [[VLA]], i32** [[TMP21]], align 8
-// CHECK18-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK18-NEXT:    store i64 [[TMP5]], i64* [[TMP22]], align 8
-// CHECK18-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK18-NEXT:    store i8* null, i8** [[TMP23]], align 8
-// CHECK18-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP27:%.*]] = load i32, i32* [[N]], align 4
-// CHECK18-NEXT:    store i32 [[TMP27]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK18-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK18-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP28]], 0
+// CHECK18-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK18-NEXT:    store i8* null, i8** [[TMP16]], align 8
+// CHECK18-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK18-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32**
+// CHECK18-NEXT:    store i32* [[VLA]], i32** [[TMP18]], align 8
+// CHECK18-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK18-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32**
+// CHECK18-NEXT:    store i32* [[VLA]], i32** [[TMP20]], align 8
+// CHECK18-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
+// CHECK18-NEXT:    store i64 [[TMP5]], i64* [[TMP21]], align 8
+// CHECK18-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK18-NEXT:    store i8* null, i8** [[TMP22]], align 8
+// CHECK18-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP26:%.*]] = load i32, i32* [[N]], align 4
+// CHECK18-NEXT:    store i32 [[TMP26]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK18-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK18-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP27]], 0
 // CHECK18-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK18-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK18-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK18-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK18-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP29]], 1
-// CHECK18-NEXT:    [[TMP30:%.*]] = zext i32 [[ADD]] to i64
-// CHECK18-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP30]])
-// CHECK18-NEXT:    [[TMP31:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l148.region_id, i32 3, i8** [[TMP24]], i8** [[TMP25]], i64* [[TMP26]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK18-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
-// CHECK18-NEXT:    br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK18-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK18-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP28]], 1
+// CHECK18-NEXT:    [[TMP29:%.*]] = zext i32 [[ADD]] to i64
+// CHECK18-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP29]])
+// CHECK18-NEXT:    [[TMP30:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l148.region_id, i32 3, i8** [[TMP23]], i8** [[TMP24]], i64* [[TMP25]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK18-NEXT:    [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
+// CHECK18-NEXT:    br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK18:       omp_offload.failed:
 // CHECK18-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l148(i64 [[TMP4]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK18-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK18:       omp_offload.cont:
-// CHECK18-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N]], align 4
+// CHECK18-NEXT:    [[TMP32:%.*]] = load i32, i32* [[N]], align 4
 // CHECK18-NEXT:    [[CONV4:%.*]] = bitcast i64* [[N_CASTED3]] to i32*
-// CHECK18-NEXT:    store i32 [[TMP33]], i32* [[CONV4]], align 4
-// CHECK18-NEXT:    [[TMP34:%.*]] = load i64, i64* [[N_CASTED3]], align 8
-// CHECK18-NEXT:    [[TMP35:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK18-NEXT:    store i32 [[TMP32]], i32* [[CONV4]], align 4
+// CHECK18-NEXT:    [[TMP33:%.*]] = load i64, i64* [[N_CASTED3]], align 8
+// CHECK18-NEXT:    [[TMP34:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK18-NEXT:    [[TMP35:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES8]] to i8*
+// CHECK18-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP35]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes.4 to i8*), i64 24, i1 false)
 // CHECK18-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
 // CHECK18-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP34]], i64* [[TMP37]], align 8
+// CHECK18-NEXT:    store i64 [[TMP33]], i64* [[TMP37]], align 8
 // CHECK18-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
 // CHECK18-NEXT:    [[TMP39:%.*]] = bitcast i8** [[TMP38]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP34]], i64* [[TMP39]], align 8
-// CHECK18-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 0
-// CHECK18-NEXT:    store i64 4, i64* [[TMP40]], align 8
-// CHECK18-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 0
-// CHECK18-NEXT:    store i8* null, i8** [[TMP41]], align 8
-// CHECK18-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 1
-// CHECK18-NEXT:    [[TMP43:%.*]] = bitcast i8** [[TMP42]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP1]], i64* [[TMP43]], align 8
-// CHECK18-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 1
-// CHECK18-NEXT:    [[TMP45:%.*]] = bitcast i8** [[TMP44]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP1]], i64* [[TMP45]], align 8
-// CHECK18-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 1
-// CHECK18-NEXT:    store i64 8, i64* [[TMP46]], align 8
-// CHECK18-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 1
-// CHECK18-NEXT:    store i8* null, i8** [[TMP47]], align 8
-// CHECK18-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 2
+// CHECK18-NEXT:    store i64 [[TMP33]], i64* [[TMP39]], align 8
+// CHECK18-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 0
+// CHECK18-NEXT:    store i8* null, i8** [[TMP40]], align 8
+// CHECK18-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 1
+// CHECK18-NEXT:    [[TMP42:%.*]] = bitcast i8** [[TMP41]] to i64*
+// CHECK18-NEXT:    store i64 [[TMP1]], i64* [[TMP42]], align 8
+// CHECK18-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 1
+// CHECK18-NEXT:    [[TMP44:%.*]] = bitcast i8** [[TMP43]] to i64*
+// CHECK18-NEXT:    store i64 [[TMP1]], i64* [[TMP44]], align 8
+// CHECK18-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 1
+// CHECK18-NEXT:    store i8* null, i8** [[TMP45]], align 8
+// CHECK18-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 2
+// CHECK18-NEXT:    [[TMP47:%.*]] = bitcast i8** [[TMP46]] to i32**
+// CHECK18-NEXT:    store i32* [[VLA]], i32** [[TMP47]], align 8
+// CHECK18-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 2
 // CHECK18-NEXT:    [[TMP49:%.*]] = bitcast i8** [[TMP48]] to i32**
 // CHECK18-NEXT:    store i32* [[VLA]], i32** [[TMP49]], align 8
-// CHECK18-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 2
-// CHECK18-NEXT:    [[TMP51:%.*]] = bitcast i8** [[TMP50]] to i32**
-// CHECK18-NEXT:    store i32* [[VLA]], i32** [[TMP51]], align 8
-// CHECK18-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 2
-// CHECK18-NEXT:    store i64 [[TMP35]], i64* [[TMP52]], align 8
-// CHECK18-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 2
-// CHECK18-NEXT:    store i8* null, i8** [[TMP53]], align 8
-// CHECK18-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP57:%.*]] = load i32, i32* [[N]], align 4
-// CHECK18-NEXT:    store i32 [[TMP57]], i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK18-NEXT:    [[TMP58:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK18-NEXT:    [[SUB12:%.*]] = sub nsw i32 [[TMP58]], 0
+// CHECK18-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 2
+// CHECK18-NEXT:    store i64 [[TMP34]], i64* [[TMP50]], align 8
+// CHECK18-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 2
+// CHECK18-NEXT:    store i8* null, i8** [[TMP51]], align 8
+// CHECK18-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP55:%.*]] = load i32, i32* [[N]], align 4
+// CHECK18-NEXT:    store i32 [[TMP55]], i32* [[DOTCAPTURE_EXPR_10]], align 4
+// CHECK18-NEXT:    [[TMP56:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
+// CHECK18-NEXT:    [[SUB12:%.*]] = sub nsw i32 [[TMP56]], 0
 // CHECK18-NEXT:    [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1
 // CHECK18-NEXT:    [[SUB14:%.*]] = sub nsw i32 [[DIV13]], 1
 // CHECK18-NEXT:    store i32 [[SUB14]], i32* [[DOTCAPTURE_EXPR_11]], align 4
-// CHECK18-NEXT:    [[TMP59:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_11]], align 4
-// CHECK18-NEXT:    [[ADD15:%.*]] = add nsw i32 [[TMP59]], 1
-// CHECK18-NEXT:    [[TMP60:%.*]] = zext i32 [[ADD15]] to i64
-// CHECK18-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP60]])
-// CHECK18-NEXT:    [[TMP61:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l153.region_id, i32 3, i8** [[TMP54]], i8** [[TMP55]], i64* [[TMP56]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK18-NEXT:    [[TMP62:%.*]] = icmp ne i32 [[TMP61]], 0
-// CHECK18-NEXT:    br i1 [[TMP62]], label [[OMP_OFFLOAD_FAILED16:%.*]], label [[OMP_OFFLOAD_CONT17:%.*]]
+// CHECK18-NEXT:    [[TMP57:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_11]], align 4
+// CHECK18-NEXT:    [[ADD15:%.*]] = add nsw i32 [[TMP57]], 1
+// CHECK18-NEXT:    [[TMP58:%.*]] = zext i32 [[ADD15]] to i64
+// CHECK18-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP58]])
+// CHECK18-NEXT:    [[TMP59:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l153.region_id, i32 3, i8** [[TMP52]], i8** [[TMP53]], i64* [[TMP54]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.5, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK18-NEXT:    [[TMP60:%.*]] = icmp ne i32 [[TMP59]], 0
+// CHECK18-NEXT:    br i1 [[TMP60]], label [[OMP_OFFLOAD_FAILED16:%.*]], label [[OMP_OFFLOAD_CONT17:%.*]]
 // CHECK18:       omp_offload.failed16:
-// CHECK18-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l153(i64 [[TMP34]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK18-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l153(i64 [[TMP33]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK18-NEXT:    br label [[OMP_OFFLOAD_CONT17]]
 // CHECK18:       omp_offload.cont17:
-// CHECK18-NEXT:    [[TMP63:%.*]] = load i32, i32* [[M]], align 4
+// CHECK18-NEXT:    [[TMP61:%.*]] = load i32, i32* [[M]], align 4
 // CHECK18-NEXT:    [[CONV18:%.*]] = bitcast i64* [[M_CASTED]] to i32*
-// CHECK18-NEXT:    store i32 [[TMP63]], i32* [[CONV18]], align 4
-// CHECK18-NEXT:    [[TMP64:%.*]] = load i64, i64* [[M_CASTED]], align 8
-// CHECK18-NEXT:    [[TMP65:%.*]] = load i32, i32* [[N]], align 4
+// CHECK18-NEXT:    store i32 [[TMP61]], i32* [[CONV18]], align 4
+// CHECK18-NEXT:    [[TMP62:%.*]] = load i64, i64* [[M_CASTED]], align 8
+// CHECK18-NEXT:    [[TMP63:%.*]] = load i32, i32* [[N]], align 4
 // CHECK18-NEXT:    [[CONV20:%.*]] = bitcast i64* [[N_CASTED19]] to i32*
-// CHECK18-NEXT:    store i32 [[TMP65]], i32* [[CONV20]], align 4
-// CHECK18-NEXT:    [[TMP66:%.*]] = load i64, i64* [[N_CASTED19]], align 8
-// CHECK18-NEXT:    [[TMP67:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK18-NEXT:    [[TMP68:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP69:%.*]] = bitcast i8** [[TMP68]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP64]], i64* [[TMP69]], align 8
-// CHECK18-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP71:%.*]] = bitcast i8** [[TMP70]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP64]], i64* [[TMP71]], align 8
-// CHECK18-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES24]], i32 0, i32 0
-// CHECK18-NEXT:    store i64 4, i64* [[TMP72]], align 8
-// CHECK18-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 0
-// CHECK18-NEXT:    store i8* null, i8** [[TMP73]], align 8
-// CHECK18-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 1
+// CHECK18-NEXT:    store i32 [[TMP63]], i32* [[CONV20]], align 4
+// CHECK18-NEXT:    [[TMP64:%.*]] = load i64, i64* [[N_CASTED19]], align 8
+// CHECK18-NEXT:    [[TMP65:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK18-NEXT:    [[TMP66:%.*]] = bitcast [4 x i64]* [[DOTOFFLOAD_SIZES24]] to i8*
+// CHECK18-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP66]], i8* align 8 bitcast ([4 x i64]* @.offload_sizes.8 to i8*), i64 32, i1 false)
+// CHECK18-NEXT:    [[TMP67:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP68:%.*]] = bitcast i8** [[TMP67]] to i64*
+// CHECK18-NEXT:    store i64 [[TMP62]], i64* [[TMP68]], align 8
+// CHECK18-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP70:%.*]] = bitcast i8** [[TMP69]] to i64*
+// CHECK18-NEXT:    store i64 [[TMP62]], i64* [[TMP70]], align 8
+// CHECK18-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 0
+// CHECK18-NEXT:    store i8* null, i8** [[TMP71]], align 8
+// CHECK18-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 1
+// CHECK18-NEXT:    [[TMP73:%.*]] = bitcast i8** [[TMP72]] to i64*
+// CHECK18-NEXT:    store i64 [[TMP64]], i64* [[TMP73]], align 8
+// CHECK18-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 1
 // CHECK18-NEXT:    [[TMP75:%.*]] = bitcast i8** [[TMP74]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP66]], i64* [[TMP75]], align 8
-// CHECK18-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 1
-// CHECK18-NEXT:    [[TMP77:%.*]] = bitcast i8** [[TMP76]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP66]], i64* [[TMP77]], align 8
-// CHECK18-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES24]], i32 0, i32 1
-// CHECK18-NEXT:    store i64 4, i64* [[TMP78]], align 8
-// CHECK18-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 1
-// CHECK18-NEXT:    store i8* null, i8** [[TMP79]], align 8
-// CHECK18-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 2
-// CHECK18-NEXT:    [[TMP81:%.*]] = bitcast i8** [[TMP80]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP1]], i64* [[TMP81]], align 8
-// CHECK18-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 2
-// CHECK18-NEXT:    [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP1]], i64* [[TMP83]], align 8
-// CHECK18-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES24]], i32 0, i32 2
-// CHECK18-NEXT:    store i64 8, i64* [[TMP84]], align 8
-// CHECK18-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 2
-// CHECK18-NEXT:    store i8* null, i8** [[TMP85]], align 8
-// CHECK18-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 3
-// CHECK18-NEXT:    [[TMP87:%.*]] = bitcast i8** [[TMP86]] to i32**
-// CHECK18-NEXT:    store i32* [[VLA]], i32** [[TMP87]], align 8
-// CHECK18-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 3
-// CHECK18-NEXT:    [[TMP89:%.*]] = bitcast i8** [[TMP88]] to i32**
-// CHECK18-NEXT:    store i32* [[VLA]], i32** [[TMP89]], align 8
-// CHECK18-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES24]], i32 0, i32 3
-// CHECK18-NEXT:    store i64 [[TMP67]], i64* [[TMP90]], align 8
-// CHECK18-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 3
-// CHECK18-NEXT:    store i8* null, i8** [[TMP91]], align 8
-// CHECK18-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES24]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP95:%.*]] = load i32, i32* [[N]], align 4
-// CHECK18-NEXT:    store i32 [[TMP95]], i32* [[DOTCAPTURE_EXPR_26]], align 4
-// CHECK18-NEXT:    [[TMP96:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_26]], align 4
-// CHECK18-NEXT:    [[SUB28:%.*]] = sub nsw i32 [[TMP96]], 0
+// CHECK18-NEXT:    store i64 [[TMP64]], i64* [[TMP75]], align 8
+// CHECK18-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 1
+// CHECK18-NEXT:    store i8* null, i8** [[TMP76]], align 8
+// CHECK18-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 2
+// CHECK18-NEXT:    [[TMP78:%.*]] = bitcast i8** [[TMP77]] to i64*
+// CHECK18-NEXT:    store i64 [[TMP1]], i64* [[TMP78]], align 8
+// CHECK18-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 2
+// CHECK18-NEXT:    [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i64*
+// CHECK18-NEXT:    store i64 [[TMP1]], i64* [[TMP80]], align 8
+// CHECK18-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 2
+// CHECK18-NEXT:    store i8* null, i8** [[TMP81]], align 8
+// CHECK18-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 3
+// CHECK18-NEXT:    [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i32**
+// CHECK18-NEXT:    store i32* [[VLA]], i32** [[TMP83]], align 8
+// CHECK18-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 3
+// CHECK18-NEXT:    [[TMP85:%.*]] = bitcast i8** [[TMP84]] to i32**
+// CHECK18-NEXT:    store i32* [[VLA]], i32** [[TMP85]], align 8
+// CHECK18-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES24]], i32 0, i32 3
+// CHECK18-NEXT:    store i64 [[TMP65]], i64* [[TMP86]], align 8
+// CHECK18-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 3
+// CHECK18-NEXT:    store i8* null, i8** [[TMP87]], align 8
+// CHECK18-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES24]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP91:%.*]] = load i32, i32* [[N]], align 4
+// CHECK18-NEXT:    store i32 [[TMP91]], i32* [[DOTCAPTURE_EXPR_26]], align 4
+// CHECK18-NEXT:    [[TMP92:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_26]], align 4
+// CHECK18-NEXT:    [[SUB28:%.*]] = sub nsw i32 [[TMP92]], 0
 // CHECK18-NEXT:    [[DIV29:%.*]] = sdiv i32 [[SUB28]], 1
 // CHECK18-NEXT:    [[SUB30:%.*]] = sub nsw i32 [[DIV29]], 1
 // CHECK18-NEXT:    store i32 [[SUB30]], i32* [[DOTCAPTURE_EXPR_27]], align 4
-// CHECK18-NEXT:    [[TMP97:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_27]], align 4
-// CHECK18-NEXT:    [[ADD31:%.*]] = add nsw i32 [[TMP97]], 1
-// CHECK18-NEXT:    [[TMP98:%.*]] = zext i32 [[ADD31]] to i64
-// CHECK18-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP98]])
-// CHECK18-NEXT:    [[TMP99:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l158.region_id, i32 4, i8** [[TMP92]], i8** [[TMP93]], i64* [[TMP94]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.7, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK18-NEXT:    [[TMP100:%.*]] = icmp ne i32 [[TMP99]], 0
-// CHECK18-NEXT:    br i1 [[TMP100]], label [[OMP_OFFLOAD_FAILED32:%.*]], label [[OMP_OFFLOAD_CONT33:%.*]]
+// CHECK18-NEXT:    [[TMP93:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_27]], align 4
+// CHECK18-NEXT:    [[ADD31:%.*]] = add nsw i32 [[TMP93]], 1
+// CHECK18-NEXT:    [[TMP94:%.*]] = zext i32 [[ADD31]] to i64
+// CHECK18-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP94]])
+// CHECK18-NEXT:    [[TMP95:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l158.region_id, i32 4, i8** [[TMP88]], i8** [[TMP89]], i64* [[TMP90]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK18-NEXT:    [[TMP96:%.*]] = icmp ne i32 [[TMP95]], 0
+// CHECK18-NEXT:    br i1 [[TMP96]], label [[OMP_OFFLOAD_FAILED32:%.*]], label [[OMP_OFFLOAD_CONT33:%.*]]
 // CHECK18:       omp_offload.failed32:
-// CHECK18-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l158(i64 [[TMP64]], i64 [[TMP66]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK18-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l158(i64 [[TMP62]], i64 [[TMP64]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK18-NEXT:    br label [[OMP_OFFLOAD_CONT33]]
 // CHECK18:       omp_offload.cont33:
-// CHECK18-NEXT:    [[TMP101:%.*]] = load i32, i32* [[N]], align 4
+// CHECK18-NEXT:    [[TMP97:%.*]] = load i32, i32* [[N]], align 4
 // CHECK18-NEXT:    [[CONV35:%.*]] = bitcast i64* [[N_CASTED34]] to i32*
-// CHECK18-NEXT:    store i32 [[TMP101]], i32* [[CONV35]], align 4
-// CHECK18-NEXT:    [[TMP102:%.*]] = load i64, i64* [[N_CASTED34]], align 8
-// CHECK18-NEXT:    [[TMP103:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK18-NEXT:    [[TMP104:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS36]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP105:%.*]] = bitcast i8** [[TMP104]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP102]], i64* [[TMP105]], align 8
-// CHECK18-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS37]], i32 0, i32 0
+// CHECK18-NEXT:    store i32 [[TMP97]], i32* [[CONV35]], align 4
+// CHECK18-NEXT:    [[TMP98:%.*]] = load i64, i64* [[N_CASTED34]], align 8
+// CHECK18-NEXT:    [[TMP99:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK18-NEXT:    [[TMP100:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES39]] to i8*
+// CHECK18-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP100]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes.12 to i8*), i64 24, i1 false)
+// CHECK18-NEXT:    [[TMP101:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS36]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP102:%.*]] = bitcast i8** [[TMP101]] to i64*
+// CHECK18-NEXT:    store i64 [[TMP98]], i64* [[TMP102]], align 8
+// CHECK18-NEXT:    [[TMP103:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS37]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP104:%.*]] = bitcast i8** [[TMP103]] to i64*
+// CHECK18-NEXT:    store i64 [[TMP98]], i64* [[TMP104]], align 8
+// CHECK18-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS38]], i64 0, i64 0
+// CHECK18-NEXT:    store i8* null, i8** [[TMP105]], align 8
+// CHECK18-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS36]], i32 0, i32 1
 // CHECK18-NEXT:    [[TMP107:%.*]] = bitcast i8** [[TMP106]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP102]], i64* [[TMP107]], align 8
-// CHECK18-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES39]], i32 0, i32 0
-// CHECK18-NEXT:    store i64 4, i64* [[TMP108]], align 8
-// CHECK18-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS38]], i64 0, i64 0
-// CHECK18-NEXT:    store i8* null, i8** [[TMP109]], align 8
-// CHECK18-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS36]], i32 0, i32 1
-// CHECK18-NEXT:    [[TMP111:%.*]] = bitcast i8** [[TMP110]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP1]], i64* [[TMP111]], align 8
-// CHECK18-NEXT:    [[TMP112:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS37]], i32 0, i32 1
-// CHECK18-NEXT:    [[TMP113:%.*]] = bitcast i8** [[TMP112]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP1]], i64* [[TMP113]], align 8
-// CHECK18-NEXT:    [[TMP114:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES39]], i32 0, i32 1
-// CHECK18-NEXT:    store i64 8, i64* [[TMP114]], align 8
-// CHECK18-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS38]], i64 0, i64 1
-// CHECK18-NEXT:    store i8* null, i8** [[TMP115]], align 8
-// CHECK18-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS36]], i32 0, i32 2
-// CHECK18-NEXT:    [[TMP117:%.*]] = bitcast i8** [[TMP116]] to i32**
-// CHECK18-NEXT:    store i32* [[VLA]], i32** [[TMP117]], align 8
-// CHECK18-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS37]], i32 0, i32 2
-// CHECK18-NEXT:    [[TMP119:%.*]] = bitcast i8** [[TMP118]] to i32**
-// CHECK18-NEXT:    store i32* [[VLA]], i32** [[TMP119]], align 8
-// CHECK18-NEXT:    [[TMP120:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES39]], i32 0, i32 2
-// CHECK18-NEXT:    store i64 [[TMP103]], i64* [[TMP120]], align 8
-// CHECK18-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS38]], i64 0, i64 2
-// CHECK18-NEXT:    store i8* null, i8** [[TMP121]], align 8
-// CHECK18-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS36]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP123:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS37]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES39]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP125:%.*]] = load i32, i32* [[N]], align 4
-// CHECK18-NEXT:    store i32 [[TMP125]], i32* [[DOTCAPTURE_EXPR_41]], align 4
-// CHECK18-NEXT:    [[TMP126:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_41]], align 4
-// CHECK18-NEXT:    [[SUB43:%.*]] = sub nsw i32 [[TMP126]], 0
+// CHECK18-NEXT:    store i64 [[TMP1]], i64* [[TMP107]], align 8
+// CHECK18-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS37]], i32 0, i32 1
+// CHECK18-NEXT:    [[TMP109:%.*]] = bitcast i8** [[TMP108]] to i64*
+// CHECK18-NEXT:    store i64 [[TMP1]], i64* [[TMP109]], align 8
+// CHECK18-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS38]], i64 0, i64 1
+// CHECK18-NEXT:    store i8* null, i8** [[TMP110]], align 8
+// CHECK18-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS36]], i32 0, i32 2
+// CHECK18-NEXT:    [[TMP112:%.*]] = bitcast i8** [[TMP111]] to i32**
+// CHECK18-NEXT:    store i32* [[VLA]], i32** [[TMP112]], align 8
+// CHECK18-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS37]], i32 0, i32 2
+// CHECK18-NEXT:    [[TMP114:%.*]] = bitcast i8** [[TMP113]] to i32**
+// CHECK18-NEXT:    store i32* [[VLA]], i32** [[TMP114]], align 8
+// CHECK18-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES39]], i32 0, i32 2
+// CHECK18-NEXT:    store i64 [[TMP99]], i64* [[TMP115]], align 8
+// CHECK18-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS38]], i64 0, i64 2
+// CHECK18-NEXT:    store i8* null, i8** [[TMP116]], align 8
+// CHECK18-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS36]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS37]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES39]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP120:%.*]] = load i32, i32* [[N]], align 4
+// CHECK18-NEXT:    store i32 [[TMP120]], i32* [[DOTCAPTURE_EXPR_41]], align 4
+// CHECK18-NEXT:    [[TMP121:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_41]], align 4
+// CHECK18-NEXT:    [[SUB43:%.*]] = sub nsw i32 [[TMP121]], 0
 // CHECK18-NEXT:    [[DIV44:%.*]] = sdiv i32 [[SUB43]], 1
 // CHECK18-NEXT:    [[SUB45:%.*]] = sub nsw i32 [[DIV44]], 1
 // CHECK18-NEXT:    store i32 [[SUB45]], i32* [[DOTCAPTURE_EXPR_42]], align 4
-// CHECK18-NEXT:    [[TMP127:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_42]], align 4
-// CHECK18-NEXT:    [[ADD46:%.*]] = add nsw i32 [[TMP127]], 1
-// CHECK18-NEXT:    [[TMP128:%.*]] = zext i32 [[ADD46]] to i64
-// CHECK18-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP128]])
-// CHECK18-NEXT:    [[TMP129:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l163.region_id, i32 3, i8** [[TMP122]], i8** [[TMP123]], i64* [[TMP124]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.10, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK18-NEXT:    [[TMP130:%.*]] = icmp ne i32 [[TMP129]], 0
-// CHECK18-NEXT:    br i1 [[TMP130]], label [[OMP_OFFLOAD_FAILED47:%.*]], label [[OMP_OFFLOAD_CONT48:%.*]]
+// CHECK18-NEXT:    [[TMP122:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_42]], align 4
+// CHECK18-NEXT:    [[ADD46:%.*]] = add nsw i32 [[TMP122]], 1
+// CHECK18-NEXT:    [[TMP123:%.*]] = zext i32 [[ADD46]] to i64
+// CHECK18-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP123]])
+// CHECK18-NEXT:    [[TMP124:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l163.region_id, i32 3, i8** [[TMP117]], i8** [[TMP118]], i64* [[TMP119]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK18-NEXT:    [[TMP125:%.*]] = icmp ne i32 [[TMP124]], 0
+// CHECK18-NEXT:    br i1 [[TMP125]], label [[OMP_OFFLOAD_FAILED47:%.*]], label [[OMP_OFFLOAD_CONT48:%.*]]
 // CHECK18:       omp_offload.failed47:
-// CHECK18-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l163(i64 [[TMP102]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK18-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l163(i64 [[TMP98]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK18-NEXT:    br label [[OMP_OFFLOAD_CONT48]]
 // CHECK18:       omp_offload.cont48:
-// CHECK18-NEXT:    [[TMP131:%.*]] = load i32, i32* [[M]], align 4
+// CHECK18-NEXT:    [[TMP126:%.*]] = load i32, i32* [[M]], align 4
 // CHECK18-NEXT:    [[CONV50:%.*]] = bitcast i64* [[M_CASTED49]] to i32*
-// CHECK18-NEXT:    store i32 [[TMP131]], i32* [[CONV50]], align 4
-// CHECK18-NEXT:    [[TMP132:%.*]] = load i64, i64* [[M_CASTED49]], align 8
-// CHECK18-NEXT:    [[TMP133:%.*]] = load i32, i32* [[N]], align 4
+// CHECK18-NEXT:    store i32 [[TMP126]], i32* [[CONV50]], align 4
+// CHECK18-NEXT:    [[TMP127:%.*]] = load i64, i64* [[M_CASTED49]], align 8
+// CHECK18-NEXT:    [[TMP128:%.*]] = load i32, i32* [[N]], align 4
 // CHECK18-NEXT:    [[CONV52:%.*]] = bitcast i64* [[N_CASTED51]] to i32*
-// CHECK18-NEXT:    store i32 [[TMP133]], i32* [[CONV52]], align 4
-// CHECK18-NEXT:    [[TMP134:%.*]] = load i64, i64* [[N_CASTED51]], align 8
-// CHECK18-NEXT:    [[TMP135:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK18-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP137:%.*]] = bitcast i8** [[TMP136]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP132]], i64* [[TMP137]], align 8
-// CHECK18-NEXT:    [[TMP138:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP139:%.*]] = bitcast i8** [[TMP138]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP132]], i64* [[TMP139]], align 8
-// CHECK18-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES56]], i32 0, i32 0
-// CHECK18-NEXT:    store i64 4, i64* [[TMP140]], align 8
-// CHECK18-NEXT:    [[TMP141:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS55]], i64 0, i64 0
+// CHECK18-NEXT:    store i32 [[TMP128]], i32* [[CONV52]], align 4
+// CHECK18-NEXT:    [[TMP129:%.*]] = load i64, i64* [[N_CASTED51]], align 8
+// CHECK18-NEXT:    [[TMP130:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK18-NEXT:    [[TMP131:%.*]] = bitcast [4 x i64]* [[DOTOFFLOAD_SIZES56]] to i8*
+// CHECK18-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP131]], i8* align 8 bitcast ([4 x i64]* @.offload_sizes.16 to i8*), i64 32, i1 false)
+// CHECK18-NEXT:    [[TMP132:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP133:%.*]] = bitcast i8** [[TMP132]] to i64*
+// CHECK18-NEXT:    store i64 [[TMP127]], i64* [[TMP133]], align 8
+// CHECK18-NEXT:    [[TMP134:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP135:%.*]] = bitcast i8** [[TMP134]] to i64*
+// CHECK18-NEXT:    store i64 [[TMP127]], i64* [[TMP135]], align 8
+// CHECK18-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS55]], i64 0, i64 0
+// CHECK18-NEXT:    store i8* null, i8** [[TMP136]], align 8
+// CHECK18-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 1
+// CHECK18-NEXT:    [[TMP138:%.*]] = bitcast i8** [[TMP137]] to i64*
+// CHECK18-NEXT:    store i64 [[TMP129]], i64* [[TMP138]], align 8
+// CHECK18-NEXT:    [[TMP139:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 1
+// CHECK18-NEXT:    [[TMP140:%.*]] = bitcast i8** [[TMP139]] to i64*
+// CHECK18-NEXT:    store i64 [[TMP129]], i64* [[TMP140]], align 8
+// CHECK18-NEXT:    [[TMP141:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS55]], i64 0, i64 1
 // CHECK18-NEXT:    store i8* null, i8** [[TMP141]], align 8
-// CHECK18-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 1
+// CHECK18-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 2
 // CHECK18-NEXT:    [[TMP143:%.*]] = bitcast i8** [[TMP142]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP134]], i64* [[TMP143]], align 8
-// CHECK18-NEXT:    [[TMP144:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 1
+// CHECK18-NEXT:    store i64 [[TMP1]], i64* [[TMP143]], align 8
+// CHECK18-NEXT:    [[TMP144:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 2
 // CHECK18-NEXT:    [[TMP145:%.*]] = bitcast i8** [[TMP144]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP134]], i64* [[TMP145]], align 8
-// CHECK18-NEXT:    [[TMP146:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES56]], i32 0, i32 1
-// CHECK18-NEXT:    store i64 4, i64* [[TMP146]], align 8
-// CHECK18-NEXT:    [[TMP147:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS55]], i64 0, i64 1
-// CHECK18-NEXT:    store i8* null, i8** [[TMP147]], align 8
-// CHECK18-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 2
-// CHECK18-NEXT:    [[TMP149:%.*]] = bitcast i8** [[TMP148]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP1]], i64* [[TMP149]], align 8
-// CHECK18-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 2
-// CHECK18-NEXT:    [[TMP151:%.*]] = bitcast i8** [[TMP150]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP1]], i64* [[TMP151]], align 8
-// CHECK18-NEXT:    [[TMP152:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES56]], i32 0, i32 2
-// CHECK18-NEXT:    store i64 8, i64* [[TMP152]], align 8
-// CHECK18-NEXT:    [[TMP153:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS55]], i64 0, i64 2
-// CHECK18-NEXT:    store i8* null, i8** [[TMP153]], align 8
-// CHECK18-NEXT:    [[TMP154:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 3
-// CHECK18-NEXT:    [[TMP155:%.*]] = bitcast i8** [[TMP154]] to i32**
-// CHECK18-NEXT:    store i32* [[VLA]], i32** [[TMP155]], align 8
-// CHECK18-NEXT:    [[TMP156:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 3
-// CHECK18-NEXT:    [[TMP157:%.*]] = bitcast i8** [[TMP156]] to i32**
-// CHECK18-NEXT:    store i32* [[VLA]], i32** [[TMP157]], align 8
-// CHECK18-NEXT:    [[TMP158:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES56]], i32 0, i32 3
-// CHECK18-NEXT:    store i64 [[TMP135]], i64* [[TMP158]], align 8
-// CHECK18-NEXT:    [[TMP159:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS55]], i64 0, i64 3
-// CHECK18-NEXT:    store i8* null, i8** [[TMP159]], align 8
-// CHECK18-NEXT:    [[TMP160:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP161:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP162:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES56]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP163:%.*]] = load i32, i32* [[N]], align 4
-// CHECK18-NEXT:    store i32 [[TMP163]], i32* [[DOTCAPTURE_EXPR_58]], align 4
-// CHECK18-NEXT:    [[TMP164:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_58]], align 4
-// CHECK18-NEXT:    [[SUB60:%.*]] = sub nsw i32 [[TMP164]], 0
+// CHECK18-NEXT:    store i64 [[TMP1]], i64* [[TMP145]], align 8
+// CHECK18-NEXT:    [[TMP146:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS55]], i64 0, i64 2
+// CHECK18-NEXT:    store i8* null, i8** [[TMP146]], align 8
+// CHECK18-NEXT:    [[TMP147:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 3
+// CHECK18-NEXT:    [[TMP148:%.*]] = bitcast i8** [[TMP147]] to i32**
+// CHECK18-NEXT:    store i32* [[VLA]], i32** [[TMP148]], align 8
+// CHECK18-NEXT:    [[TMP149:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 3
+// CHECK18-NEXT:    [[TMP150:%.*]] = bitcast i8** [[TMP149]] to i32**
+// CHECK18-NEXT:    store i32* [[VLA]], i32** [[TMP150]], align 8
+// CHECK18-NEXT:    [[TMP151:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES56]], i32 0, i32 3
+// CHECK18-NEXT:    store i64 [[TMP130]], i64* [[TMP151]], align 8
+// CHECK18-NEXT:    [[TMP152:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS55]], i64 0, i64 3
+// CHECK18-NEXT:    store i8* null, i8** [[TMP152]], align 8
+// CHECK18-NEXT:    [[TMP153:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP154:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP155:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES56]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP156:%.*]] = load i32, i32* [[N]], align 4
+// CHECK18-NEXT:    store i32 [[TMP156]], i32* [[DOTCAPTURE_EXPR_58]], align 4
+// CHECK18-NEXT:    [[TMP157:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_58]], align 4
+// CHECK18-NEXT:    [[SUB60:%.*]] = sub nsw i32 [[TMP157]], 0
 // CHECK18-NEXT:    [[DIV61:%.*]] = sdiv i32 [[SUB60]], 1
 // CHECK18-NEXT:    [[SUB62:%.*]] = sub nsw i32 [[DIV61]], 1
 // CHECK18-NEXT:    store i32 [[SUB62]], i32* [[DOTCAPTURE_EXPR_59]], align 4
-// CHECK18-NEXT:    [[TMP165:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_59]], align 4
-// CHECK18-NEXT:    [[ADD63:%.*]] = add nsw i32 [[TMP165]], 1
-// CHECK18-NEXT:    [[TMP166:%.*]] = zext i32 [[ADD63]] to i64
-// CHECK18-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP166]])
-// CHECK18-NEXT:    [[TMP167:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l168.region_id, i32 4, i8** [[TMP160]], i8** [[TMP161]], i64* [[TMP162]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK18-NEXT:    [[TMP168:%.*]] = icmp ne i32 [[TMP167]], 0
-// CHECK18-NEXT:    br i1 [[TMP168]], label [[OMP_OFFLOAD_FAILED64:%.*]], label [[OMP_OFFLOAD_CONT65:%.*]]
+// CHECK18-NEXT:    [[TMP158:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_59]], align 4
+// CHECK18-NEXT:    [[ADD63:%.*]] = add nsw i32 [[TMP158]], 1
+// CHECK18-NEXT:    [[TMP159:%.*]] = zext i32 [[ADD63]] to i64
+// CHECK18-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP159]])
+// CHECK18-NEXT:    [[TMP160:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l168.region_id, i32 4, i8** [[TMP153]], i8** [[TMP154]], i64* [[TMP155]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.17, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK18-NEXT:    [[TMP161:%.*]] = icmp ne i32 [[TMP160]], 0
+// CHECK18-NEXT:    br i1 [[TMP161]], label [[OMP_OFFLOAD_FAILED64:%.*]], label [[OMP_OFFLOAD_CONT65:%.*]]
 // CHECK18:       omp_offload.failed64:
-// CHECK18-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l168(i64 [[TMP132]], i64 [[TMP134]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK18-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l168(i64 [[TMP127]], i64 [[TMP129]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK18-NEXT:    br label [[OMP_OFFLOAD_CONT65]]
 // CHECK18:       omp_offload.cont65:
-// CHECK18-NEXT:    [[TMP169:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK18-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10EEiT_(i32 noundef signext [[TMP169]])
+// CHECK18-NEXT:    [[TMP162:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK18-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10EEiT_(i32 noundef signext [[TMP162]])
 // CHECK18-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK18-NEXT:    [[TMP170:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK18-NEXT:    call void @llvm.stackrestore(i8* [[TMP170]])
-// CHECK18-NEXT:    [[TMP171:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK18-NEXT:    ret i32 [[TMP171]]
+// CHECK18-NEXT:    [[TMP163:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK18-NEXT:    call void @llvm.stackrestore(i8* [[TMP163]])
+// CHECK18-NEXT:    [[TMP164:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK18-NEXT:    ret i32 [[TMP164]]
 //
 //
 // CHECK18-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l148
@@ -20420,11 +20336,11 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK18-NEXT:    store i32 [[TMP3]], i32* [[CONV2]], align 4
 // CHECK18-NEXT:    [[TMP4:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32* [[CONV1]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP4]])
+// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* [[CONV1]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP4]])
 // CHECK18-NEXT:    ret void
 //
 //
-// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..5
+// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..6
 // CHECK18-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK18-NEXT:  entry:
 // CHECK18-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -20506,7 +20422,7 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK18-NEXT:    store i32 [[TMP21]], i32* [[CONV7]], align 4
 // CHECK18-NEXT:    [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]], i64 [[TMP22]])
+// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*, i64)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]], i64 [[TMP22]])
 // CHECK18-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK18:       omp.inner.for.inc:
 // CHECK18-NEXT:    [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -20548,7 +20464,7 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    ret void
 //
 //
-// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..6
+// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..7
 // CHECK18-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK18-NEXT:  entry:
 // CHECK18-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -20668,11 +20584,11 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32*
 // CHECK18-NEXT:    [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8
 // CHECK18-NEXT:    [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8
-// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), i32* [[CONV]], i64 [[TMP0]], i32* [[TMP1]])
+// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), i32* [[CONV]], i64 [[TMP0]], i32* [[TMP1]])
 // CHECK18-NEXT:    ret void
 //
 //
-// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..8
+// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..10
 // CHECK18-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] {
 // CHECK18-NEXT:  entry:
 // CHECK18-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -20744,7 +20660,7 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    [[TMP17:%.*]] = zext i32 [[TMP16]] to i64
 // CHECK18-NEXT:    [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
 // CHECK18-NEXT:    [[TMP19:%.*]] = zext i32 [[TMP18]] to i64
-// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]])
+// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]])
 // CHECK18-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK18:       omp.inner.for.inc:
 // CHECK18-NEXT:    [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -20763,7 +20679,7 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    ret void
 //
 //
-// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..9
+// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..11
 // CHECK18-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] {
 // CHECK18-NEXT:  entry:
 // CHECK18-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -20887,11 +20803,11 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK18-NEXT:    store i32 [[TMP3]], i32* [[CONV2]], align 4
 // CHECK18-NEXT:    [[TMP4:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32* [[CONV1]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP4]])
+// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*, i64)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i32* [[CONV1]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP4]])
 // CHECK18-NEXT:    ret void
 //
 //
-// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..11
+// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..14
 // CHECK18-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK18-NEXT:  entry:
 // CHECK18-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -20971,7 +20887,7 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK18-NEXT:    store i32 [[TMP20]], i32* [[CONV7]], align 4
 // CHECK18-NEXT:    [[TMP21:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*, i64)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]], i64 [[TMP21]])
+// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]], i64 [[TMP21]])
 // CHECK18-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK18:       omp.inner.for.inc:
 // CHECK18-NEXT:    [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -20990,7 +20906,7 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    ret void
 //
 //
-// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..12
+// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..15
 // CHECK18-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK18-NEXT:  entry:
 // CHECK18-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -21096,7 +21012,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK18-LABEL: define {{[^@]+}}@_Z5tmainIiLi10EEiT_
-// CHECK18-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK18-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK18-NEXT:  entry:
 // CHECK18-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK18-NEXT:    [[A:%.*]] = alloca [10 x i32], align 4
@@ -21136,7 +21052,7 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK18-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK18-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK18-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l116.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.16, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK18-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l116.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.20, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.21, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK18-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK18-NEXT:    br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK18:       omp_offload.failed:
@@ -21154,7 +21070,7 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK18-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
 // CHECK18-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK18-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l121.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.19, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.20, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK18-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l121.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.24, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.25, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK18-NEXT:    [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
 // CHECK18-NEXT:    br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]]
 // CHECK18:       omp_offload.failed5:
@@ -21184,7 +21100,7 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0
 // CHECK18-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS8]], i32 0, i32 0
 // CHECK18-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK18-NEXT:    [[TMP32:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l126.region_id, i32 2, i8** [[TMP30]], i8** [[TMP31]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.23, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.24, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK18-NEXT:    [[TMP32:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l126.region_id, i32 2, i8** [[TMP30]], i8** [[TMP31]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.28, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.29, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK18-NEXT:    [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0
 // CHECK18-NEXT:    br i1 [[TMP33]], label [[OMP_OFFLOAD_FAILED11:%.*]], label [[OMP_OFFLOAD_CONT12:%.*]]
 // CHECK18:       omp_offload.failed11:
@@ -21202,7 +21118,7 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0
 // CHECK18-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 0
 // CHECK18-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK18-NEXT:    [[TMP41:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l131.region_id, i32 1, i8** [[TMP39]], i8** [[TMP40]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.27, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.28, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK18-NEXT:    [[TMP41:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l131.region_id, i32 1, i8** [[TMP39]], i8** [[TMP40]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.32, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.33, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK18-NEXT:    [[TMP42:%.*]] = icmp ne i32 [[TMP41]], 0
 // CHECK18-NEXT:    br i1 [[TMP42]], label [[OMP_OFFLOAD_FAILED17:%.*]], label [[OMP_OFFLOAD_CONT18:%.*]]
 // CHECK18:       omp_offload.failed17:
@@ -21232,7 +21148,7 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 0
 // CHECK18-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 0
 // CHECK18-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK18-NEXT:    [[TMP57:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l136.region_id, i32 2, i8** [[TMP55]], i8** [[TMP56]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.31, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.32, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK18-NEXT:    [[TMP57:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l136.region_id, i32 2, i8** [[TMP55]], i8** [[TMP56]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.36, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.37, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK18-NEXT:    [[TMP58:%.*]] = icmp ne i32 [[TMP57]], 0
 // CHECK18-NEXT:    br i1 [[TMP58]], label [[OMP_OFFLOAD_FAILED25:%.*]], label [[OMP_OFFLOAD_CONT26:%.*]]
 // CHECK18:       omp_offload.failed25:
@@ -21248,11 +21164,11 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8
 // CHECK18-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8
 // CHECK18-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8
-// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK18-NEXT:    ret void
 //
 //
-// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..14
+// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..18
 // CHECK18-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK18-NEXT:  entry:
 // CHECK18-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -21300,7 +21216,7 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
 // CHECK18-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
 // CHECK18-NEXT:    [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
-// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]])
+// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]])
 // CHECK18-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK18:       omp.inner.for.inc:
 // CHECK18-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -21315,7 +21231,7 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    ret void
 //
 //
-// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..15
+// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..19
 // CHECK18-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK18-NEXT:  entry:
 // CHECK18-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -21398,11 +21314,11 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8
 // CHECK18-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8
 // CHECK18-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8
-// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..17 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..22 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK18-NEXT:    ret void
 //
 //
-// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..17
+// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..22
 // CHECK18-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK18-NEXT:  entry:
 // CHECK18-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -21450,7 +21366,7 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
 // CHECK18-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
 // CHECK18-NEXT:    [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
-// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]])
+// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..23 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]])
 // CHECK18-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK18:       omp.inner.for.inc:
 // CHECK18-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -21465,7 +21381,7 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    ret void
 //
 //
-// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..18
+// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..23
 // CHECK18-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK18-NEXT:  entry:
 // CHECK18-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -21559,11 +21475,11 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK18-NEXT:    store i32 [[TMP2]], i32* [[CONV1]], align 4
 // CHECK18-NEXT:    [[TMP3:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i64)* @.omp_outlined..21 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i64 [[TMP3]])
+// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i64)* @.omp_outlined..26 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i64 [[TMP3]])
 // CHECK18-NEXT:    ret void
 //
 //
-// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..21
+// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..26
 // CHECK18-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK18-NEXT:  entry:
 // CHECK18-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -21619,7 +21535,7 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK18-NEXT:    store i32 [[TMP12]], i32* [[CONV2]], align 4
 // CHECK18-NEXT:    [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..22 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]])
+// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..27 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]])
 // CHECK18-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK18:       omp.inner.for.inc:
 // CHECK18-NEXT:    [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -21634,7 +21550,7 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    ret void
 //
 //
-// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..22
+// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..27
 // CHECK18-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK18-NEXT:  entry:
 // CHECK18-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -21742,11 +21658,11 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8
 // CHECK18-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8
 // CHECK18-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8
-// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..25 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK18-NEXT:    ret void
 //
 //
-// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..25
+// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..30
 // CHECK18-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK18-NEXT:  entry:
 // CHECK18-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -21794,7 +21710,7 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
 // CHECK18-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
 // CHECK18-NEXT:    [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
-// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]])
+// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]])
 // CHECK18-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK18:       omp.inner.for.inc:
 // CHECK18-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -21809,7 +21725,7 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    ret void
 //
 //
-// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..26
+// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..31
 // CHECK18-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK18-NEXT:  entry:
 // CHECK18-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -21901,11 +21817,11 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK18-NEXT:    store i32 [[TMP2]], i32* [[CONV1]], align 4
 // CHECK18-NEXT:    [[TMP3:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i64)* @.omp_outlined..29 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i64 [[TMP3]])
+// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i64)* @.omp_outlined..34 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i64 [[TMP3]])
 // CHECK18-NEXT:    ret void
 //
 //
-// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..29
+// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..34
 // CHECK18-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK18-NEXT:  entry:
 // CHECK18-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -21961,7 +21877,7 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK18-NEXT:    store i32 [[TMP12]], i32* [[CONV2]], align 4
 // CHECK18-NEXT:    [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]])
+// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]])
 // CHECK18-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK18:       omp.inner.for.inc:
 // CHECK18-NEXT:    [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -21976,7 +21892,7 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    ret void
 //
 //
-// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..30
+// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..35
 // CHECK18-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK18-NEXT:  entry:
 // CHECK18-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -22056,7 +21972,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK18-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK18-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK18-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK18-NEXT:  entry:
 // CHECK18-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK18-NEXT:    ret void
@@ -22129,14 +22045,14 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4
 // CHECK19-NEXT:    [[TMP4:%.*]] = mul nuw i32 [[TMP0]], 4
 // CHECK19-NEXT:    [[TMP5:%.*]] = sext i32 [[TMP4]] to i64
-// CHECK19-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP3]], i32* [[TMP7]], align 4
-// CHECK19-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP3]], i32* [[TMP9]], align 4
-// CHECK19-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK19-NEXT:    store i64 4, i64* [[TMP10]], align 4
+// CHECK19-NEXT:    [[TMP6:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK19-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP6]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes to i8*), i32 24, i1 false)
+// CHECK19-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP3]], i32* [[TMP8]], align 4
+// CHECK19-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP3]], i32* [[TMP10]], align 4
 // CHECK19-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
 // CHECK19-NEXT:    store i8* null, i8** [[TMP11]], align 4
 // CHECK19-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -22145,298 +22061,284 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK19-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i32*
 // CHECK19-NEXT:    store i32 [[TMP0]], i32* [[TMP15]], align 4
-// CHECK19-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK19-NEXT:    store i64 4, i64* [[TMP16]], align 4
-// CHECK19-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK19-NEXT:    store i8* null, i8** [[TMP17]], align 4
-// CHECK19-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK19-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
-// CHECK19-NEXT:    store i32* [[VLA]], i32** [[TMP19]], align 4
-// CHECK19-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK19-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32**
-// CHECK19-NEXT:    store i32* [[VLA]], i32** [[TMP21]], align 4
-// CHECK19-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK19-NEXT:    store i64 [[TMP5]], i64* [[TMP22]], align 4
-// CHECK19-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK19-NEXT:    store i8* null, i8** [[TMP23]], align 4
-// CHECK19-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP27:%.*]] = load i32, i32* [[N]], align 4
-// CHECK19-NEXT:    store i32 [[TMP27]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK19-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK19-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP28]], 0
+// CHECK19-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK19-NEXT:    store i8* null, i8** [[TMP16]], align 4
+// CHECK19-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK19-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32**
+// CHECK19-NEXT:    store i32* [[VLA]], i32** [[TMP18]], align 4
+// CHECK19-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK19-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32**
+// CHECK19-NEXT:    store i32* [[VLA]], i32** [[TMP20]], align 4
+// CHECK19-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
+// CHECK19-NEXT:    store i64 [[TMP5]], i64* [[TMP21]], align 4
+// CHECK19-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK19-NEXT:    store i8* null, i8** [[TMP22]], align 4
+// CHECK19-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP26:%.*]] = load i32, i32* [[N]], align 4
+// CHECK19-NEXT:    store i32 [[TMP26]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK19-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK19-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP27]], 0
 // CHECK19-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK19-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK19-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK19-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK19-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP29]], 1
-// CHECK19-NEXT:    [[TMP30:%.*]] = zext i32 [[ADD]] to i64
-// CHECK19-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP30]])
-// CHECK19-NEXT:    [[TMP31:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l148.region_id, i32 3, i8** [[TMP24]], i8** [[TMP25]], i64* [[TMP26]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK19-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
-// CHECK19-NEXT:    br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK19-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK19-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP28]], 1
+// CHECK19-NEXT:    [[TMP29:%.*]] = zext i32 [[ADD]] to i64
+// CHECK19-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP29]])
+// CHECK19-NEXT:    [[TMP30:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l148.region_id, i32 3, i8** [[TMP23]], i8** [[TMP24]], i64* [[TMP25]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK19-NEXT:    [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
+// CHECK19-NEXT:    br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK19:       omp_offload.failed:
 // CHECK19-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l148(i32 [[TMP3]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK19-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK19:       omp_offload.cont:
-// CHECK19-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N]], align 4
-// CHECK19-NEXT:    store i32 [[TMP33]], i32* [[N_CASTED3]], align 4
-// CHECK19-NEXT:    [[TMP34:%.*]] = load i32, i32* [[N_CASTED3]], align 4
-// CHECK19-NEXT:    [[TMP35:%.*]] = mul nuw i32 [[TMP0]], 4
-// CHECK19-NEXT:    [[TMP36:%.*]] = sext i32 [[TMP35]] to i64
+// CHECK19-NEXT:    [[TMP32:%.*]] = load i32, i32* [[N]], align 4
+// CHECK19-NEXT:    store i32 [[TMP32]], i32* [[N_CASTED3]], align 4
+// CHECK19-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N_CASTED3]], align 4
+// CHECK19-NEXT:    [[TMP34:%.*]] = mul nuw i32 [[TMP0]], 4
+// CHECK19-NEXT:    [[TMP35:%.*]] = sext i32 [[TMP34]] to i64
+// CHECK19-NEXT:    [[TMP36:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES7]] to i8*
+// CHECK19-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP36]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes.4 to i8*), i32 24, i1 false)
 // CHECK19-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
 // CHECK19-NEXT:    [[TMP38:%.*]] = bitcast i8** [[TMP37]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP34]], i32* [[TMP38]], align 4
+// CHECK19-NEXT:    store i32 [[TMP33]], i32* [[TMP38]], align 4
 // CHECK19-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
 // CHECK19-NEXT:    [[TMP40:%.*]] = bitcast i8** [[TMP39]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP34]], i32* [[TMP40]], align 4
-// CHECK19-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 0
-// CHECK19-NEXT:    store i64 4, i64* [[TMP41]], align 4
-// CHECK19-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 0
-// CHECK19-NEXT:    store i8* null, i8** [[TMP42]], align 4
-// CHECK19-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 1
-// CHECK19-NEXT:    [[TMP44:%.*]] = bitcast i8** [[TMP43]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP0]], i32* [[TMP44]], align 4
-// CHECK19-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 1
-// CHECK19-NEXT:    [[TMP46:%.*]] = bitcast i8** [[TMP45]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP0]], i32* [[TMP46]], align 4
-// CHECK19-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 1
-// CHECK19-NEXT:    store i64 4, i64* [[TMP47]], align 4
-// CHECK19-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 1
-// CHECK19-NEXT:    store i8* null, i8** [[TMP48]], align 4
-// CHECK19-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 2
+// CHECK19-NEXT:    store i32 [[TMP33]], i32* [[TMP40]], align 4
+// CHECK19-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 0
+// CHECK19-NEXT:    store i8* null, i8** [[TMP41]], align 4
+// CHECK19-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 1
+// CHECK19-NEXT:    [[TMP43:%.*]] = bitcast i8** [[TMP42]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP0]], i32* [[TMP43]], align 4
+// CHECK19-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 1
+// CHECK19-NEXT:    [[TMP45:%.*]] = bitcast i8** [[TMP44]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP0]], i32* [[TMP45]], align 4
+// CHECK19-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 1
+// CHECK19-NEXT:    store i8* null, i8** [[TMP46]], align 4
+// CHECK19-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 2
+// CHECK19-NEXT:    [[TMP48:%.*]] = bitcast i8** [[TMP47]] to i32**
+// CHECK19-NEXT:    store i32* [[VLA]], i32** [[TMP48]], align 4
+// CHECK19-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 2
 // CHECK19-NEXT:    [[TMP50:%.*]] = bitcast i8** [[TMP49]] to i32**
 // CHECK19-NEXT:    store i32* [[VLA]], i32** [[TMP50]], align 4
-// CHECK19-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 2
-// CHECK19-NEXT:    [[TMP52:%.*]] = bitcast i8** [[TMP51]] to i32**
-// CHECK19-NEXT:    store i32* [[VLA]], i32** [[TMP52]], align 4
-// CHECK19-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 2
-// CHECK19-NEXT:    store i64 [[TMP36]], i64* [[TMP53]], align 4
-// CHECK19-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 2
-// CHECK19-NEXT:    store i8* null, i8** [[TMP54]], align 4
-// CHECK19-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP58:%.*]] = load i32, i32* [[N]], align 4
-// CHECK19-NEXT:    store i32 [[TMP58]], i32* [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK19-NEXT:    [[TMP59:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK19-NEXT:    [[SUB11:%.*]] = sub nsw i32 [[TMP59]], 0
+// CHECK19-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 2
+// CHECK19-NEXT:    store i64 [[TMP35]], i64* [[TMP51]], align 4
+// CHECK19-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 2
+// CHECK19-NEXT:    store i8* null, i8** [[TMP52]], align 4
+// CHECK19-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP56:%.*]] = load i32, i32* [[N]], align 4
+// CHECK19-NEXT:    store i32 [[TMP56]], i32* [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK19-NEXT:    [[TMP57:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK19-NEXT:    [[SUB11:%.*]] = sub nsw i32 [[TMP57]], 0
 // CHECK19-NEXT:    [[DIV12:%.*]] = sdiv i32 [[SUB11]], 1
 // CHECK19-NEXT:    [[SUB13:%.*]] = sub nsw i32 [[DIV12]], 1
 // CHECK19-NEXT:    store i32 [[SUB13]], i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK19-NEXT:    [[TMP60:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK19-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP60]], 1
-// CHECK19-NEXT:    [[TMP61:%.*]] = zext i32 [[ADD14]] to i64
-// CHECK19-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP61]])
-// CHECK19-NEXT:    [[TMP62:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l153.region_id, i32 3, i8** [[TMP55]], i8** [[TMP56]], i64* [[TMP57]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK19-NEXT:    [[TMP63:%.*]] = icmp ne i32 [[TMP62]], 0
-// CHECK19-NEXT:    br i1 [[TMP63]], label [[OMP_OFFLOAD_FAILED15:%.*]], label [[OMP_OFFLOAD_CONT16:%.*]]
+// CHECK19-NEXT:    [[TMP58:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
+// CHECK19-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP58]], 1
+// CHECK19-NEXT:    [[TMP59:%.*]] = zext i32 [[ADD14]] to i64
+// CHECK19-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP59]])
+// CHECK19-NEXT:    [[TMP60:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l153.region_id, i32 3, i8** [[TMP53]], i8** [[TMP54]], i64* [[TMP55]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.5, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK19-NEXT:    [[TMP61:%.*]] = icmp ne i32 [[TMP60]], 0
+// CHECK19-NEXT:    br i1 [[TMP61]], label [[OMP_OFFLOAD_FAILED15:%.*]], label [[OMP_OFFLOAD_CONT16:%.*]]
 // CHECK19:       omp_offload.failed15:
-// CHECK19-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l153(i32 [[TMP34]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK19-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l153(i32 [[TMP33]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK19-NEXT:    br label [[OMP_OFFLOAD_CONT16]]
 // CHECK19:       omp_offload.cont16:
-// CHECK19-NEXT:    [[TMP64:%.*]] = load i32, i32* [[M]], align 4
-// CHECK19-NEXT:    store i32 [[TMP64]], i32* [[M_CASTED]], align 4
-// CHECK19-NEXT:    [[TMP65:%.*]] = load i32, i32* [[M_CASTED]], align 4
-// CHECK19-NEXT:    [[TMP66:%.*]] = load i32, i32* [[N]], align 4
-// CHECK19-NEXT:    store i32 [[TMP66]], i32* [[N_CASTED17]], align 4
-// CHECK19-NEXT:    [[TMP67:%.*]] = load i32, i32* [[N_CASTED17]], align 4
-// CHECK19-NEXT:    [[TMP68:%.*]] = mul nuw i32 [[TMP0]], 4
-// CHECK19-NEXT:    [[TMP69:%.*]] = sext i32 [[TMP68]] to i64
-// CHECK19-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP71:%.*]] = bitcast i8** [[TMP70]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP65]], i32* [[TMP71]], align 4
-// CHECK19-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP73:%.*]] = bitcast i8** [[TMP72]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP65]], i32* [[TMP73]], align 4
-// CHECK19-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 0
-// CHECK19-NEXT:    store i64 4, i64* [[TMP74]], align 4
-// CHECK19-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 0
-// CHECK19-NEXT:    store i8* null, i8** [[TMP75]], align 4
-// CHECK19-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 1
+// CHECK19-NEXT:    [[TMP62:%.*]] = load i32, i32* [[M]], align 4
+// CHECK19-NEXT:    store i32 [[TMP62]], i32* [[M_CASTED]], align 4
+// CHECK19-NEXT:    [[TMP63:%.*]] = load i32, i32* [[M_CASTED]], align 4
+// CHECK19-NEXT:    [[TMP64:%.*]] = load i32, i32* [[N]], align 4
+// CHECK19-NEXT:    store i32 [[TMP64]], i32* [[N_CASTED17]], align 4
+// CHECK19-NEXT:    [[TMP65:%.*]] = load i32, i32* [[N_CASTED17]], align 4
+// CHECK19-NEXT:    [[TMP66:%.*]] = mul nuw i32 [[TMP0]], 4
+// CHECK19-NEXT:    [[TMP67:%.*]] = sext i32 [[TMP66]] to i64
+// CHECK19-NEXT:    [[TMP68:%.*]] = bitcast [4 x i64]* [[DOTOFFLOAD_SIZES21]] to i8*
+// CHECK19-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP68]], i8* align 4 bitcast ([4 x i64]* @.offload_sizes.8 to i8*), i32 32, i1 false)
+// CHECK19-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP70:%.*]] = bitcast i8** [[TMP69]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP63]], i32* [[TMP70]], align 4
+// CHECK19-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP72:%.*]] = bitcast i8** [[TMP71]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP63]], i32* [[TMP72]], align 4
+// CHECK19-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 0
+// CHECK19-NEXT:    store i8* null, i8** [[TMP73]], align 4
+// CHECK19-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 1
+// CHECK19-NEXT:    [[TMP75:%.*]] = bitcast i8** [[TMP74]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP65]], i32* [[TMP75]], align 4
+// CHECK19-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 1
 // CHECK19-NEXT:    [[TMP77:%.*]] = bitcast i8** [[TMP76]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP67]], i32* [[TMP77]], align 4
-// CHECK19-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 1
-// CHECK19-NEXT:    [[TMP79:%.*]] = bitcast i8** [[TMP78]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP67]], i32* [[TMP79]], align 4
-// CHECK19-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 1
-// CHECK19-NEXT:    store i64 4, i64* [[TMP80]], align 4
-// CHECK19-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 1
-// CHECK19-NEXT:    store i8* null, i8** [[TMP81]], align 4
-// CHECK19-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 2
-// CHECK19-NEXT:    [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP0]], i32* [[TMP83]], align 4
-// CHECK19-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 2
-// CHECK19-NEXT:    [[TMP85:%.*]] = bitcast i8** [[TMP84]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP0]], i32* [[TMP85]], align 4
-// CHECK19-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 2
-// CHECK19-NEXT:    store i64 4, i64* [[TMP86]], align 4
-// CHECK19-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 2
-// CHECK19-NEXT:    store i8* null, i8** [[TMP87]], align 4
-// CHECK19-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 3
-// CHECK19-NEXT:    [[TMP89:%.*]] = bitcast i8** [[TMP88]] to i32**
-// CHECK19-NEXT:    store i32* [[VLA]], i32** [[TMP89]], align 4
-// CHECK19-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 3
-// CHECK19-NEXT:    [[TMP91:%.*]] = bitcast i8** [[TMP90]] to i32**
-// CHECK19-NEXT:    store i32* [[VLA]], i32** [[TMP91]], align 4
-// CHECK19-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 3
-// CHECK19-NEXT:    store i64 [[TMP69]], i64* [[TMP92]], align 4
-// CHECK19-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 3
-// CHECK19-NEXT:    store i8* null, i8** [[TMP93]], align 4
-// CHECK19-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP97:%.*]] = load i32, i32* [[N]], align 4
-// CHECK19-NEXT:    store i32 [[TMP97]], i32* [[DOTCAPTURE_EXPR_23]], align 4
-// CHECK19-NEXT:    [[TMP98:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_23]], align 4
-// CHECK19-NEXT:    [[SUB25:%.*]] = sub nsw i32 [[TMP98]], 0
+// CHECK19-NEXT:    store i32 [[TMP65]], i32* [[TMP77]], align 4
+// CHECK19-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 1
+// CHECK19-NEXT:    store i8* null, i8** [[TMP78]], align 4
+// CHECK19-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 2
+// CHECK19-NEXT:    [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP0]], i32* [[TMP80]], align 4
+// CHECK19-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 2
+// CHECK19-NEXT:    [[TMP82:%.*]] = bitcast i8** [[TMP81]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP0]], i32* [[TMP82]], align 4
+// CHECK19-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 2
+// CHECK19-NEXT:    store i8* null, i8** [[TMP83]], align 4
+// CHECK19-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 3
+// CHECK19-NEXT:    [[TMP85:%.*]] = bitcast i8** [[TMP84]] to i32**
+// CHECK19-NEXT:    store i32* [[VLA]], i32** [[TMP85]], align 4
+// CHECK19-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 3
+// CHECK19-NEXT:    [[TMP87:%.*]] = bitcast i8** [[TMP86]] to i32**
+// CHECK19-NEXT:    store i32* [[VLA]], i32** [[TMP87]], align 4
+// CHECK19-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 3
+// CHECK19-NEXT:    store i64 [[TMP67]], i64* [[TMP88]], align 4
+// CHECK19-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 3
+// CHECK19-NEXT:    store i8* null, i8** [[TMP89]], align 4
+// CHECK19-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP93:%.*]] = load i32, i32* [[N]], align 4
+// CHECK19-NEXT:    store i32 [[TMP93]], i32* [[DOTCAPTURE_EXPR_23]], align 4
+// CHECK19-NEXT:    [[TMP94:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_23]], align 4
+// CHECK19-NEXT:    [[SUB25:%.*]] = sub nsw i32 [[TMP94]], 0
 // CHECK19-NEXT:    [[DIV26:%.*]] = sdiv i32 [[SUB25]], 1
 // CHECK19-NEXT:    [[SUB27:%.*]] = sub nsw i32 [[DIV26]], 1
 // CHECK19-NEXT:    store i32 [[SUB27]], i32* [[DOTCAPTURE_EXPR_24]], align 4
-// CHECK19-NEXT:    [[TMP99:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_24]], align 4
-// CHECK19-NEXT:    [[ADD28:%.*]] = add nsw i32 [[TMP99]], 1
-// CHECK19-NEXT:    [[TMP100:%.*]] = zext i32 [[ADD28]] to i64
-// CHECK19-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP100]])
-// CHECK19-NEXT:    [[TMP101:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l158.region_id, i32 4, i8** [[TMP94]], i8** [[TMP95]], i64* [[TMP96]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.7, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK19-NEXT:    [[TMP102:%.*]] = icmp ne i32 [[TMP101]], 0
-// CHECK19-NEXT:    br i1 [[TMP102]], label [[OMP_OFFLOAD_FAILED29:%.*]], label [[OMP_OFFLOAD_CONT30:%.*]]
+// CHECK19-NEXT:    [[TMP95:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_24]], align 4
+// CHECK19-NEXT:    [[ADD28:%.*]] = add nsw i32 [[TMP95]], 1
+// CHECK19-NEXT:    [[TMP96:%.*]] = zext i32 [[ADD28]] to i64
+// CHECK19-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP96]])
+// CHECK19-NEXT:    [[TMP97:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l158.region_id, i32 4, i8** [[TMP90]], i8** [[TMP91]], i64* [[TMP92]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK19-NEXT:    [[TMP98:%.*]] = icmp ne i32 [[TMP97]], 0
+// CHECK19-NEXT:    br i1 [[TMP98]], label [[OMP_OFFLOAD_FAILED29:%.*]], label [[OMP_OFFLOAD_CONT30:%.*]]
 // CHECK19:       omp_offload.failed29:
-// CHECK19-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l158(i32 [[TMP65]], i32 [[TMP67]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK19-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l158(i32 [[TMP63]], i32 [[TMP65]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK19-NEXT:    br label [[OMP_OFFLOAD_CONT30]]
 // CHECK19:       omp_offload.cont30:
-// CHECK19-NEXT:    [[TMP103:%.*]] = load i32, i32* [[N]], align 4
-// CHECK19-NEXT:    store i32 [[TMP103]], i32* [[N_CASTED31]], align 4
-// CHECK19-NEXT:    [[TMP104:%.*]] = load i32, i32* [[N_CASTED31]], align 4
-// CHECK19-NEXT:    [[TMP105:%.*]] = mul nuw i32 [[TMP0]], 4
-// CHECK19-NEXT:    [[TMP106:%.*]] = sext i32 [[TMP105]] to i64
-// CHECK19-NEXT:    [[TMP107:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS32]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP108:%.*]] = bitcast i8** [[TMP107]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP104]], i32* [[TMP108]], align 4
-// CHECK19-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS33]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP99:%.*]] = load i32, i32* [[N]], align 4
+// CHECK19-NEXT:    store i32 [[TMP99]], i32* [[N_CASTED31]], align 4
+// CHECK19-NEXT:    [[TMP100:%.*]] = load i32, i32* [[N_CASTED31]], align 4
+// CHECK19-NEXT:    [[TMP101:%.*]] = mul nuw i32 [[TMP0]], 4
+// CHECK19-NEXT:    [[TMP102:%.*]] = sext i32 [[TMP101]] to i64
+// CHECK19-NEXT:    [[TMP103:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES35]] to i8*
+// CHECK19-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP103]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes.12 to i8*), i32 24, i1 false)
+// CHECK19-NEXT:    [[TMP104:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS32]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP105:%.*]] = bitcast i8** [[TMP104]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP100]], i32* [[TMP105]], align 4
+// CHECK19-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS33]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP107:%.*]] = bitcast i8** [[TMP106]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP100]], i32* [[TMP107]], align 4
+// CHECK19-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS34]], i32 0, i32 0
+// CHECK19-NEXT:    store i8* null, i8** [[TMP108]], align 4
+// CHECK19-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS32]], i32 0, i32 1
 // CHECK19-NEXT:    [[TMP110:%.*]] = bitcast i8** [[TMP109]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP104]], i32* [[TMP110]], align 4
-// CHECK19-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES35]], i32 0, i32 0
-// CHECK19-NEXT:    store i64 4, i64* [[TMP111]], align 4
-// CHECK19-NEXT:    [[TMP112:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS34]], i32 0, i32 0
-// CHECK19-NEXT:    store i8* null, i8** [[TMP112]], align 4
-// CHECK19-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS32]], i32 0, i32 1
-// CHECK19-NEXT:    [[TMP114:%.*]] = bitcast i8** [[TMP113]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP0]], i32* [[TMP114]], align 4
-// CHECK19-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS33]], i32 0, i32 1
-// CHECK19-NEXT:    [[TMP116:%.*]] = bitcast i8** [[TMP115]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP0]], i32* [[TMP116]], align 4
-// CHECK19-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES35]], i32 0, i32 1
-// CHECK19-NEXT:    store i64 4, i64* [[TMP117]], align 4
-// CHECK19-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS34]], i32 0, i32 1
-// CHECK19-NEXT:    store i8* null, i8** [[TMP118]], align 4
-// CHECK19-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS32]], i32 0, i32 2
-// CHECK19-NEXT:    [[TMP120:%.*]] = bitcast i8** [[TMP119]] to i32**
-// CHECK19-NEXT:    store i32* [[VLA]], i32** [[TMP120]], align 4
-// CHECK19-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS33]], i32 0, i32 2
-// CHECK19-NEXT:    [[TMP122:%.*]] = bitcast i8** [[TMP121]] to i32**
-// CHECK19-NEXT:    store i32* [[VLA]], i32** [[TMP122]], align 4
-// CHECK19-NEXT:    [[TMP123:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES35]], i32 0, i32 2
-// CHECK19-NEXT:    store i64 [[TMP106]], i64* [[TMP123]], align 4
-// CHECK19-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS34]], i32 0, i32 2
-// CHECK19-NEXT:    store i8* null, i8** [[TMP124]], align 4
-// CHECK19-NEXT:    [[TMP125:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS32]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP126:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS33]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES35]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP128:%.*]] = load i32, i32* [[N]], align 4
-// CHECK19-NEXT:    store i32 [[TMP128]], i32* [[DOTCAPTURE_EXPR_37]], align 4
-// CHECK19-NEXT:    [[TMP129:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_37]], align 4
-// CHECK19-NEXT:    [[SUB39:%.*]] = sub nsw i32 [[TMP129]], 0
+// CHECK19-NEXT:    store i32 [[TMP0]], i32* [[TMP110]], align 4
+// CHECK19-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS33]], i32 0, i32 1
+// CHECK19-NEXT:    [[TMP112:%.*]] = bitcast i8** [[TMP111]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP0]], i32* [[TMP112]], align 4
+// CHECK19-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS34]], i32 0, i32 1
+// CHECK19-NEXT:    store i8* null, i8** [[TMP113]], align 4
+// CHECK19-NEXT:    [[TMP114:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS32]], i32 0, i32 2
+// CHECK19-NEXT:    [[TMP115:%.*]] = bitcast i8** [[TMP114]] to i32**
+// CHECK19-NEXT:    store i32* [[VLA]], i32** [[TMP115]], align 4
+// CHECK19-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS33]], i32 0, i32 2
+// CHECK19-NEXT:    [[TMP117:%.*]] = bitcast i8** [[TMP116]] to i32**
+// CHECK19-NEXT:    store i32* [[VLA]], i32** [[TMP117]], align 4
+// CHECK19-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES35]], i32 0, i32 2
+// CHECK19-NEXT:    store i64 [[TMP102]], i64* [[TMP118]], align 4
+// CHECK19-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS34]], i32 0, i32 2
+// CHECK19-NEXT:    store i8* null, i8** [[TMP119]], align 4
+// CHECK19-NEXT:    [[TMP120:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS32]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS33]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES35]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP123:%.*]] = load i32, i32* [[N]], align 4
+// CHECK19-NEXT:    store i32 [[TMP123]], i32* [[DOTCAPTURE_EXPR_37]], align 4
+// CHECK19-NEXT:    [[TMP124:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_37]], align 4
+// CHECK19-NEXT:    [[SUB39:%.*]] = sub nsw i32 [[TMP124]], 0
 // CHECK19-NEXT:    [[DIV40:%.*]] = sdiv i32 [[SUB39]], 1
 // CHECK19-NEXT:    [[SUB41:%.*]] = sub nsw i32 [[DIV40]], 1
 // CHECK19-NEXT:    store i32 [[SUB41]], i32* [[DOTCAPTURE_EXPR_38]], align 4
-// CHECK19-NEXT:    [[TMP130:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_38]], align 4
-// CHECK19-NEXT:    [[ADD42:%.*]] = add nsw i32 [[TMP130]], 1
-// CHECK19-NEXT:    [[TMP131:%.*]] = zext i32 [[ADD42]] to i64
-// CHECK19-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP131]])
-// CHECK19-NEXT:    [[TMP132:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l163.region_id, i32 3, i8** [[TMP125]], i8** [[TMP126]], i64* [[TMP127]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.10, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK19-NEXT:    [[TMP133:%.*]] = icmp ne i32 [[TMP132]], 0
-// CHECK19-NEXT:    br i1 [[TMP133]], label [[OMP_OFFLOAD_FAILED43:%.*]], label [[OMP_OFFLOAD_CONT44:%.*]]
+// CHECK19-NEXT:    [[TMP125:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_38]], align 4
+// CHECK19-NEXT:    [[ADD42:%.*]] = add nsw i32 [[TMP125]], 1
+// CHECK19-NEXT:    [[TMP126:%.*]] = zext i32 [[ADD42]] to i64
+// CHECK19-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP126]])
+// CHECK19-NEXT:    [[TMP127:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l163.region_id, i32 3, i8** [[TMP120]], i8** [[TMP121]], i64* [[TMP122]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK19-NEXT:    [[TMP128:%.*]] = icmp ne i32 [[TMP127]], 0
+// CHECK19-NEXT:    br i1 [[TMP128]], label [[OMP_OFFLOAD_FAILED43:%.*]], label [[OMP_OFFLOAD_CONT44:%.*]]
 // CHECK19:       omp_offload.failed43:
-// CHECK19-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l163(i32 [[TMP104]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK19-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l163(i32 [[TMP100]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK19-NEXT:    br label [[OMP_OFFLOAD_CONT44]]
 // CHECK19:       omp_offload.cont44:
-// CHECK19-NEXT:    [[TMP134:%.*]] = load i32, i32* [[M]], align 4
-// CHECK19-NEXT:    store i32 [[TMP134]], i32* [[M_CASTED45]], align 4
-// CHECK19-NEXT:    [[TMP135:%.*]] = load i32, i32* [[M_CASTED45]], align 4
-// CHECK19-NEXT:    [[TMP136:%.*]] = load i32, i32* [[N]], align 4
-// CHECK19-NEXT:    store i32 [[TMP136]], i32* [[N_CASTED46]], align 4
-// CHECK19-NEXT:    [[TMP137:%.*]] = load i32, i32* [[N_CASTED46]], align 4
-// CHECK19-NEXT:    [[TMP138:%.*]] = mul nuw i32 [[TMP0]], 4
-// CHECK19-NEXT:    [[TMP139:%.*]] = sext i32 [[TMP138]] to i64
-// CHECK19-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP141:%.*]] = bitcast i8** [[TMP140]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP135]], i32* [[TMP141]], align 4
-// CHECK19-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP143:%.*]] = bitcast i8** [[TMP142]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP135]], i32* [[TMP143]], align 4
-// CHECK19-NEXT:    [[TMP144:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES50]], i32 0, i32 0
-// CHECK19-NEXT:    store i64 4, i64* [[TMP144]], align 4
-// CHECK19-NEXT:    [[TMP145:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS49]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP129:%.*]] = load i32, i32* [[M]], align 4
+// CHECK19-NEXT:    store i32 [[TMP129]], i32* [[M_CASTED45]], align 4
+// CHECK19-NEXT:    [[TMP130:%.*]] = load i32, i32* [[M_CASTED45]], align 4
+// CHECK19-NEXT:    [[TMP131:%.*]] = load i32, i32* [[N]], align 4
+// CHECK19-NEXT:    store i32 [[TMP131]], i32* [[N_CASTED46]], align 4
+// CHECK19-NEXT:    [[TMP132:%.*]] = load i32, i32* [[N_CASTED46]], align 4
+// CHECK19-NEXT:    [[TMP133:%.*]] = mul nuw i32 [[TMP0]], 4
+// CHECK19-NEXT:    [[TMP134:%.*]] = sext i32 [[TMP133]] to i64
+// CHECK19-NEXT:    [[TMP135:%.*]] = bitcast [4 x i64]* [[DOTOFFLOAD_SIZES50]] to i8*
+// CHECK19-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP135]], i8* align 4 bitcast ([4 x i64]* @.offload_sizes.16 to i8*), i32 32, i1 false)
+// CHECK19-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP137:%.*]] = bitcast i8** [[TMP136]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP130]], i32* [[TMP137]], align 4
+// CHECK19-NEXT:    [[TMP138:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP139:%.*]] = bitcast i8** [[TMP138]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP130]], i32* [[TMP139]], align 4
+// CHECK19-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS49]], i32 0, i32 0
+// CHECK19-NEXT:    store i8* null, i8** [[TMP140]], align 4
+// CHECK19-NEXT:    [[TMP141:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 1
+// CHECK19-NEXT:    [[TMP142:%.*]] = bitcast i8** [[TMP141]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP132]], i32* [[TMP142]], align 4
+// CHECK19-NEXT:    [[TMP143:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 1
+// CHECK19-NEXT:    [[TMP144:%.*]] = bitcast i8** [[TMP143]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP132]], i32* [[TMP144]], align 4
+// CHECK19-NEXT:    [[TMP145:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS49]], i32 0, i32 1
 // CHECK19-NEXT:    store i8* null, i8** [[TMP145]], align 4
-// CHECK19-NEXT:    [[TMP146:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 1
+// CHECK19-NEXT:    [[TMP146:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 2
 // CHECK19-NEXT:    [[TMP147:%.*]] = bitcast i8** [[TMP146]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP137]], i32* [[TMP147]], align 4
-// CHECK19-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 1
+// CHECK19-NEXT:    store i32 [[TMP0]], i32* [[TMP147]], align 4
+// CHECK19-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 2
 // CHECK19-NEXT:    [[TMP149:%.*]] = bitcast i8** [[TMP148]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP137]], i32* [[TMP149]], align 4
-// CHECK19-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES50]], i32 0, i32 1
-// CHECK19-NEXT:    store i64 4, i64* [[TMP150]], align 4
-// CHECK19-NEXT:    [[TMP151:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS49]], i32 0, i32 1
-// CHECK19-NEXT:    store i8* null, i8** [[TMP151]], align 4
-// CHECK19-NEXT:    [[TMP152:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 2
-// CHECK19-NEXT:    [[TMP153:%.*]] = bitcast i8** [[TMP152]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP0]], i32* [[TMP153]], align 4
-// CHECK19-NEXT:    [[TMP154:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 2
-// CHECK19-NEXT:    [[TMP155:%.*]] = bitcast i8** [[TMP154]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP0]], i32* [[TMP155]], align 4
-// CHECK19-NEXT:    [[TMP156:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES50]], i32 0, i32 2
-// CHECK19-NEXT:    store i64 4, i64* [[TMP156]], align 4
-// CHECK19-NEXT:    [[TMP157:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS49]], i32 0, i32 2
-// CHECK19-NEXT:    store i8* null, i8** [[TMP157]], align 4
-// CHECK19-NEXT:    [[TMP158:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 3
-// CHECK19-NEXT:    [[TMP159:%.*]] = bitcast i8** [[TMP158]] to i32**
-// CHECK19-NEXT:    store i32* [[VLA]], i32** [[TMP159]], align 4
-// CHECK19-NEXT:    [[TMP160:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 3
-// CHECK19-NEXT:    [[TMP161:%.*]] = bitcast i8** [[TMP160]] to i32**
-// CHECK19-NEXT:    store i32* [[VLA]], i32** [[TMP161]], align 4
-// CHECK19-NEXT:    [[TMP162:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES50]], i32 0, i32 3
-// CHECK19-NEXT:    store i64 [[TMP139]], i64* [[TMP162]], align 4
-// CHECK19-NEXT:    [[TMP163:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS49]], i32 0, i32 3
-// CHECK19-NEXT:    store i8* null, i8** [[TMP163]], align 4
-// CHECK19-NEXT:    [[TMP164:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP165:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP166:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES50]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP167:%.*]] = load i32, i32* [[N]], align 4
-// CHECK19-NEXT:    store i32 [[TMP167]], i32* [[DOTCAPTURE_EXPR_52]], align 4
-// CHECK19-NEXT:    [[TMP168:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_52]], align 4
-// CHECK19-NEXT:    [[SUB54:%.*]] = sub nsw i32 [[TMP168]], 0
+// CHECK19-NEXT:    store i32 [[TMP0]], i32* [[TMP149]], align 4
+// CHECK19-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS49]], i32 0, i32 2
+// CHECK19-NEXT:    store i8* null, i8** [[TMP150]], align 4
+// CHECK19-NEXT:    [[TMP151:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 3
+// CHECK19-NEXT:    [[TMP152:%.*]] = bitcast i8** [[TMP151]] to i32**
+// CHECK19-NEXT:    store i32* [[VLA]], i32** [[TMP152]], align 4
+// CHECK19-NEXT:    [[TMP153:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 3
+// CHECK19-NEXT:    [[TMP154:%.*]] = bitcast i8** [[TMP153]] to i32**
+// CHECK19-NEXT:    store i32* [[VLA]], i32** [[TMP154]], align 4
+// CHECK19-NEXT:    [[TMP155:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES50]], i32 0, i32 3
+// CHECK19-NEXT:    store i64 [[TMP134]], i64* [[TMP155]], align 4
+// CHECK19-NEXT:    [[TMP156:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS49]], i32 0, i32 3
+// CHECK19-NEXT:    store i8* null, i8** [[TMP156]], align 4
+// CHECK19-NEXT:    [[TMP157:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP158:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP159:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES50]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP160:%.*]] = load i32, i32* [[N]], align 4
+// CHECK19-NEXT:    store i32 [[TMP160]], i32* [[DOTCAPTURE_EXPR_52]], align 4
+// CHECK19-NEXT:    [[TMP161:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_52]], align 4
+// CHECK19-NEXT:    [[SUB54:%.*]] = sub nsw i32 [[TMP161]], 0
 // CHECK19-NEXT:    [[DIV55:%.*]] = sdiv i32 [[SUB54]], 1
 // CHECK19-NEXT:    [[SUB56:%.*]] = sub nsw i32 [[DIV55]], 1
 // CHECK19-NEXT:    store i32 [[SUB56]], i32* [[DOTCAPTURE_EXPR_53]], align 4
-// CHECK19-NEXT:    [[TMP169:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_53]], align 4
-// CHECK19-NEXT:    [[ADD57:%.*]] = add nsw i32 [[TMP169]], 1
-// CHECK19-NEXT:    [[TMP170:%.*]] = zext i32 [[ADD57]] to i64
-// CHECK19-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP170]])
-// CHECK19-NEXT:    [[TMP171:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l168.region_id, i32 4, i8** [[TMP164]], i8** [[TMP165]], i64* [[TMP166]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK19-NEXT:    [[TMP172:%.*]] = icmp ne i32 [[TMP171]], 0
-// CHECK19-NEXT:    br i1 [[TMP172]], label [[OMP_OFFLOAD_FAILED58:%.*]], label [[OMP_OFFLOAD_CONT59:%.*]]
+// CHECK19-NEXT:    [[TMP162:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_53]], align 4
+// CHECK19-NEXT:    [[ADD57:%.*]] = add nsw i32 [[TMP162]], 1
+// CHECK19-NEXT:    [[TMP163:%.*]] = zext i32 [[ADD57]] to i64
+// CHECK19-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP163]])
+// CHECK19-NEXT:    [[TMP164:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l168.region_id, i32 4, i8** [[TMP157]], i8** [[TMP158]], i64* [[TMP159]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.17, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK19-NEXT:    [[TMP165:%.*]] = icmp ne i32 [[TMP164]], 0
+// CHECK19-NEXT:    br i1 [[TMP165]], label [[OMP_OFFLOAD_FAILED58:%.*]], label [[OMP_OFFLOAD_CONT59:%.*]]
 // CHECK19:       omp_offload.failed58:
-// CHECK19-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l168(i32 [[TMP135]], i32 [[TMP137]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK19-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l168(i32 [[TMP130]], i32 [[TMP132]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK19-NEXT:    br label [[OMP_OFFLOAD_CONT59]]
 // CHECK19:       omp_offload.cont59:
-// CHECK19-NEXT:    [[TMP173:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK19-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10EEiT_(i32 noundef [[TMP173]])
+// CHECK19-NEXT:    [[TMP166:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK19-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10EEiT_(i32 noundef [[TMP166]])
 // CHECK19-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK19-NEXT:    [[TMP174:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK19-NEXT:    call void @llvm.stackrestore(i8* [[TMP174]])
-// CHECK19-NEXT:    [[TMP175:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK19-NEXT:    ret i32 [[TMP175]]
+// CHECK19-NEXT:    [[TMP167:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK19-NEXT:    call void @llvm.stackrestore(i8* [[TMP167]])
+// CHECK19-NEXT:    [[TMP168:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK19-NEXT:    ret i32 [[TMP168]]
 //
 //
 // CHECK19-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l148
@@ -22871,11 +22773,11 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
 // CHECK19-NEXT:    store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK19-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*, i32)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP4]])
+// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*, i32)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP4]])
 // CHECK19-NEXT:    ret void
 //
 //
-// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..5
+// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..6
 // CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK19-NEXT:  entry:
 // CHECK19-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -22953,7 +22855,7 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
 // CHECK19-NEXT:    store i32 [[TMP19]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK19-NEXT:    [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*, i32)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]], i32 [[TMP20]])
+// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*, i32)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]], i32 [[TMP20]])
 // CHECK19-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK19:       omp.inner.for.inc:
 // CHECK19-NEXT:    [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -22995,7 +22897,7 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    ret void
 //
 //
-// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..6
+// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..7
 // CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK19-NEXT:  entry:
 // CHECK19-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -23110,11 +23012,11 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    store i32* [[A]], i32** [[A_ADDR]], align 4
 // CHECK19-NEXT:    [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4
 // CHECK19-NEXT:    [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4
-// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]])
+// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]])
 // CHECK19-NEXT:    ret void
 //
 //
-// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..8
+// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..10
 // CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] {
 // CHECK19-NEXT:  entry:
 // CHECK19-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -23184,7 +23086,7 @@ int main (int argc, char **argv) {
 // CHECK19:       omp.inner.for.body:
 // CHECK19-NEXT:    [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
 // CHECK19-NEXT:    [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
-// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]])
+// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]])
 // CHECK19-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK19:       omp.inner.for.inc:
 // CHECK19-NEXT:    [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -23203,7 +23105,7 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    ret void
 //
 //
-// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..9
+// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..11
 // CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] {
 // CHECK19-NEXT:  entry:
 // CHECK19-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -23321,11 +23223,11 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
 // CHECK19-NEXT:    store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK19-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*, i32)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP4]])
+// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*, i32)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP4]])
 // CHECK19-NEXT:    ret void
 //
 //
-// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..11
+// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..14
 // CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK19-NEXT:  entry:
 // CHECK19-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -23401,7 +23303,7 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
 // CHECK19-NEXT:    store i32 [[TMP18]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK19-NEXT:    [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*, i32)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]], i32 [[TMP19]])
+// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*, i32)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]], i32 [[TMP19]])
 // CHECK19-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK19:       omp.inner.for.inc:
 // CHECK19-NEXT:    [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -23420,7 +23322,7 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    ret void
 //
 //
-// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..12
+// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..15
 // CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK19-NEXT:  entry:
 // CHECK19-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -23522,7 +23424,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK19-LABEL: define {{[^@]+}}@_Z5tmainIiLi10EEiT_
-// CHECK19-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK19-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK19-NEXT:  entry:
 // CHECK19-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK19-NEXT:    [[A:%.*]] = alloca [10 x i32], align 4
@@ -23562,7 +23464,7 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK19-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK19-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK19-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l116.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.16, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK19-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l116.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.20, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.21, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK19-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK19-NEXT:    br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK19:       omp_offload.failed:
@@ -23580,7 +23482,7 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK19-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
 // CHECK19-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK19-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l121.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.19, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.20, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK19-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l121.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.24, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.25, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK19-NEXT:    [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
 // CHECK19-NEXT:    br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]]
 // CHECK19:       omp_offload.failed5:
@@ -23609,7 +23511,7 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0
 // CHECK19-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS8]], i32 0, i32 0
 // CHECK19-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK19-NEXT:    [[TMP32:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l126.region_id, i32 2, i8** [[TMP30]], i8** [[TMP31]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.23, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.24, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK19-NEXT:    [[TMP32:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l126.region_id, i32 2, i8** [[TMP30]], i8** [[TMP31]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.28, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.29, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK19-NEXT:    [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0
 // CHECK19-NEXT:    br i1 [[TMP33]], label [[OMP_OFFLOAD_FAILED11:%.*]], label [[OMP_OFFLOAD_CONT12:%.*]]
 // CHECK19:       omp_offload.failed11:
@@ -23627,7 +23529,7 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0
 // CHECK19-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 0
 // CHECK19-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK19-NEXT:    [[TMP41:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l131.region_id, i32 1, i8** [[TMP39]], i8** [[TMP40]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.27, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.28, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK19-NEXT:    [[TMP41:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l131.region_id, i32 1, i8** [[TMP39]], i8** [[TMP40]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.32, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.33, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK19-NEXT:    [[TMP42:%.*]] = icmp ne i32 [[TMP41]], 0
 // CHECK19-NEXT:    br i1 [[TMP42]], label [[OMP_OFFLOAD_FAILED17:%.*]], label [[OMP_OFFLOAD_CONT18:%.*]]
 // CHECK19:       omp_offload.failed17:
@@ -23656,7 +23558,7 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0
 // CHECK19-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 0
 // CHECK19-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK19-NEXT:    [[TMP57:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l136.region_id, i32 2, i8** [[TMP55]], i8** [[TMP56]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.31, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.32, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK19-NEXT:    [[TMP57:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l136.region_id, i32 2, i8** [[TMP55]], i8** [[TMP56]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.36, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.37, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK19-NEXT:    [[TMP58:%.*]] = icmp ne i32 [[TMP57]], 0
 // CHECK19-NEXT:    br i1 [[TMP58]], label [[OMP_OFFLOAD_FAILED24:%.*]], label [[OMP_OFFLOAD_CONT25:%.*]]
 // CHECK19:       omp_offload.failed24:
@@ -23672,11 +23574,11 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4
 // CHECK19-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4
 // CHECK19-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4
-// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK19-NEXT:    ret void
 //
 //
-// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..14
+// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..18
 // CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK19-NEXT:  entry:
 // CHECK19-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -23722,7 +23624,7 @@ int main (int argc, char **argv) {
 // CHECK19:       omp.inner.for.body:
 // CHECK19-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
 // CHECK19-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
-// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]])
+// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]])
 // CHECK19-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK19:       omp.inner.for.inc:
 // CHECK19-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -23737,7 +23639,7 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    ret void
 //
 //
-// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..15
+// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..19
 // CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK19-NEXT:  entry:
 // CHECK19-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -23817,11 +23719,11 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4
 // CHECK19-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4
 // CHECK19-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4
-// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..17 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..22 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK19-NEXT:    ret void
 //
 //
-// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..17
+// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..22
 // CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK19-NEXT:  entry:
 // CHECK19-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -23867,7 +23769,7 @@ int main (int argc, char **argv) {
 // CHECK19:       omp.inner.for.body:
 // CHECK19-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
 // CHECK19-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
-// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]])
+// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..23 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]])
 // CHECK19-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK19:       omp.inner.for.inc:
 // CHECK19-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -23882,7 +23784,7 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    ret void
 //
 //
-// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..18
+// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..23
 // CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK19-NEXT:  entry:
 // CHECK19-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -23971,11 +23873,11 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
 // CHECK19-NEXT:    store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK19-NEXT:    [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i32)* @.omp_outlined..21 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i32 [[TMP3]])
+// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i32)* @.omp_outlined..26 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i32 [[TMP3]])
 // CHECK19-NEXT:    ret void
 //
 //
-// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..21
+// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..26
 // CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK19-NEXT:  entry:
 // CHECK19-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -24027,7 +23929,7 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
 // CHECK19-NEXT:    store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK19-NEXT:    [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..22 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]])
+// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..27 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]])
 // CHECK19-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK19:       omp.inner.for.inc:
 // CHECK19-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -24042,7 +23944,7 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    ret void
 //
 //
-// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..22
+// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..27
 // CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK19-NEXT:  entry:
 // CHECK19-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -24144,11 +24046,11 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4
 // CHECK19-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4
 // CHECK19-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4
-// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..25 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK19-NEXT:    ret void
 //
 //
-// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..25
+// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..30
 // CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK19-NEXT:  entry:
 // CHECK19-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -24194,7 +24096,7 @@ int main (int argc, char **argv) {
 // CHECK19:       omp.inner.for.body:
 // CHECK19-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
 // CHECK19-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
-// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]])
+// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]])
 // CHECK19-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK19:       omp.inner.for.inc:
 // CHECK19-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -24209,7 +24111,7 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    ret void
 //
 //
-// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..26
+// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..31
 // CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK19-NEXT:  entry:
 // CHECK19-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -24296,11 +24198,11 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
 // CHECK19-NEXT:    store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK19-NEXT:    [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i32)* @.omp_outlined..29 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i32 [[TMP3]])
+// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i32)* @.omp_outlined..34 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i32 [[TMP3]])
 // CHECK19-NEXT:    ret void
 //
 //
-// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..29
+// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..34
 // CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK19-NEXT:  entry:
 // CHECK19-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -24352,7 +24254,7 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
 // CHECK19-NEXT:    store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK19-NEXT:    [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]])
+// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]])
 // CHECK19-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK19:       omp.inner.for.inc:
 // CHECK19-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -24367,7 +24269,7 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    ret void
 //
 //
-// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..30
+// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..35
 // CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK19-NEXT:  entry:
 // CHECK19-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -24443,7 +24345,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK19-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK19-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK19-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK19-NEXT:  entry:
 // CHECK19-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK19-NEXT:    ret void
@@ -24516,14 +24418,14 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4
 // CHECK20-NEXT:    [[TMP4:%.*]] = mul nuw i32 [[TMP0]], 4
 // CHECK20-NEXT:    [[TMP5:%.*]] = sext i32 [[TMP4]] to i64
-// CHECK20-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP3]], i32* [[TMP7]], align 4
-// CHECK20-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP3]], i32* [[TMP9]], align 4
-// CHECK20-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK20-NEXT:    store i64 4, i64* [[TMP10]], align 4
+// CHECK20-NEXT:    [[TMP6:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK20-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP6]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes to i8*), i32 24, i1 false)
+// CHECK20-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP3]], i32* [[TMP8]], align 4
+// CHECK20-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP3]], i32* [[TMP10]], align 4
 // CHECK20-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
 // CHECK20-NEXT:    store i8* null, i8** [[TMP11]], align 4
 // CHECK20-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -24532,298 +24434,284 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK20-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i32*
 // CHECK20-NEXT:    store i32 [[TMP0]], i32* [[TMP15]], align 4
-// CHECK20-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK20-NEXT:    store i64 4, i64* [[TMP16]], align 4
-// CHECK20-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK20-NEXT:    store i8* null, i8** [[TMP17]], align 4
-// CHECK20-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK20-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
-// CHECK20-NEXT:    store i32* [[VLA]], i32** [[TMP19]], align 4
-// CHECK20-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK20-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32**
-// CHECK20-NEXT:    store i32* [[VLA]], i32** [[TMP21]], align 4
-// CHECK20-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK20-NEXT:    store i64 [[TMP5]], i64* [[TMP22]], align 4
-// CHECK20-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK20-NEXT:    store i8* null, i8** [[TMP23]], align 4
-// CHECK20-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP27:%.*]] = load i32, i32* [[N]], align 4
-// CHECK20-NEXT:    store i32 [[TMP27]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK20-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK20-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP28]], 0
+// CHECK20-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK20-NEXT:    store i8* null, i8** [[TMP16]], align 4
+// CHECK20-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK20-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32**
+// CHECK20-NEXT:    store i32* [[VLA]], i32** [[TMP18]], align 4
+// CHECK20-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK20-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32**
+// CHECK20-NEXT:    store i32* [[VLA]], i32** [[TMP20]], align 4
+// CHECK20-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
+// CHECK20-NEXT:    store i64 [[TMP5]], i64* [[TMP21]], align 4
+// CHECK20-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK20-NEXT:    store i8* null, i8** [[TMP22]], align 4
+// CHECK20-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP26:%.*]] = load i32, i32* [[N]], align 4
+// CHECK20-NEXT:    store i32 [[TMP26]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK20-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK20-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP27]], 0
 // CHECK20-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK20-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK20-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK20-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK20-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP29]], 1
-// CHECK20-NEXT:    [[TMP30:%.*]] = zext i32 [[ADD]] to i64
-// CHECK20-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP30]])
-// CHECK20-NEXT:    [[TMP31:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l148.region_id, i32 3, i8** [[TMP24]], i8** [[TMP25]], i64* [[TMP26]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK20-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
-// CHECK20-NEXT:    br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK20-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK20-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP28]], 1
+// CHECK20-NEXT:    [[TMP29:%.*]] = zext i32 [[ADD]] to i64
+// CHECK20-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP29]])
+// CHECK20-NEXT:    [[TMP30:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l148.region_id, i32 3, i8** [[TMP23]], i8** [[TMP24]], i64* [[TMP25]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK20-NEXT:    [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
+// CHECK20-NEXT:    br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK20:       omp_offload.failed:
 // CHECK20-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l148(i32 [[TMP3]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK20-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK20:       omp_offload.cont:
-// CHECK20-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N]], align 4
-// CHECK20-NEXT:    store i32 [[TMP33]], i32* [[N_CASTED3]], align 4
-// CHECK20-NEXT:    [[TMP34:%.*]] = load i32, i32* [[N_CASTED3]], align 4
-// CHECK20-NEXT:    [[TMP35:%.*]] = mul nuw i32 [[TMP0]], 4
-// CHECK20-NEXT:    [[TMP36:%.*]] = sext i32 [[TMP35]] to i64
+// CHECK20-NEXT:    [[TMP32:%.*]] = load i32, i32* [[N]], align 4
+// CHECK20-NEXT:    store i32 [[TMP32]], i32* [[N_CASTED3]], align 4
+// CHECK20-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N_CASTED3]], align 4
+// CHECK20-NEXT:    [[TMP34:%.*]] = mul nuw i32 [[TMP0]], 4
+// CHECK20-NEXT:    [[TMP35:%.*]] = sext i32 [[TMP34]] to i64
+// CHECK20-NEXT:    [[TMP36:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES7]] to i8*
+// CHECK20-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP36]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes.4 to i8*), i32 24, i1 false)
 // CHECK20-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
 // CHECK20-NEXT:    [[TMP38:%.*]] = bitcast i8** [[TMP37]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP34]], i32* [[TMP38]], align 4
+// CHECK20-NEXT:    store i32 [[TMP33]], i32* [[TMP38]], align 4
 // CHECK20-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
 // CHECK20-NEXT:    [[TMP40:%.*]] = bitcast i8** [[TMP39]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP34]], i32* [[TMP40]], align 4
-// CHECK20-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 0
-// CHECK20-NEXT:    store i64 4, i64* [[TMP41]], align 4
-// CHECK20-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 0
-// CHECK20-NEXT:    store i8* null, i8** [[TMP42]], align 4
-// CHECK20-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 1
-// CHECK20-NEXT:    [[TMP44:%.*]] = bitcast i8** [[TMP43]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP0]], i32* [[TMP44]], align 4
-// CHECK20-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 1
-// CHECK20-NEXT:    [[TMP46:%.*]] = bitcast i8** [[TMP45]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP0]], i32* [[TMP46]], align 4
-// CHECK20-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 1
-// CHECK20-NEXT:    store i64 4, i64* [[TMP47]], align 4
-// CHECK20-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 1
-// CHECK20-NEXT:    store i8* null, i8** [[TMP48]], align 4
-// CHECK20-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 2
+// CHECK20-NEXT:    store i32 [[TMP33]], i32* [[TMP40]], align 4
+// CHECK20-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 0
+// CHECK20-NEXT:    store i8* null, i8** [[TMP41]], align 4
+// CHECK20-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 1
+// CHECK20-NEXT:    [[TMP43:%.*]] = bitcast i8** [[TMP42]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP0]], i32* [[TMP43]], align 4
+// CHECK20-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 1
+// CHECK20-NEXT:    [[TMP45:%.*]] = bitcast i8** [[TMP44]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP0]], i32* [[TMP45]], align 4
+// CHECK20-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 1
+// CHECK20-NEXT:    store i8* null, i8** [[TMP46]], align 4
+// CHECK20-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 2
+// CHECK20-NEXT:    [[TMP48:%.*]] = bitcast i8** [[TMP47]] to i32**
+// CHECK20-NEXT:    store i32* [[VLA]], i32** [[TMP48]], align 4
+// CHECK20-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 2
 // CHECK20-NEXT:    [[TMP50:%.*]] = bitcast i8** [[TMP49]] to i32**
 // CHECK20-NEXT:    store i32* [[VLA]], i32** [[TMP50]], align 4
-// CHECK20-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 2
-// CHECK20-NEXT:    [[TMP52:%.*]] = bitcast i8** [[TMP51]] to i32**
-// CHECK20-NEXT:    store i32* [[VLA]], i32** [[TMP52]], align 4
-// CHECK20-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 2
-// CHECK20-NEXT:    store i64 [[TMP36]], i64* [[TMP53]], align 4
-// CHECK20-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 2
-// CHECK20-NEXT:    store i8* null, i8** [[TMP54]], align 4
-// CHECK20-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP58:%.*]] = load i32, i32* [[N]], align 4
-// CHECK20-NEXT:    store i32 [[TMP58]], i32* [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK20-NEXT:    [[TMP59:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK20-NEXT:    [[SUB11:%.*]] = sub nsw i32 [[TMP59]], 0
+// CHECK20-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 2
+// CHECK20-NEXT:    store i64 [[TMP35]], i64* [[TMP51]], align 4
+// CHECK20-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 2
+// CHECK20-NEXT:    store i8* null, i8** [[TMP52]], align 4
+// CHECK20-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP56:%.*]] = load i32, i32* [[N]], align 4
+// CHECK20-NEXT:    store i32 [[TMP56]], i32* [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK20-NEXT:    [[TMP57:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK20-NEXT:    [[SUB11:%.*]] = sub nsw i32 [[TMP57]], 0
 // CHECK20-NEXT:    [[DIV12:%.*]] = sdiv i32 [[SUB11]], 1
 // CHECK20-NEXT:    [[SUB13:%.*]] = sub nsw i32 [[DIV12]], 1
 // CHECK20-NEXT:    store i32 [[SUB13]], i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK20-NEXT:    [[TMP60:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK20-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP60]], 1
-// CHECK20-NEXT:    [[TMP61:%.*]] = zext i32 [[ADD14]] to i64
-// CHECK20-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP61]])
-// CHECK20-NEXT:    [[TMP62:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l153.region_id, i32 3, i8** [[TMP55]], i8** [[TMP56]], i64* [[TMP57]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK20-NEXT:    [[TMP63:%.*]] = icmp ne i32 [[TMP62]], 0
-// CHECK20-NEXT:    br i1 [[TMP63]], label [[OMP_OFFLOAD_FAILED15:%.*]], label [[OMP_OFFLOAD_CONT16:%.*]]
+// CHECK20-NEXT:    [[TMP58:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
+// CHECK20-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP58]], 1
+// CHECK20-NEXT:    [[TMP59:%.*]] = zext i32 [[ADD14]] to i64
+// CHECK20-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP59]])
+// CHECK20-NEXT:    [[TMP60:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l153.region_id, i32 3, i8** [[TMP53]], i8** [[TMP54]], i64* [[TMP55]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.5, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK20-NEXT:    [[TMP61:%.*]] = icmp ne i32 [[TMP60]], 0
+// CHECK20-NEXT:    br i1 [[TMP61]], label [[OMP_OFFLOAD_FAILED15:%.*]], label [[OMP_OFFLOAD_CONT16:%.*]]
 // CHECK20:       omp_offload.failed15:
-// CHECK20-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l153(i32 [[TMP34]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK20-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l153(i32 [[TMP33]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK20-NEXT:    br label [[OMP_OFFLOAD_CONT16]]
 // CHECK20:       omp_offload.cont16:
-// CHECK20-NEXT:    [[TMP64:%.*]] = load i32, i32* [[M]], align 4
-// CHECK20-NEXT:    store i32 [[TMP64]], i32* [[M_CASTED]], align 4
-// CHECK20-NEXT:    [[TMP65:%.*]] = load i32, i32* [[M_CASTED]], align 4
-// CHECK20-NEXT:    [[TMP66:%.*]] = load i32, i32* [[N]], align 4
-// CHECK20-NEXT:    store i32 [[TMP66]], i32* [[N_CASTED17]], align 4
-// CHECK20-NEXT:    [[TMP67:%.*]] = load i32, i32* [[N_CASTED17]], align 4
-// CHECK20-NEXT:    [[TMP68:%.*]] = mul nuw i32 [[TMP0]], 4
-// CHECK20-NEXT:    [[TMP69:%.*]] = sext i32 [[TMP68]] to i64
-// CHECK20-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP71:%.*]] = bitcast i8** [[TMP70]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP65]], i32* [[TMP71]], align 4
-// CHECK20-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP73:%.*]] = bitcast i8** [[TMP72]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP65]], i32* [[TMP73]], align 4
-// CHECK20-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 0
-// CHECK20-NEXT:    store i64 4, i64* [[TMP74]], align 4
-// CHECK20-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 0
-// CHECK20-NEXT:    store i8* null, i8** [[TMP75]], align 4
-// CHECK20-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 1
+// CHECK20-NEXT:    [[TMP62:%.*]] = load i32, i32* [[M]], align 4
+// CHECK20-NEXT:    store i32 [[TMP62]], i32* [[M_CASTED]], align 4
+// CHECK20-NEXT:    [[TMP63:%.*]] = load i32, i32* [[M_CASTED]], align 4
+// CHECK20-NEXT:    [[TMP64:%.*]] = load i32, i32* [[N]], align 4
+// CHECK20-NEXT:    store i32 [[TMP64]], i32* [[N_CASTED17]], align 4
+// CHECK20-NEXT:    [[TMP65:%.*]] = load i32, i32* [[N_CASTED17]], align 4
+// CHECK20-NEXT:    [[TMP66:%.*]] = mul nuw i32 [[TMP0]], 4
+// CHECK20-NEXT:    [[TMP67:%.*]] = sext i32 [[TMP66]] to i64
+// CHECK20-NEXT:    [[TMP68:%.*]] = bitcast [4 x i64]* [[DOTOFFLOAD_SIZES21]] to i8*
+// CHECK20-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP68]], i8* align 4 bitcast ([4 x i64]* @.offload_sizes.8 to i8*), i32 32, i1 false)
+// CHECK20-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP70:%.*]] = bitcast i8** [[TMP69]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP63]], i32* [[TMP70]], align 4
+// CHECK20-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP72:%.*]] = bitcast i8** [[TMP71]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP63]], i32* [[TMP72]], align 4
+// CHECK20-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 0
+// CHECK20-NEXT:    store i8* null, i8** [[TMP73]], align 4
+// CHECK20-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 1
+// CHECK20-NEXT:    [[TMP75:%.*]] = bitcast i8** [[TMP74]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP65]], i32* [[TMP75]], align 4
+// CHECK20-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 1
 // CHECK20-NEXT:    [[TMP77:%.*]] = bitcast i8** [[TMP76]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP67]], i32* [[TMP77]], align 4
-// CHECK20-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 1
-// CHECK20-NEXT:    [[TMP79:%.*]] = bitcast i8** [[TMP78]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP67]], i32* [[TMP79]], align 4
-// CHECK20-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 1
-// CHECK20-NEXT:    store i64 4, i64* [[TMP80]], align 4
-// CHECK20-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 1
-// CHECK20-NEXT:    store i8* null, i8** [[TMP81]], align 4
-// CHECK20-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 2
-// CHECK20-NEXT:    [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP0]], i32* [[TMP83]], align 4
-// CHECK20-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 2
-// CHECK20-NEXT:    [[TMP85:%.*]] = bitcast i8** [[TMP84]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP0]], i32* [[TMP85]], align 4
-// CHECK20-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 2
-// CHECK20-NEXT:    store i64 4, i64* [[TMP86]], align 4
-// CHECK20-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 2
-// CHECK20-NEXT:    store i8* null, i8** [[TMP87]], align 4
-// CHECK20-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 3
-// CHECK20-NEXT:    [[TMP89:%.*]] = bitcast i8** [[TMP88]] to i32**
-// CHECK20-NEXT:    store i32* [[VLA]], i32** [[TMP89]], align 4
-// CHECK20-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 3
-// CHECK20-NEXT:    [[TMP91:%.*]] = bitcast i8** [[TMP90]] to i32**
-// CHECK20-NEXT:    store i32* [[VLA]], i32** [[TMP91]], align 4
-// CHECK20-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 3
-// CHECK20-NEXT:    store i64 [[TMP69]], i64* [[TMP92]], align 4
-// CHECK20-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 3
-// CHECK20-NEXT:    store i8* null, i8** [[TMP93]], align 4
-// CHECK20-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP97:%.*]] = load i32, i32* [[N]], align 4
-// CHECK20-NEXT:    store i32 [[TMP97]], i32* [[DOTCAPTURE_EXPR_23]], align 4
-// CHECK20-NEXT:    [[TMP98:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_23]], align 4
-// CHECK20-NEXT:    [[SUB25:%.*]] = sub nsw i32 [[TMP98]], 0
+// CHECK20-NEXT:    store i32 [[TMP65]], i32* [[TMP77]], align 4
+// CHECK20-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 1
+// CHECK20-NEXT:    store i8* null, i8** [[TMP78]], align 4
+// CHECK20-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 2
+// CHECK20-NEXT:    [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP0]], i32* [[TMP80]], align 4
+// CHECK20-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 2
+// CHECK20-NEXT:    [[TMP82:%.*]] = bitcast i8** [[TMP81]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP0]], i32* [[TMP82]], align 4
+// CHECK20-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 2
+// CHECK20-NEXT:    store i8* null, i8** [[TMP83]], align 4
+// CHECK20-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 3
+// CHECK20-NEXT:    [[TMP85:%.*]] = bitcast i8** [[TMP84]] to i32**
+// CHECK20-NEXT:    store i32* [[VLA]], i32** [[TMP85]], align 4
+// CHECK20-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 3
+// CHECK20-NEXT:    [[TMP87:%.*]] = bitcast i8** [[TMP86]] to i32**
+// CHECK20-NEXT:    store i32* [[VLA]], i32** [[TMP87]], align 4
+// CHECK20-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 3
+// CHECK20-NEXT:    store i64 [[TMP67]], i64* [[TMP88]], align 4
+// CHECK20-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 3
+// CHECK20-NEXT:    store i8* null, i8** [[TMP89]], align 4
+// CHECK20-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP93:%.*]] = load i32, i32* [[N]], align 4
+// CHECK20-NEXT:    store i32 [[TMP93]], i32* [[DOTCAPTURE_EXPR_23]], align 4
+// CHECK20-NEXT:    [[TMP94:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_23]], align 4
+// CHECK20-NEXT:    [[SUB25:%.*]] = sub nsw i32 [[TMP94]], 0
 // CHECK20-NEXT:    [[DIV26:%.*]] = sdiv i32 [[SUB25]], 1
 // CHECK20-NEXT:    [[SUB27:%.*]] = sub nsw i32 [[DIV26]], 1
 // CHECK20-NEXT:    store i32 [[SUB27]], i32* [[DOTCAPTURE_EXPR_24]], align 4
-// CHECK20-NEXT:    [[TMP99:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_24]], align 4
-// CHECK20-NEXT:    [[ADD28:%.*]] = add nsw i32 [[TMP99]], 1
-// CHECK20-NEXT:    [[TMP100:%.*]] = zext i32 [[ADD28]] to i64
-// CHECK20-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP100]])
-// CHECK20-NEXT:    [[TMP101:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l158.region_id, i32 4, i8** [[TMP94]], i8** [[TMP95]], i64* [[TMP96]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.7, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK20-NEXT:    [[TMP102:%.*]] = icmp ne i32 [[TMP101]], 0
-// CHECK20-NEXT:    br i1 [[TMP102]], label [[OMP_OFFLOAD_FAILED29:%.*]], label [[OMP_OFFLOAD_CONT30:%.*]]
+// CHECK20-NEXT:    [[TMP95:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_24]], align 4
+// CHECK20-NEXT:    [[ADD28:%.*]] = add nsw i32 [[TMP95]], 1
+// CHECK20-NEXT:    [[TMP96:%.*]] = zext i32 [[ADD28]] to i64
+// CHECK20-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP96]])
+// CHECK20-NEXT:    [[TMP97:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l158.region_id, i32 4, i8** [[TMP90]], i8** [[TMP91]], i64* [[TMP92]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK20-NEXT:    [[TMP98:%.*]] = icmp ne i32 [[TMP97]], 0
+// CHECK20-NEXT:    br i1 [[TMP98]], label [[OMP_OFFLOAD_FAILED29:%.*]], label [[OMP_OFFLOAD_CONT30:%.*]]
 // CHECK20:       omp_offload.failed29:
-// CHECK20-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l158(i32 [[TMP65]], i32 [[TMP67]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK20-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l158(i32 [[TMP63]], i32 [[TMP65]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK20-NEXT:    br label [[OMP_OFFLOAD_CONT30]]
 // CHECK20:       omp_offload.cont30:
-// CHECK20-NEXT:    [[TMP103:%.*]] = load i32, i32* [[N]], align 4
-// CHECK20-NEXT:    store i32 [[TMP103]], i32* [[N_CASTED31]], align 4
-// CHECK20-NEXT:    [[TMP104:%.*]] = load i32, i32* [[N_CASTED31]], align 4
-// CHECK20-NEXT:    [[TMP105:%.*]] = mul nuw i32 [[TMP0]], 4
-// CHECK20-NEXT:    [[TMP106:%.*]] = sext i32 [[TMP105]] to i64
-// CHECK20-NEXT:    [[TMP107:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS32]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP108:%.*]] = bitcast i8** [[TMP107]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP104]], i32* [[TMP108]], align 4
-// CHECK20-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS33]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP99:%.*]] = load i32, i32* [[N]], align 4
+// CHECK20-NEXT:    store i32 [[TMP99]], i32* [[N_CASTED31]], align 4
+// CHECK20-NEXT:    [[TMP100:%.*]] = load i32, i32* [[N_CASTED31]], align 4
+// CHECK20-NEXT:    [[TMP101:%.*]] = mul nuw i32 [[TMP0]], 4
+// CHECK20-NEXT:    [[TMP102:%.*]] = sext i32 [[TMP101]] to i64
+// CHECK20-NEXT:    [[TMP103:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES35]] to i8*
+// CHECK20-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP103]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes.12 to i8*), i32 24, i1 false)
+// CHECK20-NEXT:    [[TMP104:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS32]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP105:%.*]] = bitcast i8** [[TMP104]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP100]], i32* [[TMP105]], align 4
+// CHECK20-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS33]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP107:%.*]] = bitcast i8** [[TMP106]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP100]], i32* [[TMP107]], align 4
+// CHECK20-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS34]], i32 0, i32 0
+// CHECK20-NEXT:    store i8* null, i8** [[TMP108]], align 4
+// CHECK20-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS32]], i32 0, i32 1
 // CHECK20-NEXT:    [[TMP110:%.*]] = bitcast i8** [[TMP109]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP104]], i32* [[TMP110]], align 4
-// CHECK20-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES35]], i32 0, i32 0
-// CHECK20-NEXT:    store i64 4, i64* [[TMP111]], align 4
-// CHECK20-NEXT:    [[TMP112:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS34]], i32 0, i32 0
-// CHECK20-NEXT:    store i8* null, i8** [[TMP112]], align 4
-// CHECK20-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS32]], i32 0, i32 1
-// CHECK20-NEXT:    [[TMP114:%.*]] = bitcast i8** [[TMP113]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP0]], i32* [[TMP114]], align 4
-// CHECK20-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS33]], i32 0, i32 1
-// CHECK20-NEXT:    [[TMP116:%.*]] = bitcast i8** [[TMP115]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP0]], i32* [[TMP116]], align 4
-// CHECK20-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES35]], i32 0, i32 1
-// CHECK20-NEXT:    store i64 4, i64* [[TMP117]], align 4
-// CHECK20-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS34]], i32 0, i32 1
-// CHECK20-NEXT:    store i8* null, i8** [[TMP118]], align 4
-// CHECK20-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS32]], i32 0, i32 2
-// CHECK20-NEXT:    [[TMP120:%.*]] = bitcast i8** [[TMP119]] to i32**
-// CHECK20-NEXT:    store i32* [[VLA]], i32** [[TMP120]], align 4
-// CHECK20-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS33]], i32 0, i32 2
-// CHECK20-NEXT:    [[TMP122:%.*]] = bitcast i8** [[TMP121]] to i32**
-// CHECK20-NEXT:    store i32* [[VLA]], i32** [[TMP122]], align 4
-// CHECK20-NEXT:    [[TMP123:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES35]], i32 0, i32 2
-// CHECK20-NEXT:    store i64 [[TMP106]], i64* [[TMP123]], align 4
-// CHECK20-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS34]], i32 0, i32 2
-// CHECK20-NEXT:    store i8* null, i8** [[TMP124]], align 4
-// CHECK20-NEXT:    [[TMP125:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS32]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP126:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS33]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES35]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP128:%.*]] = load i32, i32* [[N]], align 4
-// CHECK20-NEXT:    store i32 [[TMP128]], i32* [[DOTCAPTURE_EXPR_37]], align 4
-// CHECK20-NEXT:    [[TMP129:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_37]], align 4
-// CHECK20-NEXT:    [[SUB39:%.*]] = sub nsw i32 [[TMP129]], 0
+// CHECK20-NEXT:    store i32 [[TMP0]], i32* [[TMP110]], align 4
+// CHECK20-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS33]], i32 0, i32 1
+// CHECK20-NEXT:    [[TMP112:%.*]] = bitcast i8** [[TMP111]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP0]], i32* [[TMP112]], align 4
+// CHECK20-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS34]], i32 0, i32 1
+// CHECK20-NEXT:    store i8* null, i8** [[TMP113]], align 4
+// CHECK20-NEXT:    [[TMP114:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS32]], i32 0, i32 2
+// CHECK20-NEXT:    [[TMP115:%.*]] = bitcast i8** [[TMP114]] to i32**
+// CHECK20-NEXT:    store i32* [[VLA]], i32** [[TMP115]], align 4
+// CHECK20-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS33]], i32 0, i32 2
+// CHECK20-NEXT:    [[TMP117:%.*]] = bitcast i8** [[TMP116]] to i32**
+// CHECK20-NEXT:    store i32* [[VLA]], i32** [[TMP117]], align 4
+// CHECK20-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES35]], i32 0, i32 2
+// CHECK20-NEXT:    store i64 [[TMP102]], i64* [[TMP118]], align 4
+// CHECK20-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS34]], i32 0, i32 2
+// CHECK20-NEXT:    store i8* null, i8** [[TMP119]], align 4
+// CHECK20-NEXT:    [[TMP120:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS32]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS33]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES35]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP123:%.*]] = load i32, i32* [[N]], align 4
+// CHECK20-NEXT:    store i32 [[TMP123]], i32* [[DOTCAPTURE_EXPR_37]], align 4
+// CHECK20-NEXT:    [[TMP124:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_37]], align 4
+// CHECK20-NEXT:    [[SUB39:%.*]] = sub nsw i32 [[TMP124]], 0
 // CHECK20-NEXT:    [[DIV40:%.*]] = sdiv i32 [[SUB39]], 1
 // CHECK20-NEXT:    [[SUB41:%.*]] = sub nsw i32 [[DIV40]], 1
 // CHECK20-NEXT:    store i32 [[SUB41]], i32* [[DOTCAPTURE_EXPR_38]], align 4
-// CHECK20-NEXT:    [[TMP130:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_38]], align 4
-// CHECK20-NEXT:    [[ADD42:%.*]] = add nsw i32 [[TMP130]], 1
-// CHECK20-NEXT:    [[TMP131:%.*]] = zext i32 [[ADD42]] to i64
-// CHECK20-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP131]])
-// CHECK20-NEXT:    [[TMP132:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l163.region_id, i32 3, i8** [[TMP125]], i8** [[TMP126]], i64* [[TMP127]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.10, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK20-NEXT:    [[TMP133:%.*]] = icmp ne i32 [[TMP132]], 0
-// CHECK20-NEXT:    br i1 [[TMP133]], label [[OMP_OFFLOAD_FAILED43:%.*]], label [[OMP_OFFLOAD_CONT44:%.*]]
+// CHECK20-NEXT:    [[TMP125:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_38]], align 4
+// CHECK20-NEXT:    [[ADD42:%.*]] = add nsw i32 [[TMP125]], 1
+// CHECK20-NEXT:    [[TMP126:%.*]] = zext i32 [[ADD42]] to i64
+// CHECK20-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP126]])
+// CHECK20-NEXT:    [[TMP127:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l163.region_id, i32 3, i8** [[TMP120]], i8** [[TMP121]], i64* [[TMP122]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK20-NEXT:    [[TMP128:%.*]] = icmp ne i32 [[TMP127]], 0
+// CHECK20-NEXT:    br i1 [[TMP128]], label [[OMP_OFFLOAD_FAILED43:%.*]], label [[OMP_OFFLOAD_CONT44:%.*]]
 // CHECK20:       omp_offload.failed43:
-// CHECK20-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l163(i32 [[TMP104]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK20-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l163(i32 [[TMP100]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK20-NEXT:    br label [[OMP_OFFLOAD_CONT44]]
 // CHECK20:       omp_offload.cont44:
-// CHECK20-NEXT:    [[TMP134:%.*]] = load i32, i32* [[M]], align 4
-// CHECK20-NEXT:    store i32 [[TMP134]], i32* [[M_CASTED45]], align 4
-// CHECK20-NEXT:    [[TMP135:%.*]] = load i32, i32* [[M_CASTED45]], align 4
-// CHECK20-NEXT:    [[TMP136:%.*]] = load i32, i32* [[N]], align 4
-// CHECK20-NEXT:    store i32 [[TMP136]], i32* [[N_CASTED46]], align 4
-// CHECK20-NEXT:    [[TMP137:%.*]] = load i32, i32* [[N_CASTED46]], align 4
-// CHECK20-NEXT:    [[TMP138:%.*]] = mul nuw i32 [[TMP0]], 4
-// CHECK20-NEXT:    [[TMP139:%.*]] = sext i32 [[TMP138]] to i64
-// CHECK20-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP141:%.*]] = bitcast i8** [[TMP140]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP135]], i32* [[TMP141]], align 4
-// CHECK20-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP143:%.*]] = bitcast i8** [[TMP142]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP135]], i32* [[TMP143]], align 4
-// CHECK20-NEXT:    [[TMP144:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES50]], i32 0, i32 0
-// CHECK20-NEXT:    store i64 4, i64* [[TMP144]], align 4
-// CHECK20-NEXT:    [[TMP145:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS49]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP129:%.*]] = load i32, i32* [[M]], align 4
+// CHECK20-NEXT:    store i32 [[TMP129]], i32* [[M_CASTED45]], align 4
+// CHECK20-NEXT:    [[TMP130:%.*]] = load i32, i32* [[M_CASTED45]], align 4
+// CHECK20-NEXT:    [[TMP131:%.*]] = load i32, i32* [[N]], align 4
+// CHECK20-NEXT:    store i32 [[TMP131]], i32* [[N_CASTED46]], align 4
+// CHECK20-NEXT:    [[TMP132:%.*]] = load i32, i32* [[N_CASTED46]], align 4
+// CHECK20-NEXT:    [[TMP133:%.*]] = mul nuw i32 [[TMP0]], 4
+// CHECK20-NEXT:    [[TMP134:%.*]] = sext i32 [[TMP133]] to i64
+// CHECK20-NEXT:    [[TMP135:%.*]] = bitcast [4 x i64]* [[DOTOFFLOAD_SIZES50]] to i8*
+// CHECK20-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP135]], i8* align 4 bitcast ([4 x i64]* @.offload_sizes.16 to i8*), i32 32, i1 false)
+// CHECK20-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP137:%.*]] = bitcast i8** [[TMP136]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP130]], i32* [[TMP137]], align 4
+// CHECK20-NEXT:    [[TMP138:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP139:%.*]] = bitcast i8** [[TMP138]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP130]], i32* [[TMP139]], align 4
+// CHECK20-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS49]], i32 0, i32 0
+// CHECK20-NEXT:    store i8* null, i8** [[TMP140]], align 4
+// CHECK20-NEXT:    [[TMP141:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 1
+// CHECK20-NEXT:    [[TMP142:%.*]] = bitcast i8** [[TMP141]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP132]], i32* [[TMP142]], align 4
+// CHECK20-NEXT:    [[TMP143:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 1
+// CHECK20-NEXT:    [[TMP144:%.*]] = bitcast i8** [[TMP143]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP132]], i32* [[TMP144]], align 4
+// CHECK20-NEXT:    [[TMP145:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS49]], i32 0, i32 1
 // CHECK20-NEXT:    store i8* null, i8** [[TMP145]], align 4
-// CHECK20-NEXT:    [[TMP146:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 1
+// CHECK20-NEXT:    [[TMP146:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 2
 // CHECK20-NEXT:    [[TMP147:%.*]] = bitcast i8** [[TMP146]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP137]], i32* [[TMP147]], align 4
-// CHECK20-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 1
+// CHECK20-NEXT:    store i32 [[TMP0]], i32* [[TMP147]], align 4
+// CHECK20-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 2
 // CHECK20-NEXT:    [[TMP149:%.*]] = bitcast i8** [[TMP148]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP137]], i32* [[TMP149]], align 4
-// CHECK20-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES50]], i32 0, i32 1
-// CHECK20-NEXT:    store i64 4, i64* [[TMP150]], align 4
-// CHECK20-NEXT:    [[TMP151:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS49]], i32 0, i32 1
-// CHECK20-NEXT:    store i8* null, i8** [[TMP151]], align 4
-// CHECK20-NEXT:    [[TMP152:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 2
-// CHECK20-NEXT:    [[TMP153:%.*]] = bitcast i8** [[TMP152]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP0]], i32* [[TMP153]], align 4
-// CHECK20-NEXT:    [[TMP154:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 2
-// CHECK20-NEXT:    [[TMP155:%.*]] = bitcast i8** [[TMP154]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP0]], i32* [[TMP155]], align 4
-// CHECK20-NEXT:    [[TMP156:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES50]], i32 0, i32 2
-// CHECK20-NEXT:    store i64 4, i64* [[TMP156]], align 4
-// CHECK20-NEXT:    [[TMP157:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS49]], i32 0, i32 2
-// CHECK20-NEXT:    store i8* null, i8** [[TMP157]], align 4
-// CHECK20-NEXT:    [[TMP158:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 3
-// CHECK20-NEXT:    [[TMP159:%.*]] = bitcast i8** [[TMP158]] to i32**
-// CHECK20-NEXT:    store i32* [[VLA]], i32** [[TMP159]], align 4
-// CHECK20-NEXT:    [[TMP160:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 3
-// CHECK20-NEXT:    [[TMP161:%.*]] = bitcast i8** [[TMP160]] to i32**
-// CHECK20-NEXT:    store i32* [[VLA]], i32** [[TMP161]], align 4
-// CHECK20-NEXT:    [[TMP162:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES50]], i32 0, i32 3
-// CHECK20-NEXT:    store i64 [[TMP139]], i64* [[TMP162]], align 4
-// CHECK20-NEXT:    [[TMP163:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS49]], i32 0, i32 3
-// CHECK20-NEXT:    store i8* null, i8** [[TMP163]], align 4
-// CHECK20-NEXT:    [[TMP164:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP165:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP166:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES50]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP167:%.*]] = load i32, i32* [[N]], align 4
-// CHECK20-NEXT:    store i32 [[TMP167]], i32* [[DOTCAPTURE_EXPR_52]], align 4
-// CHECK20-NEXT:    [[TMP168:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_52]], align 4
-// CHECK20-NEXT:    [[SUB54:%.*]] = sub nsw i32 [[TMP168]], 0
+// CHECK20-NEXT:    store i32 [[TMP0]], i32* [[TMP149]], align 4
+// CHECK20-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS49]], i32 0, i32 2
+// CHECK20-NEXT:    store i8* null, i8** [[TMP150]], align 4
+// CHECK20-NEXT:    [[TMP151:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 3
+// CHECK20-NEXT:    [[TMP152:%.*]] = bitcast i8** [[TMP151]] to i32**
+// CHECK20-NEXT:    store i32* [[VLA]], i32** [[TMP152]], align 4
+// CHECK20-NEXT:    [[TMP153:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 3
+// CHECK20-NEXT:    [[TMP154:%.*]] = bitcast i8** [[TMP153]] to i32**
+// CHECK20-NEXT:    store i32* [[VLA]], i32** [[TMP154]], align 4
+// CHECK20-NEXT:    [[TMP155:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES50]], i32 0, i32 3
+// CHECK20-NEXT:    store i64 [[TMP134]], i64* [[TMP155]], align 4
+// CHECK20-NEXT:    [[TMP156:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS49]], i32 0, i32 3
+// CHECK20-NEXT:    store i8* null, i8** [[TMP156]], align 4
+// CHECK20-NEXT:    [[TMP157:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP158:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP159:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES50]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP160:%.*]] = load i32, i32* [[N]], align 4
+// CHECK20-NEXT:    store i32 [[TMP160]], i32* [[DOTCAPTURE_EXPR_52]], align 4
+// CHECK20-NEXT:    [[TMP161:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_52]], align 4
+// CHECK20-NEXT:    [[SUB54:%.*]] = sub nsw i32 [[TMP161]], 0
 // CHECK20-NEXT:    [[DIV55:%.*]] = sdiv i32 [[SUB54]], 1
 // CHECK20-NEXT:    [[SUB56:%.*]] = sub nsw i32 [[DIV55]], 1
 // CHECK20-NEXT:    store i32 [[SUB56]], i32* [[DOTCAPTURE_EXPR_53]], align 4
-// CHECK20-NEXT:    [[TMP169:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_53]], align 4
-// CHECK20-NEXT:    [[ADD57:%.*]] = add nsw i32 [[TMP169]], 1
-// CHECK20-NEXT:    [[TMP170:%.*]] = zext i32 [[ADD57]] to i64
-// CHECK20-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP170]])
-// CHECK20-NEXT:    [[TMP171:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l168.region_id, i32 4, i8** [[TMP164]], i8** [[TMP165]], i64* [[TMP166]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK20-NEXT:    [[TMP172:%.*]] = icmp ne i32 [[TMP171]], 0
-// CHECK20-NEXT:    br i1 [[TMP172]], label [[OMP_OFFLOAD_FAILED58:%.*]], label [[OMP_OFFLOAD_CONT59:%.*]]
+// CHECK20-NEXT:    [[TMP162:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_53]], align 4
+// CHECK20-NEXT:    [[ADD57:%.*]] = add nsw i32 [[TMP162]], 1
+// CHECK20-NEXT:    [[TMP163:%.*]] = zext i32 [[ADD57]] to i64
+// CHECK20-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP163]])
+// CHECK20-NEXT:    [[TMP164:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l168.region_id, i32 4, i8** [[TMP157]], i8** [[TMP158]], i64* [[TMP159]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.17, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK20-NEXT:    [[TMP165:%.*]] = icmp ne i32 [[TMP164]], 0
+// CHECK20-NEXT:    br i1 [[TMP165]], label [[OMP_OFFLOAD_FAILED58:%.*]], label [[OMP_OFFLOAD_CONT59:%.*]]
 // CHECK20:       omp_offload.failed58:
-// CHECK20-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l168(i32 [[TMP135]], i32 [[TMP137]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK20-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l168(i32 [[TMP130]], i32 [[TMP132]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK20-NEXT:    br label [[OMP_OFFLOAD_CONT59]]
 // CHECK20:       omp_offload.cont59:
-// CHECK20-NEXT:    [[TMP173:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK20-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10EEiT_(i32 noundef [[TMP173]])
+// CHECK20-NEXT:    [[TMP166:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK20-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10EEiT_(i32 noundef [[TMP166]])
 // CHECK20-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK20-NEXT:    [[TMP174:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK20-NEXT:    call void @llvm.stackrestore(i8* [[TMP174]])
-// CHECK20-NEXT:    [[TMP175:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK20-NEXT:    ret i32 [[TMP175]]
+// CHECK20-NEXT:    [[TMP167:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK20-NEXT:    call void @llvm.stackrestore(i8* [[TMP167]])
+// CHECK20-NEXT:    [[TMP168:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK20-NEXT:    ret i32 [[TMP168]]
 //
 //
 // CHECK20-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l148
@@ -25258,11 +25146,11 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
 // CHECK20-NEXT:    store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK20-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*, i32)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP4]])
+// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*, i32)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP4]])
 // CHECK20-NEXT:    ret void
 //
 //
-// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..5
+// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..6
 // CHECK20-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK20-NEXT:  entry:
 // CHECK20-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -25340,7 +25228,7 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
 // CHECK20-NEXT:    store i32 [[TMP19]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK20-NEXT:    [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*, i32)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]], i32 [[TMP20]])
+// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*, i32)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]], i32 [[TMP20]])
 // CHECK20-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK20:       omp.inner.for.inc:
 // CHECK20-NEXT:    [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -25382,7 +25270,7 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    ret void
 //
 //
-// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..6
+// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..7
 // CHECK20-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK20-NEXT:  entry:
 // CHECK20-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -25497,11 +25385,11 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    store i32* [[A]], i32** [[A_ADDR]], align 4
 // CHECK20-NEXT:    [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4
 // CHECK20-NEXT:    [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4
-// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]])
+// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]])
 // CHECK20-NEXT:    ret void
 //
 //
-// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..8
+// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..10
 // CHECK20-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] {
 // CHECK20-NEXT:  entry:
 // CHECK20-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -25571,7 +25459,7 @@ int main (int argc, char **argv) {
 // CHECK20:       omp.inner.for.body:
 // CHECK20-NEXT:    [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
 // CHECK20-NEXT:    [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
-// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]])
+// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]])
 // CHECK20-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK20:       omp.inner.for.inc:
 // CHECK20-NEXT:    [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -25590,7 +25478,7 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    ret void
 //
 //
-// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..9
+// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..11
 // CHECK20-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] {
 // CHECK20-NEXT:  entry:
 // CHECK20-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -25708,11 +25596,11 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
 // CHECK20-NEXT:    store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK20-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*, i32)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP4]])
+// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*, i32)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP4]])
 // CHECK20-NEXT:    ret void
 //
 //
-// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..11
+// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..14
 // CHECK20-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK20-NEXT:  entry:
 // CHECK20-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -25788,7 +25676,7 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
 // CHECK20-NEXT:    store i32 [[TMP18]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK20-NEXT:    [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*, i32)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]], i32 [[TMP19]])
+// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*, i32)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]], i32 [[TMP19]])
 // CHECK20-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK20:       omp.inner.for.inc:
 // CHECK20-NEXT:    [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -25807,7 +25695,7 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    ret void
 //
 //
-// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..12
+// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..15
 // CHECK20-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK20-NEXT:  entry:
 // CHECK20-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -25909,7 +25797,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK20-LABEL: define {{[^@]+}}@_Z5tmainIiLi10EEiT_
-// CHECK20-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK20-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK20-NEXT:  entry:
 // CHECK20-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK20-NEXT:    [[A:%.*]] = alloca [10 x i32], align 4
@@ -25949,7 +25837,7 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK20-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK20-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK20-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l116.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.16, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK20-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l116.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.20, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.21, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK20-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK20-NEXT:    br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK20:       omp_offload.failed:
@@ -25967,7 +25855,7 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK20-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
 // CHECK20-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK20-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l121.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.19, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.20, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK20-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l121.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.24, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.25, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK20-NEXT:    [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
 // CHECK20-NEXT:    br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]]
 // CHECK20:       omp_offload.failed5:
@@ -25996,7 +25884,7 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0
 // CHECK20-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS8]], i32 0, i32 0
 // CHECK20-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK20-NEXT:    [[TMP32:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l126.region_id, i32 2, i8** [[TMP30]], i8** [[TMP31]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.23, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.24, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK20-NEXT:    [[TMP32:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l126.region_id, i32 2, i8** [[TMP30]], i8** [[TMP31]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.28, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.29, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK20-NEXT:    [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0
 // CHECK20-NEXT:    br i1 [[TMP33]], label [[OMP_OFFLOAD_FAILED11:%.*]], label [[OMP_OFFLOAD_CONT12:%.*]]
 // CHECK20:       omp_offload.failed11:
@@ -26014,7 +25902,7 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0
 // CHECK20-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 0
 // CHECK20-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK20-NEXT:    [[TMP41:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l131.region_id, i32 1, i8** [[TMP39]], i8** [[TMP40]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.27, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.28, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK20-NEXT:    [[TMP41:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l131.region_id, i32 1, i8** [[TMP39]], i8** [[TMP40]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.32, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.33, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK20-NEXT:    [[TMP42:%.*]] = icmp ne i32 [[TMP41]], 0
 // CHECK20-NEXT:    br i1 [[TMP42]], label [[OMP_OFFLOAD_FAILED17:%.*]], label [[OMP_OFFLOAD_CONT18:%.*]]
 // CHECK20:       omp_offload.failed17:
@@ -26043,7 +25931,7 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0
 // CHECK20-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 0
 // CHECK20-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK20-NEXT:    [[TMP57:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l136.region_id, i32 2, i8** [[TMP55]], i8** [[TMP56]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.31, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.32, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK20-NEXT:    [[TMP57:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l136.region_id, i32 2, i8** [[TMP55]], i8** [[TMP56]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.36, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.37, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK20-NEXT:    [[TMP58:%.*]] = icmp ne i32 [[TMP57]], 0
 // CHECK20-NEXT:    br i1 [[TMP58]], label [[OMP_OFFLOAD_FAILED24:%.*]], label [[OMP_OFFLOAD_CONT25:%.*]]
 // CHECK20:       omp_offload.failed24:
@@ -26059,11 +25947,11 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4
 // CHECK20-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4
 // CHECK20-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4
-// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK20-NEXT:    ret void
 //
 //
-// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..14
+// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..18
 // CHECK20-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK20-NEXT:  entry:
 // CHECK20-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -26109,7 +25997,7 @@ int main (int argc, char **argv) {
 // CHECK20:       omp.inner.for.body:
 // CHECK20-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
 // CHECK20-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
-// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]])
+// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]])
 // CHECK20-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK20:       omp.inner.for.inc:
 // CHECK20-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -26124,7 +26012,7 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    ret void
 //
 //
-// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..15
+// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..19
 // CHECK20-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK20-NEXT:  entry:
 // CHECK20-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -26204,11 +26092,11 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4
 // CHECK20-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4
 // CHECK20-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4
-// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..17 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..22 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK20-NEXT:    ret void
 //
 //
-// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..17
+// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..22
 // CHECK20-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK20-NEXT:  entry:
 // CHECK20-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -26254,7 +26142,7 @@ int main (int argc, char **argv) {
 // CHECK20:       omp.inner.for.body:
 // CHECK20-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
 // CHECK20-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
-// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]])
+// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..23 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]])
 // CHECK20-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK20:       omp.inner.for.inc:
 // CHECK20-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -26269,7 +26157,7 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    ret void
 //
 //
-// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..18
+// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..23
 // CHECK20-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK20-NEXT:  entry:
 // CHECK20-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -26358,11 +26246,11 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
 // CHECK20-NEXT:    store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK20-NEXT:    [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i32)* @.omp_outlined..21 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i32 [[TMP3]])
+// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i32)* @.omp_outlined..26 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i32 [[TMP3]])
 // CHECK20-NEXT:    ret void
 //
 //
-// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..21
+// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..26
 // CHECK20-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK20-NEXT:  entry:
 // CHECK20-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -26414,7 +26302,7 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
 // CHECK20-NEXT:    store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK20-NEXT:    [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..22 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]])
+// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..27 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]])
 // CHECK20-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK20:       omp.inner.for.inc:
 // CHECK20-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -26429,7 +26317,7 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    ret void
 //
 //
-// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..22
+// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..27
 // CHECK20-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK20-NEXT:  entry:
 // CHECK20-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -26531,11 +26419,11 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4
 // CHECK20-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4
 // CHECK20-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4
-// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..25 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK20-NEXT:    ret void
 //
 //
-// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..25
+// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..30
 // CHECK20-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK20-NEXT:  entry:
 // CHECK20-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -26581,7 +26469,7 @@ int main (int argc, char **argv) {
 // CHECK20:       omp.inner.for.body:
 // CHECK20-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4
 // CHECK20-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4
-// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]])
+// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]])
 // CHECK20-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK20:       omp.inner.for.inc:
 // CHECK20-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -26596,7 +26484,7 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    ret void
 //
 //
-// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..26
+// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..31
 // CHECK20-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK20-NEXT:  entry:
 // CHECK20-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -26683,11 +26571,11 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
 // CHECK20-NEXT:    store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK20-NEXT:    [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i32)* @.omp_outlined..29 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i32 [[TMP3]])
+// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i32)* @.omp_outlined..34 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i32 [[TMP3]])
 // CHECK20-NEXT:    ret void
 //
 //
-// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..29
+// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..34
 // CHECK20-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK20-NEXT:  entry:
 // CHECK20-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -26739,7 +26627,7 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4
 // CHECK20-NEXT:    store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK20-NEXT:    [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]])
+// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]])
 // CHECK20-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK20:       omp.inner.for.inc:
 // CHECK20-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
@@ -26754,7 +26642,7 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    ret void
 //
 //
-// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..30
+// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..35
 // CHECK20-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK20-NEXT:  entry:
 // CHECK20-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -26830,7 +26718,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK20-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK20-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK20-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK20-NEXT:  entry:
 // CHECK20-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK20-NEXT:    ret void

diff  --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_codegen.cpp
index 4b44d910afee8..24f66d1b7a398 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_codegen.cpp
@@ -3297,14 +3297,14 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    store i32 [[TMP5]], i32* [[CONV1]], align 4
 // CHECK9-NEXT:    [[TMP6:%.*]] = load i64, i64* [[N_CASTED]], align 8
 // CHECK9-NEXT:    [[TMP7:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK9-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP9]], align 8
-// CHECK9-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP11:%.*]] = bitcast i8** [[TMP10]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP11]], align 8
-// CHECK9-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK9-NEXT:    store i64 8, i64* [[TMP12]], align 8
+// CHECK9-NEXT:    [[TMP8:%.*]] = bitcast [4 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK9-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP8]], i8* align 8 bitcast ([4 x i64]* @.offload_sizes to i8*), i64 32, i1 false)
+// CHECK9-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP10]], align 8
+// CHECK9-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP12]], align 8
 // CHECK9-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
 // CHECK9-NEXT:    store i8* null, i8** [[TMP13]], align 8
 // CHECK9-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -3323,46 +3323,42 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
 // CHECK9-NEXT:    [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i64*
 // CHECK9-NEXT:    store i64 [[TMP4]], i64* [[TMP23]], align 8
-// CHECK9-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK9-NEXT:    store i64 4, i64* [[TMP24]], align 8
-// CHECK9-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK9-NEXT:    store i8* null, i8** [[TMP25]], align 8
-// CHECK9-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
-// CHECK9-NEXT:    [[TMP27:%.*]] = bitcast i8** [[TMP26]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP6]], i64* [[TMP27]], align 8
-// CHECK9-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK9-NEXT:    [[TMP29:%.*]] = bitcast i8** [[TMP28]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP6]], i64* [[TMP29]], align 8
-// CHECK9-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK9-NEXT:    store i64 4, i64* [[TMP30]], align 8
-// CHECK9-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
-// CHECK9-NEXT:    store i8* null, i8** [[TMP31]], align 8
-// CHECK9-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP35:%.*]] = load i32, i32* [[N]], align 4
-// CHECK9-NEXT:    store i32 [[TMP35]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK9-NEXT:    [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK9-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP36]], 0
+// CHECK9-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK9-NEXT:    store i8* null, i8** [[TMP24]], align 8
+// CHECK9-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK9-NEXT:    [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP6]], i64* [[TMP26]], align 8
+// CHECK9-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
+// CHECK9-NEXT:    [[TMP28:%.*]] = bitcast i8** [[TMP27]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP6]], i64* [[TMP28]], align 8
+// CHECK9-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
+// CHECK9-NEXT:    store i8* null, i8** [[TMP29]], align 8
+// CHECK9-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N]], align 4
+// CHECK9-NEXT:    store i32 [[TMP33]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK9-NEXT:    [[TMP34:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK9-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP34]], 0
 // CHECK9-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK9-NEXT:    [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK9-NEXT:    store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK9-NEXT:    [[TMP37:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK9-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP37]], 1
-// CHECK9-NEXT:    [[TMP38:%.*]] = zext i32 [[ADD]] to i64
-// CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB4:[0-9]+]], i64 -1, i64 [[TMP38]])
-// CHECK9-NEXT:    [[TMP39:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB4]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l74.region_id, i32 4, i8** [[TMP32]], i8** [[TMP33]], i64* [[TMP34]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK9-NEXT:    [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0
-// CHECK9-NEXT:    br i1 [[TMP40]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK9-NEXT:    [[TMP35:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK9-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP35]], 1
+// CHECK9-NEXT:    [[TMP36:%.*]] = zext i32 [[ADD]] to i64
+// CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB4:[0-9]+]], i64 -1, i64 [[TMP36]])
+// CHECK9-NEXT:    [[TMP37:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB4]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l74.region_id, i32 4, i8** [[TMP30]], i8** [[TMP31]], i64* [[TMP32]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK9-NEXT:    [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0
+// CHECK9-NEXT:    br i1 [[TMP38]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK9:       omp_offload.failed:
 // CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l74(i64 [[TMP1]], i32* [[VLA]], i64 [[TMP4]], i64 [[TMP6]]) #[[ATTR5:[0-9]+]]
 // CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK9:       omp_offload.cont:
 // CHECK9-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[VLA]], i64 0
-// CHECK9-NEXT:    [[TMP41:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
-// CHECK9-NEXT:    [[TMP42:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK9-NEXT:    call void @llvm.stackrestore(i8* [[TMP42]])
-// CHECK9-NEXT:    ret i32 [[TMP41]]
+// CHECK9-NEXT:    [[TMP39:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+// CHECK9-NEXT:    [[TMP40:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK9-NEXT:    call void @llvm.stackrestore(i8* [[TMP40]])
+// CHECK9-NEXT:    ret i32 [[TMP39]]
 //
 //
 // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l74
@@ -3626,7 +3622,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK9-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK9-SAME: () #[[ATTR6:[0-9]+]] {
+// CHECK9-SAME: () #[[ATTR7:[0-9]+]] {
 // CHECK9-NEXT:  entry:
 // CHECK9-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK9-NEXT:    ret void
@@ -3664,14 +3660,14 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    store i32 [[TMP5]], i32* [[CONV1]], align 4
 // CHECK10-NEXT:    [[TMP6:%.*]] = load i64, i64* [[N_CASTED]], align 8
 // CHECK10-NEXT:    [[TMP7:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK10-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP9]], align 8
-// CHECK10-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP11:%.*]] = bitcast i8** [[TMP10]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP11]], align 8
-// CHECK10-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK10-NEXT:    store i64 8, i64* [[TMP12]], align 8
+// CHECK10-NEXT:    [[TMP8:%.*]] = bitcast [4 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK10-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP8]], i8* align 8 bitcast ([4 x i64]* @.offload_sizes to i8*), i64 32, i1 false)
+// CHECK10-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP10]], align 8
+// CHECK10-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP12]], align 8
 // CHECK10-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
 // CHECK10-NEXT:    store i8* null, i8** [[TMP13]], align 8
 // CHECK10-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -3690,46 +3686,42 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
 // CHECK10-NEXT:    [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i64*
 // CHECK10-NEXT:    store i64 [[TMP4]], i64* [[TMP23]], align 8
-// CHECK10-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK10-NEXT:    store i64 4, i64* [[TMP24]], align 8
-// CHECK10-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK10-NEXT:    store i8* null, i8** [[TMP25]], align 8
-// CHECK10-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
-// CHECK10-NEXT:    [[TMP27:%.*]] = bitcast i8** [[TMP26]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP6]], i64* [[TMP27]], align 8
-// CHECK10-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK10-NEXT:    [[TMP29:%.*]] = bitcast i8** [[TMP28]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP6]], i64* [[TMP29]], align 8
-// CHECK10-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK10-NEXT:    store i64 4, i64* [[TMP30]], align 8
-// CHECK10-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
-// CHECK10-NEXT:    store i8* null, i8** [[TMP31]], align 8
-// CHECK10-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP35:%.*]] = load i32, i32* [[N]], align 4
-// CHECK10-NEXT:    store i32 [[TMP35]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK10-NEXT:    [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK10-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP36]], 0
+// CHECK10-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK10-NEXT:    store i8* null, i8** [[TMP24]], align 8
+// CHECK10-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK10-NEXT:    [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP6]], i64* [[TMP26]], align 8
+// CHECK10-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
+// CHECK10-NEXT:    [[TMP28:%.*]] = bitcast i8** [[TMP27]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP6]], i64* [[TMP28]], align 8
+// CHECK10-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
+// CHECK10-NEXT:    store i8* null, i8** [[TMP29]], align 8
+// CHECK10-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N]], align 4
+// CHECK10-NEXT:    store i32 [[TMP33]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK10-NEXT:    [[TMP34:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK10-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP34]], 0
 // CHECK10-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK10-NEXT:    [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK10-NEXT:    store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK10-NEXT:    [[TMP37:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK10-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP37]], 1
-// CHECK10-NEXT:    [[TMP38:%.*]] = zext i32 [[ADD]] to i64
-// CHECK10-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB4:[0-9]+]], i64 -1, i64 [[TMP38]])
-// CHECK10-NEXT:    [[TMP39:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB4]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l74.region_id, i32 4, i8** [[TMP32]], i8** [[TMP33]], i64* [[TMP34]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK10-NEXT:    [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0
-// CHECK10-NEXT:    br i1 [[TMP40]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK10-NEXT:    [[TMP35:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK10-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP35]], 1
+// CHECK10-NEXT:    [[TMP36:%.*]] = zext i32 [[ADD]] to i64
+// CHECK10-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB4:[0-9]+]], i64 -1, i64 [[TMP36]])
+// CHECK10-NEXT:    [[TMP37:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB4]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l74.region_id, i32 4, i8** [[TMP30]], i8** [[TMP31]], i64* [[TMP32]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK10-NEXT:    [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0
+// CHECK10-NEXT:    br i1 [[TMP38]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK10:       omp_offload.failed:
 // CHECK10-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l74(i64 [[TMP1]], i32* [[VLA]], i64 [[TMP4]], i64 [[TMP6]]) #[[ATTR5:[0-9]+]]
 // CHECK10-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK10:       omp_offload.cont:
 // CHECK10-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[VLA]], i64 0
-// CHECK10-NEXT:    [[TMP41:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
-// CHECK10-NEXT:    [[TMP42:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK10-NEXT:    call void @llvm.stackrestore(i8* [[TMP42]])
-// CHECK10-NEXT:    ret i32 [[TMP41]]
+// CHECK10-NEXT:    [[TMP39:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+// CHECK10-NEXT:    [[TMP40:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK10-NEXT:    call void @llvm.stackrestore(i8* [[TMP40]])
+// CHECK10-NEXT:    ret i32 [[TMP39]]
 //
 //
 // CHECK10-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l74
@@ -3993,7 +3985,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK10-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK10-SAME: () #[[ATTR6:[0-9]+]] {
+// CHECK10-SAME: () #[[ATTR7:[0-9]+]] {
 // CHECK10-NEXT:  entry:
 // CHECK10-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK10-NEXT:    ret void
@@ -4029,14 +4021,14 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP5:%.*]] = load i32, i32* [[N_CASTED]], align 4
 // CHECK11-NEXT:    [[TMP6:%.*]] = mul nuw i32 [[TMP0]], 4
 // CHECK11-NEXT:    [[TMP7:%.*]] = sext i32 [[TMP6]] to i64
-// CHECK11-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP9]], align 4
-// CHECK11-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP11:%.*]] = bitcast i8** [[TMP10]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP11]], align 4
-// CHECK11-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK11-NEXT:    store i64 4, i64* [[TMP12]], align 4
+// CHECK11-NEXT:    [[TMP8:%.*]] = bitcast [4 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK11-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP8]], i8* align 4 bitcast ([4 x i64]* @.offload_sizes to i8*), i32 32, i1 false)
+// CHECK11-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP10]], align 4
+// CHECK11-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP12]], align 4
 // CHECK11-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
 // CHECK11-NEXT:    store i8* null, i8** [[TMP13]], align 4
 // CHECK11-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -4055,46 +4047,42 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
 // CHECK11-NEXT:    [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i32*
 // CHECK11-NEXT:    store i32 [[TMP3]], i32* [[TMP23]], align 4
-// CHECK11-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK11-NEXT:    store i64 4, i64* [[TMP24]], align 4
-// CHECK11-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK11-NEXT:    store i8* null, i8** [[TMP25]], align 4
-// CHECK11-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
-// CHECK11-NEXT:    [[TMP27:%.*]] = bitcast i8** [[TMP26]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP5]], i32* [[TMP27]], align 4
-// CHECK11-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK11-NEXT:    [[TMP29:%.*]] = bitcast i8** [[TMP28]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP5]], i32* [[TMP29]], align 4
-// CHECK11-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK11-NEXT:    store i64 4, i64* [[TMP30]], align 4
-// CHECK11-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
-// CHECK11-NEXT:    store i8* null, i8** [[TMP31]], align 4
-// CHECK11-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP35:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP35]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK11-NEXT:    [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK11-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP36]], 0
+// CHECK11-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK11-NEXT:    store i8* null, i8** [[TMP24]], align 4
+// CHECK11-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK11-NEXT:    [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP5]], i32* [[TMP26]], align 4
+// CHECK11-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
+// CHECK11-NEXT:    [[TMP28:%.*]] = bitcast i8** [[TMP27]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP5]], i32* [[TMP28]], align 4
+// CHECK11-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
+// CHECK11-NEXT:    store i8* null, i8** [[TMP29]], align 4
+// CHECK11-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N]], align 4
+// CHECK11-NEXT:    store i32 [[TMP33]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK11-NEXT:    [[TMP34:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK11-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP34]], 0
 // CHECK11-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK11-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK11-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK11-NEXT:    [[TMP37:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK11-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP37]], 1
-// CHECK11-NEXT:    [[TMP38:%.*]] = zext i32 [[ADD]] to i64
-// CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB4:[0-9]+]], i64 -1, i64 [[TMP38]])
-// CHECK11-NEXT:    [[TMP39:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB4]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l74.region_id, i32 4, i8** [[TMP32]], i8** [[TMP33]], i64* [[TMP34]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK11-NEXT:    [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0
-// CHECK11-NEXT:    br i1 [[TMP40]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK11-NEXT:    [[TMP35:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK11-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP35]], 1
+// CHECK11-NEXT:    [[TMP36:%.*]] = zext i32 [[ADD]] to i64
+// CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB4:[0-9]+]], i64 -1, i64 [[TMP36]])
+// CHECK11-NEXT:    [[TMP37:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB4]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l74.region_id, i32 4, i8** [[TMP30]], i8** [[TMP31]], i64* [[TMP32]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK11-NEXT:    [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0
+// CHECK11-NEXT:    br i1 [[TMP38]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK11:       omp_offload.failed:
 // CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l74(i32 [[TMP0]], i32* [[VLA]], i32 [[TMP3]], i32 [[TMP5]]) #[[ATTR5:[0-9]+]]
 // CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK11:       omp_offload.cont:
 // CHECK11-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[VLA]], i32 0
-// CHECK11-NEXT:    [[TMP41:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
-// CHECK11-NEXT:    [[TMP42:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK11-NEXT:    call void @llvm.stackrestore(i8* [[TMP42]])
-// CHECK11-NEXT:    ret i32 [[TMP41]]
+// CHECK11-NEXT:    [[TMP39:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+// CHECK11-NEXT:    [[TMP40:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK11-NEXT:    call void @llvm.stackrestore(i8* [[TMP40]])
+// CHECK11-NEXT:    ret i32 [[TMP39]]
 //
 //
 // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l74
@@ -4351,7 +4339,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK11-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK11-SAME: () #[[ATTR6:[0-9]+]] {
+// CHECK11-SAME: () #[[ATTR7:[0-9]+]] {
 // CHECK11-NEXT:  entry:
 // CHECK11-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK11-NEXT:    ret void
@@ -4387,14 +4375,14 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[TMP5:%.*]] = load i32, i32* [[N_CASTED]], align 4
 // CHECK12-NEXT:    [[TMP6:%.*]] = mul nuw i32 [[TMP0]], 4
 // CHECK12-NEXT:    [[TMP7:%.*]] = sext i32 [[TMP6]] to i64
-// CHECK12-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP9]], align 4
-// CHECK12-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP11:%.*]] = bitcast i8** [[TMP10]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP11]], align 4
-// CHECK12-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK12-NEXT:    store i64 4, i64* [[TMP12]], align 4
+// CHECK12-NEXT:    [[TMP8:%.*]] = bitcast [4 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK12-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP8]], i8* align 4 bitcast ([4 x i64]* @.offload_sizes to i8*), i32 32, i1 false)
+// CHECK12-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP10]], align 4
+// CHECK12-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP12]], align 4
 // CHECK12-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
 // CHECK12-NEXT:    store i8* null, i8** [[TMP13]], align 4
 // CHECK12-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -4413,46 +4401,42 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
 // CHECK12-NEXT:    [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i32*
 // CHECK12-NEXT:    store i32 [[TMP3]], i32* [[TMP23]], align 4
-// CHECK12-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK12-NEXT:    store i64 4, i64* [[TMP24]], align 4
-// CHECK12-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK12-NEXT:    store i8* null, i8** [[TMP25]], align 4
-// CHECK12-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
-// CHECK12-NEXT:    [[TMP27:%.*]] = bitcast i8** [[TMP26]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP5]], i32* [[TMP27]], align 4
-// CHECK12-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK12-NEXT:    [[TMP29:%.*]] = bitcast i8** [[TMP28]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP5]], i32* [[TMP29]], align 4
-// CHECK12-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK12-NEXT:    store i64 4, i64* [[TMP30]], align 4
-// CHECK12-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
-// CHECK12-NEXT:    store i8* null, i8** [[TMP31]], align 4
-// CHECK12-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP35:%.*]] = load i32, i32* [[N]], align 4
-// CHECK12-NEXT:    store i32 [[TMP35]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK12-NEXT:    [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK12-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP36]], 0
+// CHECK12-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK12-NEXT:    store i8* null, i8** [[TMP24]], align 4
+// CHECK12-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK12-NEXT:    [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP5]], i32* [[TMP26]], align 4
+// CHECK12-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
+// CHECK12-NEXT:    [[TMP28:%.*]] = bitcast i8** [[TMP27]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP5]], i32* [[TMP28]], align 4
+// CHECK12-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
+// CHECK12-NEXT:    store i8* null, i8** [[TMP29]], align 4
+// CHECK12-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N]], align 4
+// CHECK12-NEXT:    store i32 [[TMP33]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK12-NEXT:    [[TMP34:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK12-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP34]], 0
 // CHECK12-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK12-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK12-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK12-NEXT:    [[TMP37:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK12-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP37]], 1
-// CHECK12-NEXT:    [[TMP38:%.*]] = zext i32 [[ADD]] to i64
-// CHECK12-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB4:[0-9]+]], i64 -1, i64 [[TMP38]])
-// CHECK12-NEXT:    [[TMP39:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB4]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l74.region_id, i32 4, i8** [[TMP32]], i8** [[TMP33]], i64* [[TMP34]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK12-NEXT:    [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0
-// CHECK12-NEXT:    br i1 [[TMP40]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK12-NEXT:    [[TMP35:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK12-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP35]], 1
+// CHECK12-NEXT:    [[TMP36:%.*]] = zext i32 [[ADD]] to i64
+// CHECK12-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB4:[0-9]+]], i64 -1, i64 [[TMP36]])
+// CHECK12-NEXT:    [[TMP37:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB4]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l74.region_id, i32 4, i8** [[TMP30]], i8** [[TMP31]], i64* [[TMP32]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK12-NEXT:    [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0
+// CHECK12-NEXT:    br i1 [[TMP38]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK12:       omp_offload.failed:
 // CHECK12-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l74(i32 [[TMP0]], i32* [[VLA]], i32 [[TMP3]], i32 [[TMP5]]) #[[ATTR5:[0-9]+]]
 // CHECK12-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK12:       omp_offload.cont:
 // CHECK12-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[VLA]], i32 0
-// CHECK12-NEXT:    [[TMP41:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
-// CHECK12-NEXT:    [[TMP42:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK12-NEXT:    call void @llvm.stackrestore(i8* [[TMP42]])
-// CHECK12-NEXT:    ret i32 [[TMP41]]
+// CHECK12-NEXT:    [[TMP39:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+// CHECK12-NEXT:    [[TMP40:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK12-NEXT:    call void @llvm.stackrestore(i8* [[TMP40]])
+// CHECK12-NEXT:    ret i32 [[TMP39]]
 //
 //
 // CHECK12-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l74
@@ -4709,7 +4693,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK12-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK12-SAME: () #[[ATTR6:[0-9]+]] {
+// CHECK12-SAME: () #[[ATTR7:[0-9]+]] {
 // CHECK12-NEXT:  entry:
 // CHECK12-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK12-NEXT:    ret void
@@ -6337,14 +6321,14 @@ int main (int argc, char **argv) {
 // CHECK25-NEXT:    store i32 [[TMP5]], i32* [[CONV1]], align 4
 // CHECK25-NEXT:    [[TMP6:%.*]] = load i64, i64* [[N_CASTED]], align 8
 // CHECK25-NEXT:    [[TMP7:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK25-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK25-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i64*
-// CHECK25-NEXT:    store i64 [[TMP1]], i64* [[TMP9]], align 8
-// CHECK25-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK25-NEXT:    [[TMP11:%.*]] = bitcast i8** [[TMP10]] to i64*
-// CHECK25-NEXT:    store i64 [[TMP1]], i64* [[TMP11]], align 8
-// CHECK25-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK25-NEXT:    store i64 8, i64* [[TMP12]], align 8
+// CHECK25-NEXT:    [[TMP8:%.*]] = bitcast [4 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK25-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP8]], i8* align 8 bitcast ([4 x i64]* @.offload_sizes to i8*), i64 32, i1 false)
+// CHECK25-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK25-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i64*
+// CHECK25-NEXT:    store i64 [[TMP1]], i64* [[TMP10]], align 8
+// CHECK25-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK25-NEXT:    [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i64*
+// CHECK25-NEXT:    store i64 [[TMP1]], i64* [[TMP12]], align 8
 // CHECK25-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
 // CHECK25-NEXT:    store i8* null, i8** [[TMP13]], align 8
 // CHECK25-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -6363,48 +6347,44 @@ int main (int argc, char **argv) {
 // CHECK25-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
 // CHECK25-NEXT:    [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i64*
 // CHECK25-NEXT:    store i64 [[TMP4]], i64* [[TMP23]], align 8
-// CHECK25-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK25-NEXT:    store i64 4, i64* [[TMP24]], align 8
-// CHECK25-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK25-NEXT:    store i8* null, i8** [[TMP25]], align 8
-// CHECK25-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
-// CHECK25-NEXT:    [[TMP27:%.*]] = bitcast i8** [[TMP26]] to i64*
-// CHECK25-NEXT:    store i64 [[TMP6]], i64* [[TMP27]], align 8
-// CHECK25-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK25-NEXT:    [[TMP29:%.*]] = bitcast i8** [[TMP28]] to i64*
-// CHECK25-NEXT:    store i64 [[TMP6]], i64* [[TMP29]], align 8
-// CHECK25-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK25-NEXT:    store i64 4, i64* [[TMP30]], align 8
-// CHECK25-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
-// CHECK25-NEXT:    store i8* null, i8** [[TMP31]], align 8
-// CHECK25-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK25-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK25-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK25-NEXT:    [[TMP35:%.*]] = load i32, i32* [[N]], align 4
-// CHECK25-NEXT:    store i32 [[TMP35]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK25-NEXT:    [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK25-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP36]], 0
+// CHECK25-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK25-NEXT:    store i8* null, i8** [[TMP24]], align 8
+// CHECK25-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK25-NEXT:    [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i64*
+// CHECK25-NEXT:    store i64 [[TMP6]], i64* [[TMP26]], align 8
+// CHECK25-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
+// CHECK25-NEXT:    [[TMP28:%.*]] = bitcast i8** [[TMP27]] to i64*
+// CHECK25-NEXT:    store i64 [[TMP6]], i64* [[TMP28]], align 8
+// CHECK25-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
+// CHECK25-NEXT:    store i8* null, i8** [[TMP29]], align 8
+// CHECK25-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK25-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK25-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK25-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N]], align 4
+// CHECK25-NEXT:    store i32 [[TMP33]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK25-NEXT:    [[TMP34:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK25-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP34]], 0
 // CHECK25-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK25-NEXT:    [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK25-NEXT:    store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK25-NEXT:    [[TMP37:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK25-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP37]], 1
-// CHECK25-NEXT:    [[TMP38:%.*]] = zext i32 [[ADD]] to i64
-// CHECK25-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB4:[0-9]+]], i64 -1, i64 [[TMP38]])
-// CHECK25-NEXT:    [[TMP39:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB4]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l166.region_id, i32 4, i8** [[TMP32]], i8** [[TMP33]], i64* [[TMP34]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK25-NEXT:    [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0
-// CHECK25-NEXT:    br i1 [[TMP40]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK25-NEXT:    [[TMP35:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK25-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP35]], 1
+// CHECK25-NEXT:    [[TMP36:%.*]] = zext i32 [[ADD]] to i64
+// CHECK25-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB4:[0-9]+]], i64 -1, i64 [[TMP36]])
+// CHECK25-NEXT:    [[TMP37:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB4]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l166.region_id, i32 4, i8** [[TMP30]], i8** [[TMP31]], i64* [[TMP32]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK25-NEXT:    [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0
+// CHECK25-NEXT:    br i1 [[TMP38]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK25:       omp_offload.failed:
 // CHECK25-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l166(i64 [[TMP1]], i32* [[VLA]], i64 [[TMP4]], i64 [[TMP6]]) #[[ATTR5:[0-9]+]]
 // CHECK25-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK25:       omp_offload.cont:
-// CHECK25-NEXT:    [[TMP41:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK25-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10EEiT_(i32 noundef signext [[TMP41]])
+// CHECK25-NEXT:    [[TMP39:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK25-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10EEiT_(i32 noundef signext [[TMP39]])
 // CHECK25-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK25-NEXT:    [[TMP42:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK25-NEXT:    call void @llvm.stackrestore(i8* [[TMP42]])
-// CHECK25-NEXT:    [[TMP43:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK25-NEXT:    ret i32 [[TMP43]]
+// CHECK25-NEXT:    [[TMP40:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK25-NEXT:    call void @llvm.stackrestore(i8* [[TMP40]])
+// CHECK25-NEXT:    [[TMP41:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK25-NEXT:    ret i32 [[TMP41]]
 //
 //
 // CHECK25-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l166
@@ -6668,7 +6648,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK25-LABEL: define {{[^@]+}}@_Z5tmainIiLi10EEiT_
-// CHECK25-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR6:[0-9]+]] comdat {
+// CHECK25-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR7:[0-9]+]] comdat {
 // CHECK25-NEXT:  entry:
 // CHECK25-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK25-NEXT:    [[A:%.*]] = alloca [10 x i32], align 4
@@ -6719,7 +6699,7 @@ int main (int argc, char **argv) {
 // CHECK25-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK25-NEXT:    [[TMP21:%.*]] = load i32, i32* [[TE]], align 4
 // CHECK25-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB4]], i64 -1, i64 10)
-// CHECK25-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB4]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l155.region_id, i32 3, i8** [[TMP19]], i8** [[TMP20]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null, i32 [[TMP21]], i32 0)
+// CHECK25-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB4]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l155.region_id, i32 3, i8** [[TMP19]], i8** [[TMP20]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.4, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.5, i32 0, i32 0), i8** null, i8** null, i32 [[TMP21]], i32 0)
 // CHECK25-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK25-NEXT:    br i1 [[TMP23]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK25:       omp_offload.failed:
@@ -6904,7 +6884,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK25-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK25-SAME: () #[[ATTR7:[0-9]+]] {
+// CHECK25-SAME: () #[[ATTR8:[0-9]+]] {
 // CHECK25-NEXT:  entry:
 // CHECK25-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK25-NEXT:    ret void
@@ -6948,14 +6928,14 @@ int main (int argc, char **argv) {
 // CHECK26-NEXT:    store i32 [[TMP5]], i32* [[CONV1]], align 4
 // CHECK26-NEXT:    [[TMP6:%.*]] = load i64, i64* [[N_CASTED]], align 8
 // CHECK26-NEXT:    [[TMP7:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK26-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK26-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i64*
-// CHECK26-NEXT:    store i64 [[TMP1]], i64* [[TMP9]], align 8
-// CHECK26-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK26-NEXT:    [[TMP11:%.*]] = bitcast i8** [[TMP10]] to i64*
-// CHECK26-NEXT:    store i64 [[TMP1]], i64* [[TMP11]], align 8
-// CHECK26-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK26-NEXT:    store i64 8, i64* [[TMP12]], align 8
+// CHECK26-NEXT:    [[TMP8:%.*]] = bitcast [4 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK26-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP8]], i8* align 8 bitcast ([4 x i64]* @.offload_sizes to i8*), i64 32, i1 false)
+// CHECK26-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK26-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i64*
+// CHECK26-NEXT:    store i64 [[TMP1]], i64* [[TMP10]], align 8
+// CHECK26-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK26-NEXT:    [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i64*
+// CHECK26-NEXT:    store i64 [[TMP1]], i64* [[TMP12]], align 8
 // CHECK26-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
 // CHECK26-NEXT:    store i8* null, i8** [[TMP13]], align 8
 // CHECK26-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -6974,48 +6954,44 @@ int main (int argc, char **argv) {
 // CHECK26-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
 // CHECK26-NEXT:    [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i64*
 // CHECK26-NEXT:    store i64 [[TMP4]], i64* [[TMP23]], align 8
-// CHECK26-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK26-NEXT:    store i64 4, i64* [[TMP24]], align 8
-// CHECK26-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK26-NEXT:    store i8* null, i8** [[TMP25]], align 8
-// CHECK26-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
-// CHECK26-NEXT:    [[TMP27:%.*]] = bitcast i8** [[TMP26]] to i64*
-// CHECK26-NEXT:    store i64 [[TMP6]], i64* [[TMP27]], align 8
-// CHECK26-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK26-NEXT:    [[TMP29:%.*]] = bitcast i8** [[TMP28]] to i64*
-// CHECK26-NEXT:    store i64 [[TMP6]], i64* [[TMP29]], align 8
-// CHECK26-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK26-NEXT:    store i64 4, i64* [[TMP30]], align 8
-// CHECK26-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
-// CHECK26-NEXT:    store i8* null, i8** [[TMP31]], align 8
-// CHECK26-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK26-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK26-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK26-NEXT:    [[TMP35:%.*]] = load i32, i32* [[N]], align 4
-// CHECK26-NEXT:    store i32 [[TMP35]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK26-NEXT:    [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK26-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP36]], 0
+// CHECK26-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK26-NEXT:    store i8* null, i8** [[TMP24]], align 8
+// CHECK26-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK26-NEXT:    [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i64*
+// CHECK26-NEXT:    store i64 [[TMP6]], i64* [[TMP26]], align 8
+// CHECK26-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
+// CHECK26-NEXT:    [[TMP28:%.*]] = bitcast i8** [[TMP27]] to i64*
+// CHECK26-NEXT:    store i64 [[TMP6]], i64* [[TMP28]], align 8
+// CHECK26-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
+// CHECK26-NEXT:    store i8* null, i8** [[TMP29]], align 8
+// CHECK26-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK26-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK26-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK26-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N]], align 4
+// CHECK26-NEXT:    store i32 [[TMP33]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK26-NEXT:    [[TMP34:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK26-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP34]], 0
 // CHECK26-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK26-NEXT:    [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK26-NEXT:    store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK26-NEXT:    [[TMP37:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK26-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP37]], 1
-// CHECK26-NEXT:    [[TMP38:%.*]] = zext i32 [[ADD]] to i64
-// CHECK26-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB4:[0-9]+]], i64 -1, i64 [[TMP38]])
-// CHECK26-NEXT:    [[TMP39:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB4]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l166.region_id, i32 4, i8** [[TMP32]], i8** [[TMP33]], i64* [[TMP34]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK26-NEXT:    [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0
-// CHECK26-NEXT:    br i1 [[TMP40]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK26-NEXT:    [[TMP35:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK26-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP35]], 1
+// CHECK26-NEXT:    [[TMP36:%.*]] = zext i32 [[ADD]] to i64
+// CHECK26-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB4:[0-9]+]], i64 -1, i64 [[TMP36]])
+// CHECK26-NEXT:    [[TMP37:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB4]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l166.region_id, i32 4, i8** [[TMP30]], i8** [[TMP31]], i64* [[TMP32]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK26-NEXT:    [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0
+// CHECK26-NEXT:    br i1 [[TMP38]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK26:       omp_offload.failed:
 // CHECK26-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l166(i64 [[TMP1]], i32* [[VLA]], i64 [[TMP4]], i64 [[TMP6]]) #[[ATTR5:[0-9]+]]
 // CHECK26-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK26:       omp_offload.cont:
-// CHECK26-NEXT:    [[TMP41:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK26-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10EEiT_(i32 noundef signext [[TMP41]])
+// CHECK26-NEXT:    [[TMP39:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK26-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10EEiT_(i32 noundef signext [[TMP39]])
 // CHECK26-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK26-NEXT:    [[TMP42:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK26-NEXT:    call void @llvm.stackrestore(i8* [[TMP42]])
-// CHECK26-NEXT:    [[TMP43:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK26-NEXT:    ret i32 [[TMP43]]
+// CHECK26-NEXT:    [[TMP40:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK26-NEXT:    call void @llvm.stackrestore(i8* [[TMP40]])
+// CHECK26-NEXT:    [[TMP41:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK26-NEXT:    ret i32 [[TMP41]]
 //
 //
 // CHECK26-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l166
@@ -7279,7 +7255,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK26-LABEL: define {{[^@]+}}@_Z5tmainIiLi10EEiT_
-// CHECK26-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR6:[0-9]+]] comdat {
+// CHECK26-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR7:[0-9]+]] comdat {
 // CHECK26-NEXT:  entry:
 // CHECK26-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK26-NEXT:    [[A:%.*]] = alloca [10 x i32], align 4
@@ -7330,7 +7306,7 @@ int main (int argc, char **argv) {
 // CHECK26-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK26-NEXT:    [[TMP21:%.*]] = load i32, i32* [[TE]], align 4
 // CHECK26-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB4]], i64 -1, i64 10)
-// CHECK26-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB4]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l155.region_id, i32 3, i8** [[TMP19]], i8** [[TMP20]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null, i32 [[TMP21]], i32 0)
+// CHECK26-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB4]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l155.region_id, i32 3, i8** [[TMP19]], i8** [[TMP20]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.4, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.5, i32 0, i32 0), i8** null, i8** null, i32 [[TMP21]], i32 0)
 // CHECK26-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK26-NEXT:    br i1 [[TMP23]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK26:       omp_offload.failed:
@@ -7515,7 +7491,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK26-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK26-SAME: () #[[ATTR7:[0-9]+]] {
+// CHECK26-SAME: () #[[ATTR8:[0-9]+]] {
 // CHECK26-NEXT:  entry:
 // CHECK26-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK26-NEXT:    ret void
@@ -7557,14 +7533,14 @@ int main (int argc, char **argv) {
 // CHECK27-NEXT:    [[TMP5:%.*]] = load i32, i32* [[N_CASTED]], align 4
 // CHECK27-NEXT:    [[TMP6:%.*]] = mul nuw i32 [[TMP0]], 4
 // CHECK27-NEXT:    [[TMP7:%.*]] = sext i32 [[TMP6]] to i64
-// CHECK27-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK27-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i32*
-// CHECK27-NEXT:    store i32 [[TMP0]], i32* [[TMP9]], align 4
-// CHECK27-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK27-NEXT:    [[TMP11:%.*]] = bitcast i8** [[TMP10]] to i32*
-// CHECK27-NEXT:    store i32 [[TMP0]], i32* [[TMP11]], align 4
-// CHECK27-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK27-NEXT:    store i64 4, i64* [[TMP12]], align 4
+// CHECK27-NEXT:    [[TMP8:%.*]] = bitcast [4 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK27-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP8]], i8* align 4 bitcast ([4 x i64]* @.offload_sizes to i8*), i32 32, i1 false)
+// CHECK27-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK27-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i32*
+// CHECK27-NEXT:    store i32 [[TMP0]], i32* [[TMP10]], align 4
+// CHECK27-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK27-NEXT:    [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i32*
+// CHECK27-NEXT:    store i32 [[TMP0]], i32* [[TMP12]], align 4
 // CHECK27-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
 // CHECK27-NEXT:    store i8* null, i8** [[TMP13]], align 4
 // CHECK27-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -7583,48 +7559,44 @@ int main (int argc, char **argv) {
 // CHECK27-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
 // CHECK27-NEXT:    [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i32*
 // CHECK27-NEXT:    store i32 [[TMP3]], i32* [[TMP23]], align 4
-// CHECK27-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK27-NEXT:    store i64 4, i64* [[TMP24]], align 4
-// CHECK27-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK27-NEXT:    store i8* null, i8** [[TMP25]], align 4
-// CHECK27-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
-// CHECK27-NEXT:    [[TMP27:%.*]] = bitcast i8** [[TMP26]] to i32*
-// CHECK27-NEXT:    store i32 [[TMP5]], i32* [[TMP27]], align 4
-// CHECK27-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK27-NEXT:    [[TMP29:%.*]] = bitcast i8** [[TMP28]] to i32*
-// CHECK27-NEXT:    store i32 [[TMP5]], i32* [[TMP29]], align 4
-// CHECK27-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK27-NEXT:    store i64 4, i64* [[TMP30]], align 4
-// CHECK27-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
-// CHECK27-NEXT:    store i8* null, i8** [[TMP31]], align 4
-// CHECK27-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK27-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK27-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK27-NEXT:    [[TMP35:%.*]] = load i32, i32* [[N]], align 4
-// CHECK27-NEXT:    store i32 [[TMP35]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK27-NEXT:    [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK27-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP36]], 0
+// CHECK27-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK27-NEXT:    store i8* null, i8** [[TMP24]], align 4
+// CHECK27-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK27-NEXT:    [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i32*
+// CHECK27-NEXT:    store i32 [[TMP5]], i32* [[TMP26]], align 4
+// CHECK27-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
+// CHECK27-NEXT:    [[TMP28:%.*]] = bitcast i8** [[TMP27]] to i32*
+// CHECK27-NEXT:    store i32 [[TMP5]], i32* [[TMP28]], align 4
+// CHECK27-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
+// CHECK27-NEXT:    store i8* null, i8** [[TMP29]], align 4
+// CHECK27-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK27-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK27-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK27-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N]], align 4
+// CHECK27-NEXT:    store i32 [[TMP33]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK27-NEXT:    [[TMP34:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK27-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP34]], 0
 // CHECK27-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK27-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK27-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK27-NEXT:    [[TMP37:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK27-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP37]], 1
-// CHECK27-NEXT:    [[TMP38:%.*]] = zext i32 [[ADD]] to i64
-// CHECK27-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB4:[0-9]+]], i64 -1, i64 [[TMP38]])
-// CHECK27-NEXT:    [[TMP39:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB4]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l166.region_id, i32 4, i8** [[TMP32]], i8** [[TMP33]], i64* [[TMP34]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK27-NEXT:    [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0
-// CHECK27-NEXT:    br i1 [[TMP40]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK27-NEXT:    [[TMP35:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK27-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP35]], 1
+// CHECK27-NEXT:    [[TMP36:%.*]] = zext i32 [[ADD]] to i64
+// CHECK27-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB4:[0-9]+]], i64 -1, i64 [[TMP36]])
+// CHECK27-NEXT:    [[TMP37:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB4]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l166.region_id, i32 4, i8** [[TMP30]], i8** [[TMP31]], i64* [[TMP32]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK27-NEXT:    [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0
+// CHECK27-NEXT:    br i1 [[TMP38]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK27:       omp_offload.failed:
 // CHECK27-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l166(i32 [[TMP0]], i32* [[VLA]], i32 [[TMP3]], i32 [[TMP5]]) #[[ATTR5:[0-9]+]]
 // CHECK27-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK27:       omp_offload.cont:
-// CHECK27-NEXT:    [[TMP41:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK27-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10EEiT_(i32 noundef [[TMP41]])
+// CHECK27-NEXT:    [[TMP39:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK27-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10EEiT_(i32 noundef [[TMP39]])
 // CHECK27-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK27-NEXT:    [[TMP42:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK27-NEXT:    call void @llvm.stackrestore(i8* [[TMP42]])
-// CHECK27-NEXT:    [[TMP43:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK27-NEXT:    ret i32 [[TMP43]]
+// CHECK27-NEXT:    [[TMP40:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK27-NEXT:    call void @llvm.stackrestore(i8* [[TMP40]])
+// CHECK27-NEXT:    [[TMP41:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK27-NEXT:    ret i32 [[TMP41]]
 //
 //
 // CHECK27-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l166
@@ -7881,7 +7853,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK27-LABEL: define {{[^@]+}}@_Z5tmainIiLi10EEiT_
-// CHECK27-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR6:[0-9]+]] comdat {
+// CHECK27-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR7:[0-9]+]] comdat {
 // CHECK27-NEXT:  entry:
 // CHECK27-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK27-NEXT:    [[A:%.*]] = alloca [10 x i32], align 4
@@ -7930,7 +7902,7 @@ int main (int argc, char **argv) {
 // CHECK27-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK27-NEXT:    [[TMP21:%.*]] = load i32, i32* [[TE]], align 4
 // CHECK27-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB4]], i64 -1, i64 10)
-// CHECK27-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB4]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l155.region_id, i32 3, i8** [[TMP19]], i8** [[TMP20]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null, i32 [[TMP21]], i32 0)
+// CHECK27-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB4]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l155.region_id, i32 3, i8** [[TMP19]], i8** [[TMP20]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.4, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.5, i32 0, i32 0), i8** null, i8** null, i32 [[TMP21]], i32 0)
 // CHECK27-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK27-NEXT:    br i1 [[TMP23]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK27:       omp_offload.failed:
@@ -8108,7 +8080,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK27-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK27-SAME: () #[[ATTR7:[0-9]+]] {
+// CHECK27-SAME: () #[[ATTR8:[0-9]+]] {
 // CHECK27-NEXT:  entry:
 // CHECK27-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK27-NEXT:    ret void
@@ -8150,14 +8122,14 @@ int main (int argc, char **argv) {
 // CHECK28-NEXT:    [[TMP5:%.*]] = load i32, i32* [[N_CASTED]], align 4
 // CHECK28-NEXT:    [[TMP6:%.*]] = mul nuw i32 [[TMP0]], 4
 // CHECK28-NEXT:    [[TMP7:%.*]] = sext i32 [[TMP6]] to i64
-// CHECK28-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK28-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i32*
-// CHECK28-NEXT:    store i32 [[TMP0]], i32* [[TMP9]], align 4
-// CHECK28-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK28-NEXT:    [[TMP11:%.*]] = bitcast i8** [[TMP10]] to i32*
-// CHECK28-NEXT:    store i32 [[TMP0]], i32* [[TMP11]], align 4
-// CHECK28-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK28-NEXT:    store i64 4, i64* [[TMP12]], align 4
+// CHECK28-NEXT:    [[TMP8:%.*]] = bitcast [4 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK28-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP8]], i8* align 4 bitcast ([4 x i64]* @.offload_sizes to i8*), i32 32, i1 false)
+// CHECK28-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK28-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i32*
+// CHECK28-NEXT:    store i32 [[TMP0]], i32* [[TMP10]], align 4
+// CHECK28-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK28-NEXT:    [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i32*
+// CHECK28-NEXT:    store i32 [[TMP0]], i32* [[TMP12]], align 4
 // CHECK28-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
 // CHECK28-NEXT:    store i8* null, i8** [[TMP13]], align 4
 // CHECK28-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -8176,48 +8148,44 @@ int main (int argc, char **argv) {
 // CHECK28-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
 // CHECK28-NEXT:    [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i32*
 // CHECK28-NEXT:    store i32 [[TMP3]], i32* [[TMP23]], align 4
-// CHECK28-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK28-NEXT:    store i64 4, i64* [[TMP24]], align 4
-// CHECK28-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK28-NEXT:    store i8* null, i8** [[TMP25]], align 4
-// CHECK28-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
-// CHECK28-NEXT:    [[TMP27:%.*]] = bitcast i8** [[TMP26]] to i32*
-// CHECK28-NEXT:    store i32 [[TMP5]], i32* [[TMP27]], align 4
-// CHECK28-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK28-NEXT:    [[TMP29:%.*]] = bitcast i8** [[TMP28]] to i32*
-// CHECK28-NEXT:    store i32 [[TMP5]], i32* [[TMP29]], align 4
-// CHECK28-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK28-NEXT:    store i64 4, i64* [[TMP30]], align 4
-// CHECK28-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
-// CHECK28-NEXT:    store i8* null, i8** [[TMP31]], align 4
-// CHECK28-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK28-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK28-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK28-NEXT:    [[TMP35:%.*]] = load i32, i32* [[N]], align 4
-// CHECK28-NEXT:    store i32 [[TMP35]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK28-NEXT:    [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK28-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP36]], 0
+// CHECK28-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK28-NEXT:    store i8* null, i8** [[TMP24]], align 4
+// CHECK28-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK28-NEXT:    [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i32*
+// CHECK28-NEXT:    store i32 [[TMP5]], i32* [[TMP26]], align 4
+// CHECK28-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
+// CHECK28-NEXT:    [[TMP28:%.*]] = bitcast i8** [[TMP27]] to i32*
+// CHECK28-NEXT:    store i32 [[TMP5]], i32* [[TMP28]], align 4
+// CHECK28-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
+// CHECK28-NEXT:    store i8* null, i8** [[TMP29]], align 4
+// CHECK28-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK28-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK28-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK28-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N]], align 4
+// CHECK28-NEXT:    store i32 [[TMP33]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK28-NEXT:    [[TMP34:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK28-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP34]], 0
 // CHECK28-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK28-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK28-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK28-NEXT:    [[TMP37:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK28-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP37]], 1
-// CHECK28-NEXT:    [[TMP38:%.*]] = zext i32 [[ADD]] to i64
-// CHECK28-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB4:[0-9]+]], i64 -1, i64 [[TMP38]])
-// CHECK28-NEXT:    [[TMP39:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB4]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l166.region_id, i32 4, i8** [[TMP32]], i8** [[TMP33]], i64* [[TMP34]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK28-NEXT:    [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0
-// CHECK28-NEXT:    br i1 [[TMP40]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK28-NEXT:    [[TMP35:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK28-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP35]], 1
+// CHECK28-NEXT:    [[TMP36:%.*]] = zext i32 [[ADD]] to i64
+// CHECK28-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB4:[0-9]+]], i64 -1, i64 [[TMP36]])
+// CHECK28-NEXT:    [[TMP37:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB4]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l166.region_id, i32 4, i8** [[TMP30]], i8** [[TMP31]], i64* [[TMP32]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK28-NEXT:    [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0
+// CHECK28-NEXT:    br i1 [[TMP38]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK28:       omp_offload.failed:
 // CHECK28-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l166(i32 [[TMP0]], i32* [[VLA]], i32 [[TMP3]], i32 [[TMP5]]) #[[ATTR5:[0-9]+]]
 // CHECK28-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK28:       omp_offload.cont:
-// CHECK28-NEXT:    [[TMP41:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK28-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10EEiT_(i32 noundef [[TMP41]])
+// CHECK28-NEXT:    [[TMP39:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK28-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10EEiT_(i32 noundef [[TMP39]])
 // CHECK28-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK28-NEXT:    [[TMP42:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK28-NEXT:    call void @llvm.stackrestore(i8* [[TMP42]])
-// CHECK28-NEXT:    [[TMP43:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK28-NEXT:    ret i32 [[TMP43]]
+// CHECK28-NEXT:    [[TMP40:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK28-NEXT:    call void @llvm.stackrestore(i8* [[TMP40]])
+// CHECK28-NEXT:    [[TMP41:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK28-NEXT:    ret i32 [[TMP41]]
 //
 //
 // CHECK28-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l166
@@ -8474,7 +8442,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK28-LABEL: define {{[^@]+}}@_Z5tmainIiLi10EEiT_
-// CHECK28-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR6:[0-9]+]] comdat {
+// CHECK28-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR7:[0-9]+]] comdat {
 // CHECK28-NEXT:  entry:
 // CHECK28-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK28-NEXT:    [[A:%.*]] = alloca [10 x i32], align 4
@@ -8523,7 +8491,7 @@ int main (int argc, char **argv) {
 // CHECK28-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK28-NEXT:    [[TMP21:%.*]] = load i32, i32* [[TE]], align 4
 // CHECK28-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB4]], i64 -1, i64 10)
-// CHECK28-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB4]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l155.region_id, i32 3, i8** [[TMP19]], i8** [[TMP20]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null, i32 [[TMP21]], i32 0)
+// CHECK28-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB4]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l155.region_id, i32 3, i8** [[TMP19]], i8** [[TMP20]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.4, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.5, i32 0, i32 0), i8** null, i8** null, i32 [[TMP21]], i32 0)
 // CHECK28-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK28-NEXT:    br i1 [[TMP23]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK28:       omp_offload.failed:
@@ -8701,7 +8669,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK28-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK28-SAME: () #[[ATTR7:[0-9]+]] {
+// CHECK28-SAME: () #[[ATTR8:[0-9]+]] {
 // CHECK28-NEXT:  entry:
 // CHECK28-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK28-NEXT:    ret void

diff  --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_collapse_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_collapse_codegen.cpp
index f1e588bfa5f3f..981bcd84b342b 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_collapse_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_collapse_codegen.cpp
@@ -29,7 +29,7 @@ struct SS{
     #pragma omp teams distribute parallel for simd collapse(2)
     for(int i = 0; i < X; i++) {
       for(int j = 0; j < Y; j++) {
-	a[i][j] = (T)0;
+        a[i][j] = (T)0;
       }
     }
 
@@ -1362,14 +1362,14 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP9:%.*]] = load i64, i64* [[M_CASTED]], align 8
 // CHECK9-NEXT:    [[TMP10:%.*]] = mul nuw i64 [[TMP1]], [[TMP3]]
 // CHECK9-NEXT:    [[TMP11:%.*]] = mul nuw i64 [[TMP10]], 4
-// CHECK9-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP13:%.*]] = bitcast i8** [[TMP12]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP7]], i64* [[TMP13]], align 8
-// CHECK9-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP7]], i64* [[TMP15]], align 8
-// CHECK9-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK9-NEXT:    store i64 4, i64* [[TMP16]], align 8
+// CHECK9-NEXT:    [[TMP12:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK9-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP12]], i8* align 8 bitcast ([5 x i64]* @.offload_sizes to i8*), i64 40, i1 false)
+// CHECK9-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP7]], i64* [[TMP14]], align 8
+// CHECK9-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP16:%.*]] = bitcast i8** [[TMP15]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP7]], i64* [[TMP16]], align 8
 // CHECK9-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
 // CHECK9-NEXT:    store i8* null, i8** [[TMP17]], align 8
 // CHECK9-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -1378,75 +1378,69 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK9-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i64*
 // CHECK9-NEXT:    store i64 [[TMP9]], i64* [[TMP21]], align 8
-// CHECK9-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK9-NEXT:    store i64 4, i64* [[TMP22]], align 8
-// CHECK9-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK9-NEXT:    store i8* null, i8** [[TMP23]], align 8
-// CHECK9-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP25]], align 8
-// CHECK9-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP27:%.*]] = bitcast i8** [[TMP26]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP27]], align 8
-// CHECK9-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK9-NEXT:    store i64 8, i64* [[TMP28]], align 8
-// CHECK9-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK9-NEXT:    store i8* null, i8** [[TMP29]], align 8
-// CHECK9-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK9-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK9-NEXT:    store i8* null, i8** [[TMP22]], align 8
+// CHECK9-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP24]], align 8
+// CHECK9-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP26]], align 8
+// CHECK9-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK9-NEXT:    store i8* null, i8** [[TMP27]], align 8
+// CHECK9-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK9-NEXT:    [[TMP29:%.*]] = bitcast i8** [[TMP28]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP3]], i64* [[TMP29]], align 8
+// CHECK9-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
 // CHECK9-NEXT:    [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i64*
 // CHECK9-NEXT:    store i64 [[TMP3]], i64* [[TMP31]], align 8
-// CHECK9-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK9-NEXT:    [[TMP33:%.*]] = bitcast i8** [[TMP32]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP3]], i64* [[TMP33]], align 8
-// CHECK9-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK9-NEXT:    store i64 8, i64* [[TMP34]], align 8
-// CHECK9-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
-// CHECK9-NEXT:    store i8* null, i8** [[TMP35]], align 8
-// CHECK9-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK9-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i32**
-// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP37]], align 8
-// CHECK9-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
-// CHECK9-NEXT:    [[TMP39:%.*]] = bitcast i8** [[TMP38]] to i32**
-// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP39]], align 8
-// CHECK9-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK9-NEXT:    store i64 [[TMP11]], i64* [[TMP40]], align 8
-// CHECK9-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
-// CHECK9-NEXT:    store i8* null, i8** [[TMP41]], align 8
-// CHECK9-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP45:%.*]] = load i32, i32* [[N]], align 4
-// CHECK9-NEXT:    store i32 [[TMP45]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK9-NEXT:    [[TMP46:%.*]] = load i32, i32* [[M]], align 4
-// CHECK9-NEXT:    store i32 [[TMP46]], i32* [[DOTCAPTURE_EXPR_3]], align 4
-// CHECK9-NEXT:    [[TMP47:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK9-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP47]], 0
+// CHECK9-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
+// CHECK9-NEXT:    store i8* null, i8** [[TMP32]], align 8
+// CHECK9-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK9-NEXT:    [[TMP34:%.*]] = bitcast i8** [[TMP33]] to i32**
+// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP34]], align 8
+// CHECK9-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK9-NEXT:    [[TMP36:%.*]] = bitcast i8** [[TMP35]] to i32**
+// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP36]], align 8
+// CHECK9-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
+// CHECK9-NEXT:    store i64 [[TMP11]], i64* [[TMP37]], align 8
+// CHECK9-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
+// CHECK9-NEXT:    store i8* null, i8** [[TMP38]], align 8
+// CHECK9-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP42:%.*]] = load i32, i32* [[N]], align 4
+// CHECK9-NEXT:    store i32 [[TMP42]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK9-NEXT:    [[TMP43:%.*]] = load i32, i32* [[M]], align 4
+// CHECK9-NEXT:    store i32 [[TMP43]], i32* [[DOTCAPTURE_EXPR_3]], align 4
+// CHECK9-NEXT:    [[TMP44:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK9-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP44]], 0
 // CHECK9-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK9-NEXT:    [[CONV5:%.*]] = sext i32 [[DIV]] to i64
-// CHECK9-NEXT:    [[TMP48:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4
-// CHECK9-NEXT:    [[SUB6:%.*]] = sub nsw i32 [[TMP48]], 0
+// CHECK9-NEXT:    [[TMP45:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4
+// CHECK9-NEXT:    [[SUB6:%.*]] = sub nsw i32 [[TMP45]], 0
 // CHECK9-NEXT:    [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1
 // CHECK9-NEXT:    [[CONV8:%.*]] = sext i32 [[DIV7]] to i64
 // CHECK9-NEXT:    [[MUL:%.*]] = mul nsw i64 [[CONV5]], [[CONV8]]
 // CHECK9-NEXT:    [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1
 // CHECK9-NEXT:    store i64 [[SUB9]], i64* [[DOTCAPTURE_EXPR_4]], align 8
-// CHECK9-NEXT:    [[TMP49:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_4]], align 8
-// CHECK9-NEXT:    [[ADD:%.*]] = add nsw i64 [[TMP49]], 1
+// CHECK9-NEXT:    [[TMP46:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_4]], align 8
+// CHECK9-NEXT:    [[ADD:%.*]] = add nsw i64 [[TMP46]], 1
 // CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[ADD]])
-// CHECK9-NEXT:    [[TMP50:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l86.region_id, i32 5, i8** [[TMP42]], i8** [[TMP43]], i64* [[TMP44]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK9-NEXT:    [[TMP51:%.*]] = icmp ne i32 [[TMP50]], 0
-// CHECK9-NEXT:    br i1 [[TMP51]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK9-NEXT:    [[TMP47:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l86.region_id, i32 5, i8** [[TMP39]], i8** [[TMP40]], i64* [[TMP41]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK9-NEXT:    [[TMP48:%.*]] = icmp ne i32 [[TMP47]], 0
+// CHECK9-NEXT:    br i1 [[TMP48]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK9:       omp_offload.failed:
 // CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l86(i64 [[TMP7]], i64 [[TMP9]], i64 [[TMP1]], i64 [[TMP3]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK9:       omp_offload.cont:
-// CHECK9-NEXT:    [[TMP52:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK9-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef signext [[TMP52]])
+// CHECK9-NEXT:    [[TMP49:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK9-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef signext [[TMP49]])
 // CHECK9-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK9-NEXT:    [[TMP53:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK9-NEXT:    call void @llvm.stackrestore(i8* [[TMP53]])
-// CHECK9-NEXT:    [[TMP54:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK9-NEXT:    ret i32 [[TMP54]]
+// CHECK9-NEXT:    [[TMP50:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK9-NEXT:    call void @llvm.stackrestore(i8* [[TMP50]])
+// CHECK9-NEXT:    [[TMP51:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK9-NEXT:    ret i32 [[TMP51]]
 //
 //
 // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l86
@@ -1776,7 +1770,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK9-LABEL: define {{[^@]+}}@_Z5tmainIiLi10ELi2EEiT_
-// CHECK9-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK9-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK9-NEXT:  entry:
 // CHECK9-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[A:%.*]] = alloca [10 x [2 x i32]], align 4
@@ -1797,7 +1791,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 20)
-// CHECK9-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l72.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK9-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l72.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.4, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.5, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK9-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK9-NEXT:    br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK9:       omp_offload.failed:
@@ -1990,7 +1984,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK9-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK9-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK9-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK9-NEXT:  entry:
 // CHECK9-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK9-NEXT:    ret void
@@ -2043,14 +2037,14 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[TMP9:%.*]] = load i64, i64* [[M_CASTED]], align 8
 // CHECK10-NEXT:    [[TMP10:%.*]] = mul nuw i64 [[TMP1]], [[TMP3]]
 // CHECK10-NEXT:    [[TMP11:%.*]] = mul nuw i64 [[TMP10]], 4
-// CHECK10-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP13:%.*]] = bitcast i8** [[TMP12]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP7]], i64* [[TMP13]], align 8
-// CHECK10-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP7]], i64* [[TMP15]], align 8
-// CHECK10-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK10-NEXT:    store i64 4, i64* [[TMP16]], align 8
+// CHECK10-NEXT:    [[TMP12:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK10-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP12]], i8* align 8 bitcast ([5 x i64]* @.offload_sizes to i8*), i64 40, i1 false)
+// CHECK10-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP7]], i64* [[TMP14]], align 8
+// CHECK10-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP16:%.*]] = bitcast i8** [[TMP15]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP7]], i64* [[TMP16]], align 8
 // CHECK10-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
 // CHECK10-NEXT:    store i8* null, i8** [[TMP17]], align 8
 // CHECK10-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -2059,75 +2053,69 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK10-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i64*
 // CHECK10-NEXT:    store i64 [[TMP9]], i64* [[TMP21]], align 8
-// CHECK10-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK10-NEXT:    store i64 4, i64* [[TMP22]], align 8
-// CHECK10-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK10-NEXT:    store i8* null, i8** [[TMP23]], align 8
-// CHECK10-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK10-NEXT:    [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP25]], align 8
-// CHECK10-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK10-NEXT:    [[TMP27:%.*]] = bitcast i8** [[TMP26]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP27]], align 8
-// CHECK10-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK10-NEXT:    store i64 8, i64* [[TMP28]], align 8
-// CHECK10-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK10-NEXT:    store i8* null, i8** [[TMP29]], align 8
-// CHECK10-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK10-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK10-NEXT:    store i8* null, i8** [[TMP22]], align 8
+// CHECK10-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK10-NEXT:    [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP24]], align 8
+// CHECK10-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK10-NEXT:    [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP26]], align 8
+// CHECK10-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK10-NEXT:    store i8* null, i8** [[TMP27]], align 8
+// CHECK10-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK10-NEXT:    [[TMP29:%.*]] = bitcast i8** [[TMP28]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP3]], i64* [[TMP29]], align 8
+// CHECK10-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
 // CHECK10-NEXT:    [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i64*
 // CHECK10-NEXT:    store i64 [[TMP3]], i64* [[TMP31]], align 8
-// CHECK10-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK10-NEXT:    [[TMP33:%.*]] = bitcast i8** [[TMP32]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP3]], i64* [[TMP33]], align 8
-// CHECK10-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK10-NEXT:    store i64 8, i64* [[TMP34]], align 8
-// CHECK10-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
-// CHECK10-NEXT:    store i8* null, i8** [[TMP35]], align 8
-// CHECK10-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK10-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i32**
-// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP37]], align 8
-// CHECK10-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
-// CHECK10-NEXT:    [[TMP39:%.*]] = bitcast i8** [[TMP38]] to i32**
-// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP39]], align 8
-// CHECK10-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK10-NEXT:    store i64 [[TMP11]], i64* [[TMP40]], align 8
-// CHECK10-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
-// CHECK10-NEXT:    store i8* null, i8** [[TMP41]], align 8
-// CHECK10-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP45:%.*]] = load i32, i32* [[N]], align 4
-// CHECK10-NEXT:    store i32 [[TMP45]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK10-NEXT:    [[TMP46:%.*]] = load i32, i32* [[M]], align 4
-// CHECK10-NEXT:    store i32 [[TMP46]], i32* [[DOTCAPTURE_EXPR_3]], align 4
-// CHECK10-NEXT:    [[TMP47:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK10-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP47]], 0
+// CHECK10-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
+// CHECK10-NEXT:    store i8* null, i8** [[TMP32]], align 8
+// CHECK10-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK10-NEXT:    [[TMP34:%.*]] = bitcast i8** [[TMP33]] to i32**
+// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP34]], align 8
+// CHECK10-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK10-NEXT:    [[TMP36:%.*]] = bitcast i8** [[TMP35]] to i32**
+// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP36]], align 8
+// CHECK10-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
+// CHECK10-NEXT:    store i64 [[TMP11]], i64* [[TMP37]], align 8
+// CHECK10-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
+// CHECK10-NEXT:    store i8* null, i8** [[TMP38]], align 8
+// CHECK10-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP42:%.*]] = load i32, i32* [[N]], align 4
+// CHECK10-NEXT:    store i32 [[TMP42]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK10-NEXT:    [[TMP43:%.*]] = load i32, i32* [[M]], align 4
+// CHECK10-NEXT:    store i32 [[TMP43]], i32* [[DOTCAPTURE_EXPR_3]], align 4
+// CHECK10-NEXT:    [[TMP44:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK10-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP44]], 0
 // CHECK10-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK10-NEXT:    [[CONV5:%.*]] = sext i32 [[DIV]] to i64
-// CHECK10-NEXT:    [[TMP48:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4
-// CHECK10-NEXT:    [[SUB6:%.*]] = sub nsw i32 [[TMP48]], 0
+// CHECK10-NEXT:    [[TMP45:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4
+// CHECK10-NEXT:    [[SUB6:%.*]] = sub nsw i32 [[TMP45]], 0
 // CHECK10-NEXT:    [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1
 // CHECK10-NEXT:    [[CONV8:%.*]] = sext i32 [[DIV7]] to i64
 // CHECK10-NEXT:    [[MUL:%.*]] = mul nsw i64 [[CONV5]], [[CONV8]]
 // CHECK10-NEXT:    [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1
 // CHECK10-NEXT:    store i64 [[SUB9]], i64* [[DOTCAPTURE_EXPR_4]], align 8
-// CHECK10-NEXT:    [[TMP49:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_4]], align 8
-// CHECK10-NEXT:    [[ADD:%.*]] = add nsw i64 [[TMP49]], 1
+// CHECK10-NEXT:    [[TMP46:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_4]], align 8
+// CHECK10-NEXT:    [[ADD:%.*]] = add nsw i64 [[TMP46]], 1
 // CHECK10-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[ADD]])
-// CHECK10-NEXT:    [[TMP50:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l86.region_id, i32 5, i8** [[TMP42]], i8** [[TMP43]], i64* [[TMP44]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK10-NEXT:    [[TMP51:%.*]] = icmp ne i32 [[TMP50]], 0
-// CHECK10-NEXT:    br i1 [[TMP51]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK10-NEXT:    [[TMP47:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l86.region_id, i32 5, i8** [[TMP39]], i8** [[TMP40]], i64* [[TMP41]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK10-NEXT:    [[TMP48:%.*]] = icmp ne i32 [[TMP47]], 0
+// CHECK10-NEXT:    br i1 [[TMP48]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK10:       omp_offload.failed:
 // CHECK10-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l86(i64 [[TMP7]], i64 [[TMP9]], i64 [[TMP1]], i64 [[TMP3]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK10-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK10:       omp_offload.cont:
-// CHECK10-NEXT:    [[TMP52:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK10-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef signext [[TMP52]])
+// CHECK10-NEXT:    [[TMP49:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK10-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef signext [[TMP49]])
 // CHECK10-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK10-NEXT:    [[TMP53:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK10-NEXT:    call void @llvm.stackrestore(i8* [[TMP53]])
-// CHECK10-NEXT:    [[TMP54:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK10-NEXT:    ret i32 [[TMP54]]
+// CHECK10-NEXT:    [[TMP50:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK10-NEXT:    call void @llvm.stackrestore(i8* [[TMP50]])
+// CHECK10-NEXT:    [[TMP51:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK10-NEXT:    ret i32 [[TMP51]]
 //
 //
 // CHECK10-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l86
@@ -2457,7 +2445,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK10-LABEL: define {{[^@]+}}@_Z5tmainIiLi10ELi2EEiT_
-// CHECK10-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK10-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK10-NEXT:  entry:
 // CHECK10-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK10-NEXT:    [[A:%.*]] = alloca [10 x [2 x i32]], align 4
@@ -2478,7 +2466,7 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK10-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK10-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 20)
-// CHECK10-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l72.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK10-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l72.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.4, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.5, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK10-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK10-NEXT:    br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK10:       omp_offload.failed:
@@ -2671,7 +2659,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK10-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK10-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK10-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK10-NEXT:  entry:
 // CHECK10-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK10-NEXT:    ret void
@@ -2721,14 +2709,14 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP8:%.*]] = mul nuw i32 [[TMP0]], [[TMP1]]
 // CHECK11-NEXT:    [[TMP9:%.*]] = mul nuw i32 [[TMP8]], 4
 // CHECK11-NEXT:    [[TMP10:%.*]] = sext i32 [[TMP9]] to i64
-// CHECK11-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP5]], i32* [[TMP12]], align 4
-// CHECK11-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP5]], i32* [[TMP14]], align 4
-// CHECK11-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK11-NEXT:    store i64 4, i64* [[TMP15]], align 4
+// CHECK11-NEXT:    [[TMP11:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK11-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP11]], i8* align 4 bitcast ([5 x i64]* @.offload_sizes to i8*), i32 40, i1 false)
+// CHECK11-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP13:%.*]] = bitcast i8** [[TMP12]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP5]], i32* [[TMP13]], align 4
+// CHECK11-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP5]], i32* [[TMP15]], align 4
 // CHECK11-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
 // CHECK11-NEXT:    store i8* null, i8** [[TMP16]], align 4
 // CHECK11-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -2737,75 +2725,69 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK11-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32*
 // CHECK11-NEXT:    store i32 [[TMP7]], i32* [[TMP20]], align 4
-// CHECK11-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK11-NEXT:    store i64 4, i64* [[TMP21]], align 4
-// CHECK11-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK11-NEXT:    store i8* null, i8** [[TMP22]], align 4
-// CHECK11-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP24]], align 4
-// CHECK11-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP26]], align 4
-// CHECK11-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK11-NEXT:    store i64 4, i64* [[TMP27]], align 4
-// CHECK11-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK11-NEXT:    store i8* null, i8** [[TMP28]], align 4
-// CHECK11-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK11-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK11-NEXT:    store i8* null, i8** [[TMP21]], align 4
+// CHECK11-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP23]], align 4
+// CHECK11-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP25]], align 4
+// CHECK11-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK11-NEXT:    store i8* null, i8** [[TMP26]], align 4
+// CHECK11-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK11-NEXT:    [[TMP28:%.*]] = bitcast i8** [[TMP27]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP1]], i32* [[TMP28]], align 4
+// CHECK11-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
 // CHECK11-NEXT:    [[TMP30:%.*]] = bitcast i8** [[TMP29]] to i32*
 // CHECK11-NEXT:    store i32 [[TMP1]], i32* [[TMP30]], align 4
-// CHECK11-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK11-NEXT:    [[TMP32:%.*]] = bitcast i8** [[TMP31]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP1]], i32* [[TMP32]], align 4
-// CHECK11-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK11-NEXT:    store i64 4, i64* [[TMP33]], align 4
-// CHECK11-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
-// CHECK11-NEXT:    store i8* null, i8** [[TMP34]], align 4
-// CHECK11-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK11-NEXT:    [[TMP36:%.*]] = bitcast i8** [[TMP35]] to i32**
-// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP36]], align 4
-// CHECK11-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
-// CHECK11-NEXT:    [[TMP38:%.*]] = bitcast i8** [[TMP37]] to i32**
-// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP38]], align 4
-// CHECK11-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK11-NEXT:    store i64 [[TMP10]], i64* [[TMP39]], align 4
-// CHECK11-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
-// CHECK11-NEXT:    store i8* null, i8** [[TMP40]], align 4
-// CHECK11-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP44:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP44]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK11-NEXT:    [[TMP45:%.*]] = load i32, i32* [[M]], align 4
-// CHECK11-NEXT:    store i32 [[TMP45]], i32* [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK11-NEXT:    [[TMP46:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK11-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP46]], 0
+// CHECK11-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
+// CHECK11-NEXT:    store i8* null, i8** [[TMP31]], align 4
+// CHECK11-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK11-NEXT:    [[TMP33:%.*]] = bitcast i8** [[TMP32]] to i32**
+// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP33]], align 4
+// CHECK11-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK11-NEXT:    [[TMP35:%.*]] = bitcast i8** [[TMP34]] to i32**
+// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP35]], align 4
+// CHECK11-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
+// CHECK11-NEXT:    store i64 [[TMP10]], i64* [[TMP36]], align 4
+// CHECK11-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
+// CHECK11-NEXT:    store i8* null, i8** [[TMP37]], align 4
+// CHECK11-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP41:%.*]] = load i32, i32* [[N]], align 4
+// CHECK11-NEXT:    store i32 [[TMP41]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK11-NEXT:    [[TMP42:%.*]] = load i32, i32* [[M]], align 4
+// CHECK11-NEXT:    store i32 [[TMP42]], i32* [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK11-NEXT:    [[TMP43:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK11-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP43]], 0
 // CHECK11-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK11-NEXT:    [[CONV:%.*]] = sext i32 [[DIV]] to i64
-// CHECK11-NEXT:    [[TMP47:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK11-NEXT:    [[SUB4:%.*]] = sub nsw i32 [[TMP47]], 0
+// CHECK11-NEXT:    [[TMP44:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK11-NEXT:    [[SUB4:%.*]] = sub nsw i32 [[TMP44]], 0
 // CHECK11-NEXT:    [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1
 // CHECK11-NEXT:    [[CONV6:%.*]] = sext i32 [[DIV5]] to i64
 // CHECK11-NEXT:    [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]]
 // CHECK11-NEXT:    [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1
 // CHECK11-NEXT:    store i64 [[SUB7]], i64* [[DOTCAPTURE_EXPR_3]], align 8
-// CHECK11-NEXT:    [[TMP48:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8
-// CHECK11-NEXT:    [[ADD:%.*]] = add nsw i64 [[TMP48]], 1
+// CHECK11-NEXT:    [[TMP45:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8
+// CHECK11-NEXT:    [[ADD:%.*]] = add nsw i64 [[TMP45]], 1
 // CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[ADD]])
-// CHECK11-NEXT:    [[TMP49:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l86.region_id, i32 5, i8** [[TMP41]], i8** [[TMP42]], i64* [[TMP43]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK11-NEXT:    [[TMP50:%.*]] = icmp ne i32 [[TMP49]], 0
-// CHECK11-NEXT:    br i1 [[TMP50]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK11-NEXT:    [[TMP46:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l86.region_id, i32 5, i8** [[TMP38]], i8** [[TMP39]], i64* [[TMP40]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK11-NEXT:    [[TMP47:%.*]] = icmp ne i32 [[TMP46]], 0
+// CHECK11-NEXT:    br i1 [[TMP47]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK11:       omp_offload.failed:
 // CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l86(i32 [[TMP5]], i32 [[TMP7]], i32 [[TMP0]], i32 [[TMP1]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK11:       omp_offload.cont:
-// CHECK11-NEXT:    [[TMP51:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK11-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef [[TMP51]])
+// CHECK11-NEXT:    [[TMP48:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK11-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef [[TMP48]])
 // CHECK11-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK11-NEXT:    [[TMP52:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK11-NEXT:    call void @llvm.stackrestore(i8* [[TMP52]])
-// CHECK11-NEXT:    [[TMP53:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK11-NEXT:    ret i32 [[TMP53]]
+// CHECK11-NEXT:    [[TMP49:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK11-NEXT:    call void @llvm.stackrestore(i8* [[TMP49]])
+// CHECK11-NEXT:    [[TMP50:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK11-NEXT:    ret i32 [[TMP50]]
 //
 //
 // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l86
@@ -3135,7 +3117,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK11-LABEL: define {{[^@]+}}@_Z5tmainIiLi10ELi2EEiT_
-// CHECK11-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK11-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK11-NEXT:  entry:
 // CHECK11-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[A:%.*]] = alloca [10 x [2 x i32]], align 4
@@ -3156,7 +3138,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 20)
-// CHECK11-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l72.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK11-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l72.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.4, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.5, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK11-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK11-NEXT:    br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK11:       omp_offload.failed:
@@ -3343,7 +3325,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK11-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK11-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK11-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK11-NEXT:  entry:
 // CHECK11-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK11-NEXT:    ret void
@@ -3393,14 +3375,14 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[TMP8:%.*]] = mul nuw i32 [[TMP0]], [[TMP1]]
 // CHECK12-NEXT:    [[TMP9:%.*]] = mul nuw i32 [[TMP8]], 4
 // CHECK12-NEXT:    [[TMP10:%.*]] = sext i32 [[TMP9]] to i64
-// CHECK12-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP5]], i32* [[TMP12]], align 4
-// CHECK12-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP5]], i32* [[TMP14]], align 4
-// CHECK12-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK12-NEXT:    store i64 4, i64* [[TMP15]], align 4
+// CHECK12-NEXT:    [[TMP11:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK12-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP11]], i8* align 4 bitcast ([5 x i64]* @.offload_sizes to i8*), i32 40, i1 false)
+// CHECK12-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP13:%.*]] = bitcast i8** [[TMP12]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP5]], i32* [[TMP13]], align 4
+// CHECK12-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP5]], i32* [[TMP15]], align 4
 // CHECK12-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
 // CHECK12-NEXT:    store i8* null, i8** [[TMP16]], align 4
 // CHECK12-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -3409,75 +3391,69 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK12-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32*
 // CHECK12-NEXT:    store i32 [[TMP7]], i32* [[TMP20]], align 4
-// CHECK12-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK12-NEXT:    store i64 4, i64* [[TMP21]], align 4
-// CHECK12-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK12-NEXT:    store i8* null, i8** [[TMP22]], align 4
-// CHECK12-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK12-NEXT:    [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP24]], align 4
-// CHECK12-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK12-NEXT:    [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP26]], align 4
-// CHECK12-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK12-NEXT:    store i64 4, i64* [[TMP27]], align 4
-// CHECK12-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK12-NEXT:    store i8* null, i8** [[TMP28]], align 4
-// CHECK12-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK12-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK12-NEXT:    store i8* null, i8** [[TMP21]], align 4
+// CHECK12-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK12-NEXT:    [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP23]], align 4
+// CHECK12-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK12-NEXT:    [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP25]], align 4
+// CHECK12-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK12-NEXT:    store i8* null, i8** [[TMP26]], align 4
+// CHECK12-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK12-NEXT:    [[TMP28:%.*]] = bitcast i8** [[TMP27]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP1]], i32* [[TMP28]], align 4
+// CHECK12-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
 // CHECK12-NEXT:    [[TMP30:%.*]] = bitcast i8** [[TMP29]] to i32*
 // CHECK12-NEXT:    store i32 [[TMP1]], i32* [[TMP30]], align 4
-// CHECK12-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK12-NEXT:    [[TMP32:%.*]] = bitcast i8** [[TMP31]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP1]], i32* [[TMP32]], align 4
-// CHECK12-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK12-NEXT:    store i64 4, i64* [[TMP33]], align 4
-// CHECK12-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
-// CHECK12-NEXT:    store i8* null, i8** [[TMP34]], align 4
-// CHECK12-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK12-NEXT:    [[TMP36:%.*]] = bitcast i8** [[TMP35]] to i32**
-// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP36]], align 4
-// CHECK12-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
-// CHECK12-NEXT:    [[TMP38:%.*]] = bitcast i8** [[TMP37]] to i32**
-// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP38]], align 4
-// CHECK12-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK12-NEXT:    store i64 [[TMP10]], i64* [[TMP39]], align 4
-// CHECK12-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
-// CHECK12-NEXT:    store i8* null, i8** [[TMP40]], align 4
-// CHECK12-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP44:%.*]] = load i32, i32* [[N]], align 4
-// CHECK12-NEXT:    store i32 [[TMP44]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK12-NEXT:    [[TMP45:%.*]] = load i32, i32* [[M]], align 4
-// CHECK12-NEXT:    store i32 [[TMP45]], i32* [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK12-NEXT:    [[TMP46:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK12-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP46]], 0
+// CHECK12-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
+// CHECK12-NEXT:    store i8* null, i8** [[TMP31]], align 4
+// CHECK12-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK12-NEXT:    [[TMP33:%.*]] = bitcast i8** [[TMP32]] to i32**
+// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP33]], align 4
+// CHECK12-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK12-NEXT:    [[TMP35:%.*]] = bitcast i8** [[TMP34]] to i32**
+// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP35]], align 4
+// CHECK12-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
+// CHECK12-NEXT:    store i64 [[TMP10]], i64* [[TMP36]], align 4
+// CHECK12-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
+// CHECK12-NEXT:    store i8* null, i8** [[TMP37]], align 4
+// CHECK12-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP41:%.*]] = load i32, i32* [[N]], align 4
+// CHECK12-NEXT:    store i32 [[TMP41]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK12-NEXT:    [[TMP42:%.*]] = load i32, i32* [[M]], align 4
+// CHECK12-NEXT:    store i32 [[TMP42]], i32* [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK12-NEXT:    [[TMP43:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK12-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP43]], 0
 // CHECK12-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK12-NEXT:    [[CONV:%.*]] = sext i32 [[DIV]] to i64
-// CHECK12-NEXT:    [[TMP47:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK12-NEXT:    [[SUB4:%.*]] = sub nsw i32 [[TMP47]], 0
+// CHECK12-NEXT:    [[TMP44:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK12-NEXT:    [[SUB4:%.*]] = sub nsw i32 [[TMP44]], 0
 // CHECK12-NEXT:    [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1
 // CHECK12-NEXT:    [[CONV6:%.*]] = sext i32 [[DIV5]] to i64
 // CHECK12-NEXT:    [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]]
 // CHECK12-NEXT:    [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1
 // CHECK12-NEXT:    store i64 [[SUB7]], i64* [[DOTCAPTURE_EXPR_3]], align 8
-// CHECK12-NEXT:    [[TMP48:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8
-// CHECK12-NEXT:    [[ADD:%.*]] = add nsw i64 [[TMP48]], 1
+// CHECK12-NEXT:    [[TMP45:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8
+// CHECK12-NEXT:    [[ADD:%.*]] = add nsw i64 [[TMP45]], 1
 // CHECK12-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[ADD]])
-// CHECK12-NEXT:    [[TMP49:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l86.region_id, i32 5, i8** [[TMP41]], i8** [[TMP42]], i64* [[TMP43]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK12-NEXT:    [[TMP50:%.*]] = icmp ne i32 [[TMP49]], 0
-// CHECK12-NEXT:    br i1 [[TMP50]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK12-NEXT:    [[TMP46:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l86.region_id, i32 5, i8** [[TMP38]], i8** [[TMP39]], i64* [[TMP40]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK12-NEXT:    [[TMP47:%.*]] = icmp ne i32 [[TMP46]], 0
+// CHECK12-NEXT:    br i1 [[TMP47]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK12:       omp_offload.failed:
 // CHECK12-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l86(i32 [[TMP5]], i32 [[TMP7]], i32 [[TMP0]], i32 [[TMP1]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK12-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK12:       omp_offload.cont:
-// CHECK12-NEXT:    [[TMP51:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK12-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef [[TMP51]])
+// CHECK12-NEXT:    [[TMP48:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK12-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef [[TMP48]])
 // CHECK12-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK12-NEXT:    [[TMP52:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK12-NEXT:    call void @llvm.stackrestore(i8* [[TMP52]])
-// CHECK12-NEXT:    [[TMP53:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK12-NEXT:    ret i32 [[TMP53]]
+// CHECK12-NEXT:    [[TMP49:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK12-NEXT:    call void @llvm.stackrestore(i8* [[TMP49]])
+// CHECK12-NEXT:    [[TMP50:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK12-NEXT:    ret i32 [[TMP50]]
 //
 //
 // CHECK12-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l86
@@ -3807,7 +3783,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK12-LABEL: define {{[^@]+}}@_Z5tmainIiLi10ELi2EEiT_
-// CHECK12-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK12-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK12-NEXT:  entry:
 // CHECK12-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK12-NEXT:    [[A:%.*]] = alloca [10 x [2 x i32]], align 4
@@ -3828,7 +3804,7 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK12-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK12-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 20)
-// CHECK12-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l72.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK12-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l72.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.4, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.5, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK12-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK12-NEXT:    br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK12:       omp_offload.failed:
@@ -4015,7 +3991,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK12-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK12-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK12-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK12-NEXT:  entry:
 // CHECK12-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK12-NEXT:    ret void

diff  --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp
index 1c4ca161bae74..fc1612f0895ba 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp
@@ -3105,14 +3105,14 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    store i32 [[TMP3]], i32* [[CONV]], align 4
 // CHECK9-NEXT:    [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8
 // CHECK9-NEXT:    [[TMP5:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK9-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP4]], i64* [[TMP7]], align 8
-// CHECK9-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP4]], i64* [[TMP9]], align 8
-// CHECK9-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK9-NEXT:    store i64 4, i64* [[TMP10]], align 8
+// CHECK9-NEXT:    [[TMP6:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK9-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP6]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes to i8*), i64 24, i1 false)
+// CHECK9-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP4]], i64* [[TMP8]], align 8
+// CHECK9-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP4]], i64* [[TMP10]], align 8
 // CHECK9-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
 // CHECK9-NEXT:    store i8* null, i8** [[TMP11]], align 8
 // CHECK9-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -3121,174 +3121,166 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK9-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i64*
 // CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP15]], align 8
-// CHECK9-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK9-NEXT:    store i64 8, i64* [[TMP16]], align 8
-// CHECK9-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK9-NEXT:    store i8* null, i8** [[TMP17]], align 8
-// CHECK9-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
-// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP19]], align 8
-// CHECK9-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32**
-// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP21]], align 8
-// CHECK9-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK9-NEXT:    store i64 [[TMP5]], i64* [[TMP22]], align 8
-// CHECK9-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK9-NEXT:    store i8* null, i8** [[TMP23]], align 8
-// CHECK9-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP27:%.*]] = load i32, i32* [[N]], align 4
-// CHECK9-NEXT:    store i32 [[TMP27]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK9-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK9-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP28]], 0
+// CHECK9-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK9-NEXT:    store i8* null, i8** [[TMP16]], align 8
+// CHECK9-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32**
+// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP18]], align 8
+// CHECK9-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32**
+// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP20]], align 8
+// CHECK9-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
+// CHECK9-NEXT:    store i64 [[TMP5]], i64* [[TMP21]], align 8
+// CHECK9-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK9-NEXT:    store i8* null, i8** [[TMP22]], align 8
+// CHECK9-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP26:%.*]] = load i32, i32* [[N]], align 4
+// CHECK9-NEXT:    store i32 [[TMP26]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK9-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK9-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP27]], 0
 // CHECK9-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK9-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK9-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK9-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK9-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP29]], 1
-// CHECK9-NEXT:    [[TMP30:%.*]] = zext i32 [[ADD]] to i64
-// CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP30]])
-// CHECK9-NEXT:    [[TMP31:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l108.region_id, i32 3, i8** [[TMP24]], i8** [[TMP25]], i64* [[TMP26]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK9-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
-// CHECK9-NEXT:    br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK9-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK9-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP28]], 1
+// CHECK9-NEXT:    [[TMP29:%.*]] = zext i32 [[ADD]] to i64
+// CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP29]])
+// CHECK9-NEXT:    [[TMP30:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l108.region_id, i32 3, i8** [[TMP23]], i8** [[TMP24]], i64* [[TMP25]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK9-NEXT:    [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
+// CHECK9-NEXT:    br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK9:       omp_offload.failed:
 // CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l108(i64 [[TMP4]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK9:       omp_offload.cont:
-// CHECK9-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N]], align 4
+// CHECK9-NEXT:    [[TMP32:%.*]] = load i32, i32* [[N]], align 4
 // CHECK9-NEXT:    [[CONV4:%.*]] = bitcast i64* [[N_CASTED3]] to i32*
-// CHECK9-NEXT:    store i32 [[TMP33]], i32* [[CONV4]], align 4
-// CHECK9-NEXT:    [[TMP34:%.*]] = load i64, i64* [[N_CASTED3]], align 8
-// CHECK9-NEXT:    [[TMP35:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK9-NEXT:    store i32 [[TMP32]], i32* [[CONV4]], align 4
+// CHECK9-NEXT:    [[TMP33:%.*]] = load i64, i64* [[N_CASTED3]], align 8
+// CHECK9-NEXT:    [[TMP34:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK9-NEXT:    [[TMP35:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES8]] to i8*
+// CHECK9-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP35]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes.4 to i8*), i64 24, i1 false)
 // CHECK9-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP34]], i64* [[TMP37]], align 8
+// CHECK9-NEXT:    store i64 [[TMP33]], i64* [[TMP37]], align 8
 // CHECK9-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP39:%.*]] = bitcast i8** [[TMP38]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP34]], i64* [[TMP39]], align 8
-// CHECK9-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 0
-// CHECK9-NEXT:    store i64 4, i64* [[TMP40]], align 8
-// CHECK9-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 0
-// CHECK9-NEXT:    store i8* null, i8** [[TMP41]], align 8
-// CHECK9-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 1
-// CHECK9-NEXT:    [[TMP43:%.*]] = bitcast i8** [[TMP42]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP43]], align 8
-// CHECK9-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 1
-// CHECK9-NEXT:    [[TMP45:%.*]] = bitcast i8** [[TMP44]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP45]], align 8
-// CHECK9-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 1
-// CHECK9-NEXT:    store i64 8, i64* [[TMP46]], align 8
-// CHECK9-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 1
-// CHECK9-NEXT:    store i8* null, i8** [[TMP47]], align 8
-// CHECK9-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 2
+// CHECK9-NEXT:    store i64 [[TMP33]], i64* [[TMP39]], align 8
+// CHECK9-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 0
+// CHECK9-NEXT:    store i8* null, i8** [[TMP40]], align 8
+// CHECK9-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 1
+// CHECK9-NEXT:    [[TMP42:%.*]] = bitcast i8** [[TMP41]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP42]], align 8
+// CHECK9-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 1
+// CHECK9-NEXT:    [[TMP44:%.*]] = bitcast i8** [[TMP43]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP44]], align 8
+// CHECK9-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 1
+// CHECK9-NEXT:    store i8* null, i8** [[TMP45]], align 8
+// CHECK9-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP47:%.*]] = bitcast i8** [[TMP46]] to i32**
+// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP47]], align 8
+// CHECK9-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 2
 // CHECK9-NEXT:    [[TMP49:%.*]] = bitcast i8** [[TMP48]] to i32**
 // CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP49]], align 8
-// CHECK9-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP51:%.*]] = bitcast i8** [[TMP50]] to i32**
-// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP51]], align 8
-// CHECK9-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 2
-// CHECK9-NEXT:    store i64 [[TMP35]], i64* [[TMP52]], align 8
-// CHECK9-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 2
-// CHECK9-NEXT:    store i8* null, i8** [[TMP53]], align 8
-// CHECK9-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP57:%.*]] = load i32, i32* [[N]], align 4
-// CHECK9-NEXT:    store i32 [[TMP57]], i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK9-NEXT:    [[TMP58:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK9-NEXT:    [[SUB12:%.*]] = sub nsw i32 [[TMP58]], 0
+// CHECK9-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 2
+// CHECK9-NEXT:    store i64 [[TMP34]], i64* [[TMP50]], align 8
+// CHECK9-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 2
+// CHECK9-NEXT:    store i8* null, i8** [[TMP51]], align 8
+// CHECK9-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP55:%.*]] = load i32, i32* [[N]], align 4
+// CHECK9-NEXT:    store i32 [[TMP55]], i32* [[DOTCAPTURE_EXPR_10]], align 4
+// CHECK9-NEXT:    [[TMP56:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
+// CHECK9-NEXT:    [[SUB12:%.*]] = sub nsw i32 [[TMP56]], 0
 // CHECK9-NEXT:    [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1
 // CHECK9-NEXT:    [[SUB14:%.*]] = sub nsw i32 [[DIV13]], 1
 // CHECK9-NEXT:    store i32 [[SUB14]], i32* [[DOTCAPTURE_EXPR_11]], align 4
-// CHECK9-NEXT:    [[TMP59:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_11]], align 4
-// CHECK9-NEXT:    [[ADD15:%.*]] = add nsw i32 [[TMP59]], 1
-// CHECK9-NEXT:    [[TMP60:%.*]] = zext i32 [[ADD15]] to i64
-// CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP60]])
-// CHECK9-NEXT:    [[TMP61:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l113.region_id, i32 3, i8** [[TMP54]], i8** [[TMP55]], i64* [[TMP56]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK9-NEXT:    [[TMP62:%.*]] = icmp ne i32 [[TMP61]], 0
-// CHECK9-NEXT:    br i1 [[TMP62]], label [[OMP_OFFLOAD_FAILED16:%.*]], label [[OMP_OFFLOAD_CONT17:%.*]]
+// CHECK9-NEXT:    [[TMP57:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_11]], align 4
+// CHECK9-NEXT:    [[ADD15:%.*]] = add nsw i32 [[TMP57]], 1
+// CHECK9-NEXT:    [[TMP58:%.*]] = zext i32 [[ADD15]] to i64
+// CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP58]])
+// CHECK9-NEXT:    [[TMP59:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l113.region_id, i32 3, i8** [[TMP52]], i8** [[TMP53]], i64* [[TMP54]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.5, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK9-NEXT:    [[TMP60:%.*]] = icmp ne i32 [[TMP59]], 0
+// CHECK9-NEXT:    br i1 [[TMP60]], label [[OMP_OFFLOAD_FAILED16:%.*]], label [[OMP_OFFLOAD_CONT17:%.*]]
 // CHECK9:       omp_offload.failed16:
-// CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l113(i64 [[TMP34]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l113(i64 [[TMP33]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT17]]
 // CHECK9:       omp_offload.cont17:
-// CHECK9-NEXT:    [[TMP63:%.*]] = load i32, i32* [[M]], align 4
+// CHECK9-NEXT:    [[TMP61:%.*]] = load i32, i32* [[M]], align 4
 // CHECK9-NEXT:    [[CONV18:%.*]] = bitcast i64* [[M_CASTED]] to i32*
-// CHECK9-NEXT:    store i32 [[TMP63]], i32* [[CONV18]], align 4
-// CHECK9-NEXT:    [[TMP64:%.*]] = load i64, i64* [[M_CASTED]], align 8
-// CHECK9-NEXT:    [[TMP65:%.*]] = load i32, i32* [[N]], align 4
+// CHECK9-NEXT:    store i32 [[TMP61]], i32* [[CONV18]], align 4
+// CHECK9-NEXT:    [[TMP62:%.*]] = load i64, i64* [[M_CASTED]], align 8
+// CHECK9-NEXT:    [[TMP63:%.*]] = load i32, i32* [[N]], align 4
 // CHECK9-NEXT:    [[CONV20:%.*]] = bitcast i64* [[N_CASTED19]] to i32*
-// CHECK9-NEXT:    store i32 [[TMP65]], i32* [[CONV20]], align 4
-// CHECK9-NEXT:    [[TMP66:%.*]] = load i64, i64* [[N_CASTED19]], align 8
-// CHECK9-NEXT:    [[TMP67:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK9-NEXT:    [[TMP68:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP69:%.*]] = bitcast i8** [[TMP68]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP64]], i64* [[TMP69]], align 8
-// CHECK9-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP71:%.*]] = bitcast i8** [[TMP70]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP64]], i64* [[TMP71]], align 8
-// CHECK9-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES24]], i32 0, i32 0
-// CHECK9-NEXT:    store i64 4, i64* [[TMP72]], align 8
-// CHECK9-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 0
-// CHECK9-NEXT:    store i8* null, i8** [[TMP73]], align 8
-// CHECK9-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 1
+// CHECK9-NEXT:    store i32 [[TMP63]], i32* [[CONV20]], align 4
+// CHECK9-NEXT:    [[TMP64:%.*]] = load i64, i64* [[N_CASTED19]], align 8
+// CHECK9-NEXT:    [[TMP65:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK9-NEXT:    [[TMP66:%.*]] = bitcast [4 x i64]* [[DOTOFFLOAD_SIZES24]] to i8*
+// CHECK9-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP66]], i8* align 8 bitcast ([4 x i64]* @.offload_sizes.8 to i8*), i64 32, i1 false)
+// CHECK9-NEXT:    [[TMP67:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP68:%.*]] = bitcast i8** [[TMP67]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP62]], i64* [[TMP68]], align 8
+// CHECK9-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP70:%.*]] = bitcast i8** [[TMP69]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP62]], i64* [[TMP70]], align 8
+// CHECK9-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 0
+// CHECK9-NEXT:    store i8* null, i8** [[TMP71]], align 8
+// CHECK9-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 1
+// CHECK9-NEXT:    [[TMP73:%.*]] = bitcast i8** [[TMP72]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP64]], i64* [[TMP73]], align 8
+// CHECK9-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 1
 // CHECK9-NEXT:    [[TMP75:%.*]] = bitcast i8** [[TMP74]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP66]], i64* [[TMP75]], align 8
-// CHECK9-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 1
-// CHECK9-NEXT:    [[TMP77:%.*]] = bitcast i8** [[TMP76]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP66]], i64* [[TMP77]], align 8
-// CHECK9-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES24]], i32 0, i32 1
-// CHECK9-NEXT:    store i64 4, i64* [[TMP78]], align 8
-// CHECK9-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 1
-// CHECK9-NEXT:    store i8* null, i8** [[TMP79]], align 8
-// CHECK9-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP81:%.*]] = bitcast i8** [[TMP80]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP81]], align 8
-// CHECK9-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP83]], align 8
-// CHECK9-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES24]], i32 0, i32 2
-// CHECK9-NEXT:    store i64 8, i64* [[TMP84]], align 8
-// CHECK9-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 2
-// CHECK9-NEXT:    store i8* null, i8** [[TMP85]], align 8
-// CHECK9-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 3
-// CHECK9-NEXT:    [[TMP87:%.*]] = bitcast i8** [[TMP86]] to i32**
-// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP87]], align 8
-// CHECK9-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 3
-// CHECK9-NEXT:    [[TMP89:%.*]] = bitcast i8** [[TMP88]] to i32**
-// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP89]], align 8
-// CHECK9-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES24]], i32 0, i32 3
-// CHECK9-NEXT:    store i64 [[TMP67]], i64* [[TMP90]], align 8
-// CHECK9-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 3
-// CHECK9-NEXT:    store i8* null, i8** [[TMP91]], align 8
-// CHECK9-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES24]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP95:%.*]] = load i32, i32* [[N]], align 4
-// CHECK9-NEXT:    store i32 [[TMP95]], i32* [[DOTCAPTURE_EXPR_26]], align 4
-// CHECK9-NEXT:    [[TMP96:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_26]], align 4
-// CHECK9-NEXT:    [[SUB28:%.*]] = sub nsw i32 [[TMP96]], 0
+// CHECK9-NEXT:    store i64 [[TMP64]], i64* [[TMP75]], align 8
+// CHECK9-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 1
+// CHECK9-NEXT:    store i8* null, i8** [[TMP76]], align 8
+// CHECK9-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP78:%.*]] = bitcast i8** [[TMP77]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP78]], align 8
+// CHECK9-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP80]], align 8
+// CHECK9-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 2
+// CHECK9-NEXT:    store i8* null, i8** [[TMP81]], align 8
+// CHECK9-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 3
+// CHECK9-NEXT:    [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i32**
+// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP83]], align 8
+// CHECK9-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 3
+// CHECK9-NEXT:    [[TMP85:%.*]] = bitcast i8** [[TMP84]] to i32**
+// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP85]], align 8
+// CHECK9-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES24]], i32 0, i32 3
+// CHECK9-NEXT:    store i64 [[TMP65]], i64* [[TMP86]], align 8
+// CHECK9-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 3
+// CHECK9-NEXT:    store i8* null, i8** [[TMP87]], align 8
+// CHECK9-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES24]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP91:%.*]] = load i32, i32* [[N]], align 4
+// CHECK9-NEXT:    store i32 [[TMP91]], i32* [[DOTCAPTURE_EXPR_26]], align 4
+// CHECK9-NEXT:    [[TMP92:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_26]], align 4
+// CHECK9-NEXT:    [[SUB28:%.*]] = sub nsw i32 [[TMP92]], 0
 // CHECK9-NEXT:    [[DIV29:%.*]] = sdiv i32 [[SUB28]], 1
 // CHECK9-NEXT:    [[SUB30:%.*]] = sub nsw i32 [[DIV29]], 1
 // CHECK9-NEXT:    store i32 [[SUB30]], i32* [[DOTCAPTURE_EXPR_27]], align 4
-// CHECK9-NEXT:    [[TMP97:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_27]], align 4
-// CHECK9-NEXT:    [[ADD31:%.*]] = add nsw i32 [[TMP97]], 1
-// CHECK9-NEXT:    [[TMP98:%.*]] = zext i32 [[ADD31]] to i64
-// CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP98]])
-// CHECK9-NEXT:    [[TMP99:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l118.region_id, i32 4, i8** [[TMP92]], i8** [[TMP93]], i64* [[TMP94]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.7, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK9-NEXT:    [[TMP100:%.*]] = icmp ne i32 [[TMP99]], 0
-// CHECK9-NEXT:    br i1 [[TMP100]], label [[OMP_OFFLOAD_FAILED32:%.*]], label [[OMP_OFFLOAD_CONT33:%.*]]
+// CHECK9-NEXT:    [[TMP93:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_27]], align 4
+// CHECK9-NEXT:    [[ADD31:%.*]] = add nsw i32 [[TMP93]], 1
+// CHECK9-NEXT:    [[TMP94:%.*]] = zext i32 [[ADD31]] to i64
+// CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP94]])
+// CHECK9-NEXT:    [[TMP95:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l118.region_id, i32 4, i8** [[TMP88]], i8** [[TMP89]], i64* [[TMP90]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK9-NEXT:    [[TMP96:%.*]] = icmp ne i32 [[TMP95]], 0
+// CHECK9-NEXT:    br i1 [[TMP96]], label [[OMP_OFFLOAD_FAILED32:%.*]], label [[OMP_OFFLOAD_CONT33:%.*]]
 // CHECK9:       omp_offload.failed32:
-// CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l118(i64 [[TMP64]], i64 [[TMP66]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l118(i64 [[TMP62]], i64 [[TMP64]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT33]]
 // CHECK9:       omp_offload.cont33:
-// CHECK9-NEXT:    [[TMP101:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK9-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10EEiT_(i32 noundef signext [[TMP101]])
+// CHECK9-NEXT:    [[TMP97:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK9-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10EEiT_(i32 noundef signext [[TMP97]])
 // CHECK9-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK9-NEXT:    [[TMP102:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK9-NEXT:    call void @llvm.stackrestore(i8* [[TMP102]])
-// CHECK9-NEXT:    [[TMP103:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK9-NEXT:    ret i32 [[TMP103]]
+// CHECK9-NEXT:    [[TMP98:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK9-NEXT:    call void @llvm.stackrestore(i8* [[TMP98]])
+// CHECK9-NEXT:    [[TMP99:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK9-NEXT:    ret i32 [[TMP99]]
 //
 //
 // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l108
@@ -3786,11 +3778,11 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK9-NEXT:    store i32 [[TMP3]], i32* [[CONV2]], align 4
 // CHECK9-NEXT:    [[TMP4:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32* [[CONV1]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP4]])
+// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* [[CONV1]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP4]])
 // CHECK9-NEXT:    ret void
 //
 //
-// CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..5
+// CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..6
 // CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK9-NEXT:  entry:
 // CHECK9-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -3872,7 +3864,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK9-NEXT:    store i32 [[TMP21]], i32* [[CONV7]], align 4, !llvm.access.group !24
 // CHECK9-NEXT:    [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !24
-// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]], i64 [[TMP22]]), !llvm.access.group !24
+// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*, i64)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]], i64 [[TMP22]]), !llvm.access.group !24
 // CHECK9-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK9:       omp.inner.for.inc:
 // CHECK9-NEXT:    [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24
@@ -3926,7 +3918,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    ret void
 //
 //
-// CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..6
+// CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..7
 // CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK9-NEXT:  entry:
 // CHECK9-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -4047,7 +4039,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK9-LABEL: define {{[^@]+}}@_Z5tmainIiLi10EEiT_
-// CHECK9-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK9-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK9-NEXT:  entry:
 // CHECK9-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[A:%.*]] = alloca [10 x i32], align 4
@@ -4078,7 +4070,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK9-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l86.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.10, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK9-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l86.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.12, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK9-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK9-NEXT:    br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK9:       omp_offload.failed:
@@ -4096,7 +4088,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
 // CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK9-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l91.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.13, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.14, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK9-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l91.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.16, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.17, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK9-NEXT:    [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
 // CHECK9-NEXT:    br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]]
 // CHECK9:       omp_offload.failed5:
@@ -4126,7 +4118,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS8]], i32 0, i32 0
 // CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK9-NEXT:    [[TMP32:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l96.region_id, i32 2, i8** [[TMP30]], i8** [[TMP31]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.17, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.18, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK9-NEXT:    [[TMP32:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l96.region_id, i32 2, i8** [[TMP30]], i8** [[TMP31]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.20, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.21, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK9-NEXT:    [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0
 // CHECK9-NEXT:    br i1 [[TMP33]], label [[OMP_OFFLOAD_FAILED11:%.*]], label [[OMP_OFFLOAD_CONT12:%.*]]
 // CHECK9:       omp_offload.failed11:
@@ -4142,11 +4134,11 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8
 // CHECK9-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8
 // CHECK9-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8
-// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK9-NEXT:    ret void
 //
 //
-// CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..8
+// CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..10
 // CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK9-NEXT:  entry:
 // CHECK9-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -4194,7 +4186,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
 // CHECK9-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30
 // CHECK9-NEXT:    [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
-// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !30
+// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !30
 // CHECK9-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK9:       omp.inner.for.inc:
 // CHECK9-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30
@@ -4216,7 +4208,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    ret void
 //
 //
-// CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..9
+// CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..11
 // CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK9-NEXT:  entry:
 // CHECK9-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -4306,11 +4298,11 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8
 // CHECK9-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8
 // CHECK9-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8
-// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK9-NEXT:    ret void
 //
 //
-// CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..11
+// CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..14
 // CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK9-NEXT:  entry:
 // CHECK9-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -4358,7 +4350,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
 // CHECK9-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36
 // CHECK9-NEXT:    [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
-// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !36
+// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !36
 // CHECK9-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK9:       omp.inner.for.inc:
 // CHECK9-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36
@@ -4380,7 +4372,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    ret void
 //
 //
-// CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..12
+// CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..15
 // CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK9-NEXT:  entry:
 // CHECK9-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -4481,11 +4473,11 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK9-NEXT:    store i32 [[TMP2]], i32* [[CONV1]], align 4
 // CHECK9-NEXT:    [[TMP3:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i64 [[TMP3]])
+// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i64)* @.omp_outlined..18 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i64 [[TMP3]])
 // CHECK9-NEXT:    ret void
 //
 //
-// CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..15
+// CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..18
 // CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK9-NEXT:  entry:
 // CHECK9-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -4541,7 +4533,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK9-NEXT:    store i32 [[TMP12]], i32* [[CONV2]], align 4, !llvm.access.group !42
 // CHECK9-NEXT:    [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !42
-// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]), !llvm.access.group !42
+// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]), !llvm.access.group !42
 // CHECK9-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK9:       omp.inner.for.inc:
 // CHECK9-NEXT:    [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42
@@ -4584,7 +4576,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    ret void
 //
 //
-// CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..16
+// CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..19
 // CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK9-NEXT:  entry:
 // CHECK9-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -4672,7 +4664,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK9-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK9-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK9-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK9-NEXT:  entry:
 // CHECK9-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK9-NEXT:    ret void
@@ -4729,14 +4721,14 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    store i32 [[TMP3]], i32* [[CONV]], align 4
 // CHECK10-NEXT:    [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8
 // CHECK10-NEXT:    [[TMP5:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK10-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP4]], i64* [[TMP7]], align 8
-// CHECK10-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP4]], i64* [[TMP9]], align 8
-// CHECK10-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK10-NEXT:    store i64 4, i64* [[TMP10]], align 8
+// CHECK10-NEXT:    [[TMP6:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK10-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP6]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes to i8*), i64 24, i1 false)
+// CHECK10-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP4]], i64* [[TMP8]], align 8
+// CHECK10-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP4]], i64* [[TMP10]], align 8
 // CHECK10-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
 // CHECK10-NEXT:    store i8* null, i8** [[TMP11]], align 8
 // CHECK10-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -4745,174 +4737,166 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK10-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i64*
 // CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP15]], align 8
-// CHECK10-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK10-NEXT:    store i64 8, i64* [[TMP16]], align 8
-// CHECK10-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK10-NEXT:    store i8* null, i8** [[TMP17]], align 8
-// CHECK10-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK10-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
-// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP19]], align 8
-// CHECK10-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK10-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32**
-// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP21]], align 8
-// CHECK10-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK10-NEXT:    store i64 [[TMP5]], i64* [[TMP22]], align 8
-// CHECK10-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK10-NEXT:    store i8* null, i8** [[TMP23]], align 8
-// CHECK10-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP27:%.*]] = load i32, i32* [[N]], align 4
-// CHECK10-NEXT:    store i32 [[TMP27]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK10-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK10-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP28]], 0
+// CHECK10-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK10-NEXT:    store i8* null, i8** [[TMP16]], align 8
+// CHECK10-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK10-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32**
+// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP18]], align 8
+// CHECK10-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK10-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32**
+// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP20]], align 8
+// CHECK10-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
+// CHECK10-NEXT:    store i64 [[TMP5]], i64* [[TMP21]], align 8
+// CHECK10-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK10-NEXT:    store i8* null, i8** [[TMP22]], align 8
+// CHECK10-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP26:%.*]] = load i32, i32* [[N]], align 4
+// CHECK10-NEXT:    store i32 [[TMP26]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK10-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK10-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP27]], 0
 // CHECK10-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK10-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK10-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK10-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK10-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP29]], 1
-// CHECK10-NEXT:    [[TMP30:%.*]] = zext i32 [[ADD]] to i64
-// CHECK10-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP30]])
-// CHECK10-NEXT:    [[TMP31:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l108.region_id, i32 3, i8** [[TMP24]], i8** [[TMP25]], i64* [[TMP26]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK10-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
-// CHECK10-NEXT:    br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK10-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK10-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP28]], 1
+// CHECK10-NEXT:    [[TMP29:%.*]] = zext i32 [[ADD]] to i64
+// CHECK10-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP29]])
+// CHECK10-NEXT:    [[TMP30:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l108.region_id, i32 3, i8** [[TMP23]], i8** [[TMP24]], i64* [[TMP25]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK10-NEXT:    [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
+// CHECK10-NEXT:    br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK10:       omp_offload.failed:
 // CHECK10-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l108(i64 [[TMP4]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK10-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK10:       omp_offload.cont:
-// CHECK10-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N]], align 4
+// CHECK10-NEXT:    [[TMP32:%.*]] = load i32, i32* [[N]], align 4
 // CHECK10-NEXT:    [[CONV4:%.*]] = bitcast i64* [[N_CASTED3]] to i32*
-// CHECK10-NEXT:    store i32 [[TMP33]], i32* [[CONV4]], align 4
-// CHECK10-NEXT:    [[TMP34:%.*]] = load i64, i64* [[N_CASTED3]], align 8
-// CHECK10-NEXT:    [[TMP35:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK10-NEXT:    store i32 [[TMP32]], i32* [[CONV4]], align 4
+// CHECK10-NEXT:    [[TMP33:%.*]] = load i64, i64* [[N_CASTED3]], align 8
+// CHECK10-NEXT:    [[TMP34:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK10-NEXT:    [[TMP35:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES8]] to i8*
+// CHECK10-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP35]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes.4 to i8*), i64 24, i1 false)
 // CHECK10-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
 // CHECK10-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP34]], i64* [[TMP37]], align 8
+// CHECK10-NEXT:    store i64 [[TMP33]], i64* [[TMP37]], align 8
 // CHECK10-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
 // CHECK10-NEXT:    [[TMP39:%.*]] = bitcast i8** [[TMP38]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP34]], i64* [[TMP39]], align 8
-// CHECK10-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 0
-// CHECK10-NEXT:    store i64 4, i64* [[TMP40]], align 8
-// CHECK10-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 0
-// CHECK10-NEXT:    store i8* null, i8** [[TMP41]], align 8
-// CHECK10-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 1
-// CHECK10-NEXT:    [[TMP43:%.*]] = bitcast i8** [[TMP42]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP43]], align 8
-// CHECK10-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 1
-// CHECK10-NEXT:    [[TMP45:%.*]] = bitcast i8** [[TMP44]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP45]], align 8
-// CHECK10-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 1
-// CHECK10-NEXT:    store i64 8, i64* [[TMP46]], align 8
-// CHECK10-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 1
-// CHECK10-NEXT:    store i8* null, i8** [[TMP47]], align 8
-// CHECK10-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 2
+// CHECK10-NEXT:    store i64 [[TMP33]], i64* [[TMP39]], align 8
+// CHECK10-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 0
+// CHECK10-NEXT:    store i8* null, i8** [[TMP40]], align 8
+// CHECK10-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 1
+// CHECK10-NEXT:    [[TMP42:%.*]] = bitcast i8** [[TMP41]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP42]], align 8
+// CHECK10-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 1
+// CHECK10-NEXT:    [[TMP44:%.*]] = bitcast i8** [[TMP43]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP44]], align 8
+// CHECK10-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 1
+// CHECK10-NEXT:    store i8* null, i8** [[TMP45]], align 8
+// CHECK10-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 2
+// CHECK10-NEXT:    [[TMP47:%.*]] = bitcast i8** [[TMP46]] to i32**
+// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP47]], align 8
+// CHECK10-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 2
 // CHECK10-NEXT:    [[TMP49:%.*]] = bitcast i8** [[TMP48]] to i32**
 // CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP49]], align 8
-// CHECK10-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 2
-// CHECK10-NEXT:    [[TMP51:%.*]] = bitcast i8** [[TMP50]] to i32**
-// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP51]], align 8
-// CHECK10-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 2
-// CHECK10-NEXT:    store i64 [[TMP35]], i64* [[TMP52]], align 8
-// CHECK10-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 2
-// CHECK10-NEXT:    store i8* null, i8** [[TMP53]], align 8
-// CHECK10-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP57:%.*]] = load i32, i32* [[N]], align 4
-// CHECK10-NEXT:    store i32 [[TMP57]], i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK10-NEXT:    [[TMP58:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK10-NEXT:    [[SUB12:%.*]] = sub nsw i32 [[TMP58]], 0
+// CHECK10-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 2
+// CHECK10-NEXT:    store i64 [[TMP34]], i64* [[TMP50]], align 8
+// CHECK10-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 2
+// CHECK10-NEXT:    store i8* null, i8** [[TMP51]], align 8
+// CHECK10-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP55:%.*]] = load i32, i32* [[N]], align 4
+// CHECK10-NEXT:    store i32 [[TMP55]], i32* [[DOTCAPTURE_EXPR_10]], align 4
+// CHECK10-NEXT:    [[TMP56:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
+// CHECK10-NEXT:    [[SUB12:%.*]] = sub nsw i32 [[TMP56]], 0
 // CHECK10-NEXT:    [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1
 // CHECK10-NEXT:    [[SUB14:%.*]] = sub nsw i32 [[DIV13]], 1
 // CHECK10-NEXT:    store i32 [[SUB14]], i32* [[DOTCAPTURE_EXPR_11]], align 4
-// CHECK10-NEXT:    [[TMP59:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_11]], align 4
-// CHECK10-NEXT:    [[ADD15:%.*]] = add nsw i32 [[TMP59]], 1
-// CHECK10-NEXT:    [[TMP60:%.*]] = zext i32 [[ADD15]] to i64
-// CHECK10-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP60]])
-// CHECK10-NEXT:    [[TMP61:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l113.region_id, i32 3, i8** [[TMP54]], i8** [[TMP55]], i64* [[TMP56]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK10-NEXT:    [[TMP62:%.*]] = icmp ne i32 [[TMP61]], 0
-// CHECK10-NEXT:    br i1 [[TMP62]], label [[OMP_OFFLOAD_FAILED16:%.*]], label [[OMP_OFFLOAD_CONT17:%.*]]
+// CHECK10-NEXT:    [[TMP57:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_11]], align 4
+// CHECK10-NEXT:    [[ADD15:%.*]] = add nsw i32 [[TMP57]], 1
+// CHECK10-NEXT:    [[TMP58:%.*]] = zext i32 [[ADD15]] to i64
+// CHECK10-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP58]])
+// CHECK10-NEXT:    [[TMP59:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l113.region_id, i32 3, i8** [[TMP52]], i8** [[TMP53]], i64* [[TMP54]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.5, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK10-NEXT:    [[TMP60:%.*]] = icmp ne i32 [[TMP59]], 0
+// CHECK10-NEXT:    br i1 [[TMP60]], label [[OMP_OFFLOAD_FAILED16:%.*]], label [[OMP_OFFLOAD_CONT17:%.*]]
 // CHECK10:       omp_offload.failed16:
-// CHECK10-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l113(i64 [[TMP34]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK10-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l113(i64 [[TMP33]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK10-NEXT:    br label [[OMP_OFFLOAD_CONT17]]
 // CHECK10:       omp_offload.cont17:
-// CHECK10-NEXT:    [[TMP63:%.*]] = load i32, i32* [[M]], align 4
+// CHECK10-NEXT:    [[TMP61:%.*]] = load i32, i32* [[M]], align 4
 // CHECK10-NEXT:    [[CONV18:%.*]] = bitcast i64* [[M_CASTED]] to i32*
-// CHECK10-NEXT:    store i32 [[TMP63]], i32* [[CONV18]], align 4
-// CHECK10-NEXT:    [[TMP64:%.*]] = load i64, i64* [[M_CASTED]], align 8
-// CHECK10-NEXT:    [[TMP65:%.*]] = load i32, i32* [[N]], align 4
+// CHECK10-NEXT:    store i32 [[TMP61]], i32* [[CONV18]], align 4
+// CHECK10-NEXT:    [[TMP62:%.*]] = load i64, i64* [[M_CASTED]], align 8
+// CHECK10-NEXT:    [[TMP63:%.*]] = load i32, i32* [[N]], align 4
 // CHECK10-NEXT:    [[CONV20:%.*]] = bitcast i64* [[N_CASTED19]] to i32*
-// CHECK10-NEXT:    store i32 [[TMP65]], i32* [[CONV20]], align 4
-// CHECK10-NEXT:    [[TMP66:%.*]] = load i64, i64* [[N_CASTED19]], align 8
-// CHECK10-NEXT:    [[TMP67:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK10-NEXT:    [[TMP68:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP69:%.*]] = bitcast i8** [[TMP68]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP64]], i64* [[TMP69]], align 8
-// CHECK10-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP71:%.*]] = bitcast i8** [[TMP70]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP64]], i64* [[TMP71]], align 8
-// CHECK10-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES24]], i32 0, i32 0
-// CHECK10-NEXT:    store i64 4, i64* [[TMP72]], align 8
-// CHECK10-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 0
-// CHECK10-NEXT:    store i8* null, i8** [[TMP73]], align 8
-// CHECK10-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 1
+// CHECK10-NEXT:    store i32 [[TMP63]], i32* [[CONV20]], align 4
+// CHECK10-NEXT:    [[TMP64:%.*]] = load i64, i64* [[N_CASTED19]], align 8
+// CHECK10-NEXT:    [[TMP65:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK10-NEXT:    [[TMP66:%.*]] = bitcast [4 x i64]* [[DOTOFFLOAD_SIZES24]] to i8*
+// CHECK10-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP66]], i8* align 8 bitcast ([4 x i64]* @.offload_sizes.8 to i8*), i64 32, i1 false)
+// CHECK10-NEXT:    [[TMP67:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP68:%.*]] = bitcast i8** [[TMP67]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP62]], i64* [[TMP68]], align 8
+// CHECK10-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP70:%.*]] = bitcast i8** [[TMP69]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP62]], i64* [[TMP70]], align 8
+// CHECK10-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 0
+// CHECK10-NEXT:    store i8* null, i8** [[TMP71]], align 8
+// CHECK10-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 1
+// CHECK10-NEXT:    [[TMP73:%.*]] = bitcast i8** [[TMP72]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP64]], i64* [[TMP73]], align 8
+// CHECK10-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 1
 // CHECK10-NEXT:    [[TMP75:%.*]] = bitcast i8** [[TMP74]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP66]], i64* [[TMP75]], align 8
-// CHECK10-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 1
-// CHECK10-NEXT:    [[TMP77:%.*]] = bitcast i8** [[TMP76]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP66]], i64* [[TMP77]], align 8
-// CHECK10-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES24]], i32 0, i32 1
-// CHECK10-NEXT:    store i64 4, i64* [[TMP78]], align 8
-// CHECK10-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 1
-// CHECK10-NEXT:    store i8* null, i8** [[TMP79]], align 8
-// CHECK10-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 2
-// CHECK10-NEXT:    [[TMP81:%.*]] = bitcast i8** [[TMP80]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP81]], align 8
-// CHECK10-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 2
-// CHECK10-NEXT:    [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP83]], align 8
-// CHECK10-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES24]], i32 0, i32 2
-// CHECK10-NEXT:    store i64 8, i64* [[TMP84]], align 8
-// CHECK10-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 2
-// CHECK10-NEXT:    store i8* null, i8** [[TMP85]], align 8
-// CHECK10-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 3
-// CHECK10-NEXT:    [[TMP87:%.*]] = bitcast i8** [[TMP86]] to i32**
-// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP87]], align 8
-// CHECK10-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 3
-// CHECK10-NEXT:    [[TMP89:%.*]] = bitcast i8** [[TMP88]] to i32**
-// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP89]], align 8
-// CHECK10-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES24]], i32 0, i32 3
-// CHECK10-NEXT:    store i64 [[TMP67]], i64* [[TMP90]], align 8
-// CHECK10-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 3
-// CHECK10-NEXT:    store i8* null, i8** [[TMP91]], align 8
-// CHECK10-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES24]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP95:%.*]] = load i32, i32* [[N]], align 4
-// CHECK10-NEXT:    store i32 [[TMP95]], i32* [[DOTCAPTURE_EXPR_26]], align 4
-// CHECK10-NEXT:    [[TMP96:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_26]], align 4
-// CHECK10-NEXT:    [[SUB28:%.*]] = sub nsw i32 [[TMP96]], 0
+// CHECK10-NEXT:    store i64 [[TMP64]], i64* [[TMP75]], align 8
+// CHECK10-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 1
+// CHECK10-NEXT:    store i8* null, i8** [[TMP76]], align 8
+// CHECK10-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 2
+// CHECK10-NEXT:    [[TMP78:%.*]] = bitcast i8** [[TMP77]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP78]], align 8
+// CHECK10-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 2
+// CHECK10-NEXT:    [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP80]], align 8
+// CHECK10-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 2
+// CHECK10-NEXT:    store i8* null, i8** [[TMP81]], align 8
+// CHECK10-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 3
+// CHECK10-NEXT:    [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i32**
+// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP83]], align 8
+// CHECK10-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 3
+// CHECK10-NEXT:    [[TMP85:%.*]] = bitcast i8** [[TMP84]] to i32**
+// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP85]], align 8
+// CHECK10-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES24]], i32 0, i32 3
+// CHECK10-NEXT:    store i64 [[TMP65]], i64* [[TMP86]], align 8
+// CHECK10-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 3
+// CHECK10-NEXT:    store i8* null, i8** [[TMP87]], align 8
+// CHECK10-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES24]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP91:%.*]] = load i32, i32* [[N]], align 4
+// CHECK10-NEXT:    store i32 [[TMP91]], i32* [[DOTCAPTURE_EXPR_26]], align 4
+// CHECK10-NEXT:    [[TMP92:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_26]], align 4
+// CHECK10-NEXT:    [[SUB28:%.*]] = sub nsw i32 [[TMP92]], 0
 // CHECK10-NEXT:    [[DIV29:%.*]] = sdiv i32 [[SUB28]], 1
 // CHECK10-NEXT:    [[SUB30:%.*]] = sub nsw i32 [[DIV29]], 1
 // CHECK10-NEXT:    store i32 [[SUB30]], i32* [[DOTCAPTURE_EXPR_27]], align 4
-// CHECK10-NEXT:    [[TMP97:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_27]], align 4
-// CHECK10-NEXT:    [[ADD31:%.*]] = add nsw i32 [[TMP97]], 1
-// CHECK10-NEXT:    [[TMP98:%.*]] = zext i32 [[ADD31]] to i64
-// CHECK10-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP98]])
-// CHECK10-NEXT:    [[TMP99:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l118.region_id, i32 4, i8** [[TMP92]], i8** [[TMP93]], i64* [[TMP94]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.7, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK10-NEXT:    [[TMP100:%.*]] = icmp ne i32 [[TMP99]], 0
-// CHECK10-NEXT:    br i1 [[TMP100]], label [[OMP_OFFLOAD_FAILED32:%.*]], label [[OMP_OFFLOAD_CONT33:%.*]]
+// CHECK10-NEXT:    [[TMP93:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_27]], align 4
+// CHECK10-NEXT:    [[ADD31:%.*]] = add nsw i32 [[TMP93]], 1
+// CHECK10-NEXT:    [[TMP94:%.*]] = zext i32 [[ADD31]] to i64
+// CHECK10-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP94]])
+// CHECK10-NEXT:    [[TMP95:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l118.region_id, i32 4, i8** [[TMP88]], i8** [[TMP89]], i64* [[TMP90]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK10-NEXT:    [[TMP96:%.*]] = icmp ne i32 [[TMP95]], 0
+// CHECK10-NEXT:    br i1 [[TMP96]], label [[OMP_OFFLOAD_FAILED32:%.*]], label [[OMP_OFFLOAD_CONT33:%.*]]
 // CHECK10:       omp_offload.failed32:
-// CHECK10-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l118(i64 [[TMP64]], i64 [[TMP66]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK10-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l118(i64 [[TMP62]], i64 [[TMP64]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK10-NEXT:    br label [[OMP_OFFLOAD_CONT33]]
 // CHECK10:       omp_offload.cont33:
-// CHECK10-NEXT:    [[TMP101:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK10-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10EEiT_(i32 noundef signext [[TMP101]])
+// CHECK10-NEXT:    [[TMP97:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK10-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10EEiT_(i32 noundef signext [[TMP97]])
 // CHECK10-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK10-NEXT:    [[TMP102:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK10-NEXT:    call void @llvm.stackrestore(i8* [[TMP102]])
-// CHECK10-NEXT:    [[TMP103:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK10-NEXT:    ret i32 [[TMP103]]
+// CHECK10-NEXT:    [[TMP98:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK10-NEXT:    call void @llvm.stackrestore(i8* [[TMP98]])
+// CHECK10-NEXT:    [[TMP99:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK10-NEXT:    ret i32 [[TMP99]]
 //
 //
 // CHECK10-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l108
@@ -5410,11 +5394,11 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK10-NEXT:    store i32 [[TMP3]], i32* [[CONV2]], align 4
 // CHECK10-NEXT:    [[TMP4:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32* [[CONV1]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP4]])
+// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* [[CONV1]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP4]])
 // CHECK10-NEXT:    ret void
 //
 //
-// CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..5
+// CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..6
 // CHECK10-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK10-NEXT:  entry:
 // CHECK10-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -5496,7 +5480,7 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK10-NEXT:    store i32 [[TMP21]], i32* [[CONV7]], align 4, !llvm.access.group !24
 // CHECK10-NEXT:    [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !24
-// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]], i64 [[TMP22]]), !llvm.access.group !24
+// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*, i64)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]], i64 [[TMP22]]), !llvm.access.group !24
 // CHECK10-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK10:       omp.inner.for.inc:
 // CHECK10-NEXT:    [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24
@@ -5550,7 +5534,7 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    ret void
 //
 //
-// CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..6
+// CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..7
 // CHECK10-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK10-NEXT:  entry:
 // CHECK10-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -5671,7 +5655,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK10-LABEL: define {{[^@]+}}@_Z5tmainIiLi10EEiT_
-// CHECK10-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK10-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK10-NEXT:  entry:
 // CHECK10-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK10-NEXT:    [[A:%.*]] = alloca [10 x i32], align 4
@@ -5702,7 +5686,7 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK10-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK10-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK10-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l86.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.10, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK10-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l86.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.12, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK10-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK10-NEXT:    br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK10:       omp_offload.failed:
@@ -5720,7 +5704,7 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK10-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
 // CHECK10-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK10-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l91.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.13, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.14, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK10-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l91.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.16, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.17, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK10-NEXT:    [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
 // CHECK10-NEXT:    br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]]
 // CHECK10:       omp_offload.failed5:
@@ -5750,7 +5734,7 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0
 // CHECK10-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS8]], i32 0, i32 0
 // CHECK10-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK10-NEXT:    [[TMP32:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l96.region_id, i32 2, i8** [[TMP30]], i8** [[TMP31]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.17, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.18, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK10-NEXT:    [[TMP32:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l96.region_id, i32 2, i8** [[TMP30]], i8** [[TMP31]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.20, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.21, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK10-NEXT:    [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0
 // CHECK10-NEXT:    br i1 [[TMP33]], label [[OMP_OFFLOAD_FAILED11:%.*]], label [[OMP_OFFLOAD_CONT12:%.*]]
 // CHECK10:       omp_offload.failed11:
@@ -5766,11 +5750,11 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8
 // CHECK10-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8
 // CHECK10-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8
-// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK10-NEXT:    ret void
 //
 //
-// CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..8
+// CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..10
 // CHECK10-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK10-NEXT:  entry:
 // CHECK10-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -5818,7 +5802,7 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
 // CHECK10-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30
 // CHECK10-NEXT:    [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
-// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !30
+// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !30
 // CHECK10-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK10:       omp.inner.for.inc:
 // CHECK10-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30
@@ -5840,7 +5824,7 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    ret void
 //
 //
-// CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..9
+// CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..11
 // CHECK10-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK10-NEXT:  entry:
 // CHECK10-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -5930,11 +5914,11 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8
 // CHECK10-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8
 // CHECK10-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8
-// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK10-NEXT:    ret void
 //
 //
-// CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..11
+// CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..14
 // CHECK10-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK10-NEXT:  entry:
 // CHECK10-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -5982,7 +5966,7 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
 // CHECK10-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36
 // CHECK10-NEXT:    [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
-// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !36
+// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !36
 // CHECK10-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK10:       omp.inner.for.inc:
 // CHECK10-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36
@@ -6004,7 +5988,7 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    ret void
 //
 //
-// CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..12
+// CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..15
 // CHECK10-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK10-NEXT:  entry:
 // CHECK10-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -6105,11 +6089,11 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK10-NEXT:    store i32 [[TMP2]], i32* [[CONV1]], align 4
 // CHECK10-NEXT:    [[TMP3:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i64 [[TMP3]])
+// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i64)* @.omp_outlined..18 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i64 [[TMP3]])
 // CHECK10-NEXT:    ret void
 //
 //
-// CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..15
+// CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..18
 // CHECK10-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK10-NEXT:  entry:
 // CHECK10-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -6165,7 +6149,7 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK10-NEXT:    store i32 [[TMP12]], i32* [[CONV2]], align 4, !llvm.access.group !42
 // CHECK10-NEXT:    [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !42
-// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]), !llvm.access.group !42
+// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]), !llvm.access.group !42
 // CHECK10-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK10:       omp.inner.for.inc:
 // CHECK10-NEXT:    [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42
@@ -6208,7 +6192,7 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    ret void
 //
 //
-// CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..16
+// CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..19
 // CHECK10-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK10-NEXT:  entry:
 // CHECK10-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -6296,7 +6280,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK10-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK10-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK10-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK10-NEXT:  entry:
 // CHECK10-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK10-NEXT:    ret void
@@ -6352,14 +6336,14 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4
 // CHECK11-NEXT:    [[TMP4:%.*]] = mul nuw i32 [[TMP0]], 4
 // CHECK11-NEXT:    [[TMP5:%.*]] = sext i32 [[TMP4]] to i64
-// CHECK11-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP3]], i32* [[TMP7]], align 4
-// CHECK11-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP3]], i32* [[TMP9]], align 4
-// CHECK11-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK11-NEXT:    store i64 4, i64* [[TMP10]], align 4
+// CHECK11-NEXT:    [[TMP6:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK11-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP6]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes to i8*), i32 24, i1 false)
+// CHECK11-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP3]], i32* [[TMP8]], align 4
+// CHECK11-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP3]], i32* [[TMP10]], align 4
 // CHECK11-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
 // CHECK11-NEXT:    store i8* null, i8** [[TMP11]], align 4
 // CHECK11-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -6368,173 +6352,165 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK11-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i32*
 // CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP15]], align 4
-// CHECK11-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK11-NEXT:    store i64 4, i64* [[TMP16]], align 4
-// CHECK11-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK11-NEXT:    store i8* null, i8** [[TMP17]], align 4
-// CHECK11-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
-// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP19]], align 4
-// CHECK11-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32**
-// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP21]], align 4
-// CHECK11-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK11-NEXT:    store i64 [[TMP5]], i64* [[TMP22]], align 4
-// CHECK11-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK11-NEXT:    store i8* null, i8** [[TMP23]], align 4
-// CHECK11-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP27:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP27]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK11-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK11-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP28]], 0
+// CHECK11-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK11-NEXT:    store i8* null, i8** [[TMP16]], align 4
+// CHECK11-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32**
+// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP18]], align 4
+// CHECK11-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32**
+// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP20]], align 4
+// CHECK11-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
+// CHECK11-NEXT:    store i64 [[TMP5]], i64* [[TMP21]], align 4
+// CHECK11-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK11-NEXT:    store i8* null, i8** [[TMP22]], align 4
+// CHECK11-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP26:%.*]] = load i32, i32* [[N]], align 4
+// CHECK11-NEXT:    store i32 [[TMP26]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK11-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK11-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP27]], 0
 // CHECK11-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK11-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK11-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK11-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK11-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP29]], 1
-// CHECK11-NEXT:    [[TMP30:%.*]] = zext i32 [[ADD]] to i64
-// CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP30]])
-// CHECK11-NEXT:    [[TMP31:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l108.region_id, i32 3, i8** [[TMP24]], i8** [[TMP25]], i64* [[TMP26]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK11-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
-// CHECK11-NEXT:    br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK11-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK11-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP28]], 1
+// CHECK11-NEXT:    [[TMP29:%.*]] = zext i32 [[ADD]] to i64
+// CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP29]])
+// CHECK11-NEXT:    [[TMP30:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l108.region_id, i32 3, i8** [[TMP23]], i8** [[TMP24]], i64* [[TMP25]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK11-NEXT:    [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
+// CHECK11-NEXT:    br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK11:       omp_offload.failed:
 // CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l108(i32 [[TMP3]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK11:       omp_offload.cont:
-// CHECK11-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP33]], i32* [[N_CASTED3]], align 4
-// CHECK11-NEXT:    [[TMP34:%.*]] = load i32, i32* [[N_CASTED3]], align 4
-// CHECK11-NEXT:    [[TMP35:%.*]] = mul nuw i32 [[TMP0]], 4
-// CHECK11-NEXT:    [[TMP36:%.*]] = sext i32 [[TMP35]] to i64
+// CHECK11-NEXT:    [[TMP32:%.*]] = load i32, i32* [[N]], align 4
+// CHECK11-NEXT:    store i32 [[TMP32]], i32* [[N_CASTED3]], align 4
+// CHECK11-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N_CASTED3]], align 4
+// CHECK11-NEXT:    [[TMP34:%.*]] = mul nuw i32 [[TMP0]], 4
+// CHECK11-NEXT:    [[TMP35:%.*]] = sext i32 [[TMP34]] to i64
+// CHECK11-NEXT:    [[TMP36:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES7]] to i8*
+// CHECK11-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP36]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes.4 to i8*), i32 24, i1 false)
 // CHECK11-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP38:%.*]] = bitcast i8** [[TMP37]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP34]], i32* [[TMP38]], align 4
+// CHECK11-NEXT:    store i32 [[TMP33]], i32* [[TMP38]], align 4
 // CHECK11-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP40:%.*]] = bitcast i8** [[TMP39]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP34]], i32* [[TMP40]], align 4
-// CHECK11-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 0
-// CHECK11-NEXT:    store i64 4, i64* [[TMP41]], align 4
-// CHECK11-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 0
-// CHECK11-NEXT:    store i8* null, i8** [[TMP42]], align 4
-// CHECK11-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 1
-// CHECK11-NEXT:    [[TMP44:%.*]] = bitcast i8** [[TMP43]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP44]], align 4
-// CHECK11-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 1
-// CHECK11-NEXT:    [[TMP46:%.*]] = bitcast i8** [[TMP45]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP46]], align 4
-// CHECK11-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 1
-// CHECK11-NEXT:    store i64 4, i64* [[TMP47]], align 4
-// CHECK11-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 1
-// CHECK11-NEXT:    store i8* null, i8** [[TMP48]], align 4
-// CHECK11-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 2
+// CHECK11-NEXT:    store i32 [[TMP33]], i32* [[TMP40]], align 4
+// CHECK11-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 0
+// CHECK11-NEXT:    store i8* null, i8** [[TMP41]], align 4
+// CHECK11-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 1
+// CHECK11-NEXT:    [[TMP43:%.*]] = bitcast i8** [[TMP42]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP43]], align 4
+// CHECK11-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 1
+// CHECK11-NEXT:    [[TMP45:%.*]] = bitcast i8** [[TMP44]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP45]], align 4
+// CHECK11-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 1
+// CHECK11-NEXT:    store i8* null, i8** [[TMP46]], align 4
+// CHECK11-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP48:%.*]] = bitcast i8** [[TMP47]] to i32**
+// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP48]], align 4
+// CHECK11-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 2
 // CHECK11-NEXT:    [[TMP50:%.*]] = bitcast i8** [[TMP49]] to i32**
 // CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP50]], align 4
-// CHECK11-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP52:%.*]] = bitcast i8** [[TMP51]] to i32**
-// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP52]], align 4
-// CHECK11-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 2
-// CHECK11-NEXT:    store i64 [[TMP36]], i64* [[TMP53]], align 4
-// CHECK11-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 2
-// CHECK11-NEXT:    store i8* null, i8** [[TMP54]], align 4
-// CHECK11-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP58:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP58]], i32* [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK11-NEXT:    [[TMP59:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK11-NEXT:    [[SUB11:%.*]] = sub nsw i32 [[TMP59]], 0
+// CHECK11-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 2
+// CHECK11-NEXT:    store i64 [[TMP35]], i64* [[TMP51]], align 4
+// CHECK11-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 2
+// CHECK11-NEXT:    store i8* null, i8** [[TMP52]], align 4
+// CHECK11-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP56:%.*]] = load i32, i32* [[N]], align 4
+// CHECK11-NEXT:    store i32 [[TMP56]], i32* [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK11-NEXT:    [[TMP57:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK11-NEXT:    [[SUB11:%.*]] = sub nsw i32 [[TMP57]], 0
 // CHECK11-NEXT:    [[DIV12:%.*]] = sdiv i32 [[SUB11]], 1
 // CHECK11-NEXT:    [[SUB13:%.*]] = sub nsw i32 [[DIV12]], 1
 // CHECK11-NEXT:    store i32 [[SUB13]], i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK11-NEXT:    [[TMP60:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK11-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP60]], 1
-// CHECK11-NEXT:    [[TMP61:%.*]] = zext i32 [[ADD14]] to i64
-// CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP61]])
-// CHECK11-NEXT:    [[TMP62:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l113.region_id, i32 3, i8** [[TMP55]], i8** [[TMP56]], i64* [[TMP57]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK11-NEXT:    [[TMP63:%.*]] = icmp ne i32 [[TMP62]], 0
-// CHECK11-NEXT:    br i1 [[TMP63]], label [[OMP_OFFLOAD_FAILED15:%.*]], label [[OMP_OFFLOAD_CONT16:%.*]]
+// CHECK11-NEXT:    [[TMP58:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
+// CHECK11-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP58]], 1
+// CHECK11-NEXT:    [[TMP59:%.*]] = zext i32 [[ADD14]] to i64
+// CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP59]])
+// CHECK11-NEXT:    [[TMP60:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l113.region_id, i32 3, i8** [[TMP53]], i8** [[TMP54]], i64* [[TMP55]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.5, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK11-NEXT:    [[TMP61:%.*]] = icmp ne i32 [[TMP60]], 0
+// CHECK11-NEXT:    br i1 [[TMP61]], label [[OMP_OFFLOAD_FAILED15:%.*]], label [[OMP_OFFLOAD_CONT16:%.*]]
 // CHECK11:       omp_offload.failed15:
-// CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l113(i32 [[TMP34]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l113(i32 [[TMP33]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT16]]
 // CHECK11:       omp_offload.cont16:
-// CHECK11-NEXT:    [[TMP64:%.*]] = load i32, i32* [[M]], align 4
-// CHECK11-NEXT:    store i32 [[TMP64]], i32* [[M_CASTED]], align 4
-// CHECK11-NEXT:    [[TMP65:%.*]] = load i32, i32* [[M_CASTED]], align 4
-// CHECK11-NEXT:    [[TMP66:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP66]], i32* [[N_CASTED17]], align 4
-// CHECK11-NEXT:    [[TMP67:%.*]] = load i32, i32* [[N_CASTED17]], align 4
-// CHECK11-NEXT:    [[TMP68:%.*]] = mul nuw i32 [[TMP0]], 4
-// CHECK11-NEXT:    [[TMP69:%.*]] = sext i32 [[TMP68]] to i64
-// CHECK11-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP71:%.*]] = bitcast i8** [[TMP70]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP65]], i32* [[TMP71]], align 4
-// CHECK11-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP73:%.*]] = bitcast i8** [[TMP72]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP65]], i32* [[TMP73]], align 4
-// CHECK11-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 0
-// CHECK11-NEXT:    store i64 4, i64* [[TMP74]], align 4
-// CHECK11-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 0
-// CHECK11-NEXT:    store i8* null, i8** [[TMP75]], align 4
-// CHECK11-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 1
+// CHECK11-NEXT:    [[TMP62:%.*]] = load i32, i32* [[M]], align 4
+// CHECK11-NEXT:    store i32 [[TMP62]], i32* [[M_CASTED]], align 4
+// CHECK11-NEXT:    [[TMP63:%.*]] = load i32, i32* [[M_CASTED]], align 4
+// CHECK11-NEXT:    [[TMP64:%.*]] = load i32, i32* [[N]], align 4
+// CHECK11-NEXT:    store i32 [[TMP64]], i32* [[N_CASTED17]], align 4
+// CHECK11-NEXT:    [[TMP65:%.*]] = load i32, i32* [[N_CASTED17]], align 4
+// CHECK11-NEXT:    [[TMP66:%.*]] = mul nuw i32 [[TMP0]], 4
+// CHECK11-NEXT:    [[TMP67:%.*]] = sext i32 [[TMP66]] to i64
+// CHECK11-NEXT:    [[TMP68:%.*]] = bitcast [4 x i64]* [[DOTOFFLOAD_SIZES21]] to i8*
+// CHECK11-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP68]], i8* align 4 bitcast ([4 x i64]* @.offload_sizes.8 to i8*), i32 32, i1 false)
+// CHECK11-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP70:%.*]] = bitcast i8** [[TMP69]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP63]], i32* [[TMP70]], align 4
+// CHECK11-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP72:%.*]] = bitcast i8** [[TMP71]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP63]], i32* [[TMP72]], align 4
+// CHECK11-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 0
+// CHECK11-NEXT:    store i8* null, i8** [[TMP73]], align 4
+// CHECK11-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 1
+// CHECK11-NEXT:    [[TMP75:%.*]] = bitcast i8** [[TMP74]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP65]], i32* [[TMP75]], align 4
+// CHECK11-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 1
 // CHECK11-NEXT:    [[TMP77:%.*]] = bitcast i8** [[TMP76]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP67]], i32* [[TMP77]], align 4
-// CHECK11-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 1
-// CHECK11-NEXT:    [[TMP79:%.*]] = bitcast i8** [[TMP78]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP67]], i32* [[TMP79]], align 4
-// CHECK11-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 1
-// CHECK11-NEXT:    store i64 4, i64* [[TMP80]], align 4
-// CHECK11-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 1
-// CHECK11-NEXT:    store i8* null, i8** [[TMP81]], align 4
-// CHECK11-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP83]], align 4
-// CHECK11-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP85:%.*]] = bitcast i8** [[TMP84]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP85]], align 4
-// CHECK11-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 2
-// CHECK11-NEXT:    store i64 4, i64* [[TMP86]], align 4
-// CHECK11-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 2
-// CHECK11-NEXT:    store i8* null, i8** [[TMP87]], align 4
-// CHECK11-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 3
-// CHECK11-NEXT:    [[TMP89:%.*]] = bitcast i8** [[TMP88]] to i32**
-// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP89]], align 4
-// CHECK11-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 3
-// CHECK11-NEXT:    [[TMP91:%.*]] = bitcast i8** [[TMP90]] to i32**
-// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP91]], align 4
-// CHECK11-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 3
-// CHECK11-NEXT:    store i64 [[TMP69]], i64* [[TMP92]], align 4
-// CHECK11-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 3
-// CHECK11-NEXT:    store i8* null, i8** [[TMP93]], align 4
-// CHECK11-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP97:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP97]], i32* [[DOTCAPTURE_EXPR_23]], align 4
-// CHECK11-NEXT:    [[TMP98:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_23]], align 4
-// CHECK11-NEXT:    [[SUB25:%.*]] = sub nsw i32 [[TMP98]], 0
+// CHECK11-NEXT:    store i32 [[TMP65]], i32* [[TMP77]], align 4
+// CHECK11-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 1
+// CHECK11-NEXT:    store i8* null, i8** [[TMP78]], align 4
+// CHECK11-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP80]], align 4
+// CHECK11-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP82:%.*]] = bitcast i8** [[TMP81]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP82]], align 4
+// CHECK11-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 2
+// CHECK11-NEXT:    store i8* null, i8** [[TMP83]], align 4
+// CHECK11-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 3
+// CHECK11-NEXT:    [[TMP85:%.*]] = bitcast i8** [[TMP84]] to i32**
+// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP85]], align 4
+// CHECK11-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 3
+// CHECK11-NEXT:    [[TMP87:%.*]] = bitcast i8** [[TMP86]] to i32**
+// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP87]], align 4
+// CHECK11-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 3
+// CHECK11-NEXT:    store i64 [[TMP67]], i64* [[TMP88]], align 4
+// CHECK11-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 3
+// CHECK11-NEXT:    store i8* null, i8** [[TMP89]], align 4
+// CHECK11-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP93:%.*]] = load i32, i32* [[N]], align 4
+// CHECK11-NEXT:    store i32 [[TMP93]], i32* [[DOTCAPTURE_EXPR_23]], align 4
+// CHECK11-NEXT:    [[TMP94:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_23]], align 4
+// CHECK11-NEXT:    [[SUB25:%.*]] = sub nsw i32 [[TMP94]], 0
 // CHECK11-NEXT:    [[DIV26:%.*]] = sdiv i32 [[SUB25]], 1
 // CHECK11-NEXT:    [[SUB27:%.*]] = sub nsw i32 [[DIV26]], 1
 // CHECK11-NEXT:    store i32 [[SUB27]], i32* [[DOTCAPTURE_EXPR_24]], align 4
-// CHECK11-NEXT:    [[TMP99:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_24]], align 4
-// CHECK11-NEXT:    [[ADD28:%.*]] = add nsw i32 [[TMP99]], 1
-// CHECK11-NEXT:    [[TMP100:%.*]] = zext i32 [[ADD28]] to i64
-// CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP100]])
-// CHECK11-NEXT:    [[TMP101:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l118.region_id, i32 4, i8** [[TMP94]], i8** [[TMP95]], i64* [[TMP96]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.7, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK11-NEXT:    [[TMP102:%.*]] = icmp ne i32 [[TMP101]], 0
-// CHECK11-NEXT:    br i1 [[TMP102]], label [[OMP_OFFLOAD_FAILED29:%.*]], label [[OMP_OFFLOAD_CONT30:%.*]]
+// CHECK11-NEXT:    [[TMP95:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_24]], align 4
+// CHECK11-NEXT:    [[ADD28:%.*]] = add nsw i32 [[TMP95]], 1
+// CHECK11-NEXT:    [[TMP96:%.*]] = zext i32 [[ADD28]] to i64
+// CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP96]])
+// CHECK11-NEXT:    [[TMP97:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l118.region_id, i32 4, i8** [[TMP90]], i8** [[TMP91]], i64* [[TMP92]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK11-NEXT:    [[TMP98:%.*]] = icmp ne i32 [[TMP97]], 0
+// CHECK11-NEXT:    br i1 [[TMP98]], label [[OMP_OFFLOAD_FAILED29:%.*]], label [[OMP_OFFLOAD_CONT30:%.*]]
 // CHECK11:       omp_offload.failed29:
-// CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l118(i32 [[TMP65]], i32 [[TMP67]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l118(i32 [[TMP63]], i32 [[TMP65]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT30]]
 // CHECK11:       omp_offload.cont30:
-// CHECK11-NEXT:    [[TMP103:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK11-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10EEiT_(i32 noundef [[TMP103]])
+// CHECK11-NEXT:    [[TMP99:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK11-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10EEiT_(i32 noundef [[TMP99]])
 // CHECK11-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK11-NEXT:    [[TMP104:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK11-NEXT:    call void @llvm.stackrestore(i8* [[TMP104]])
-// CHECK11-NEXT:    [[TMP105:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK11-NEXT:    ret i32 [[TMP105]]
+// CHECK11-NEXT:    [[TMP100:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK11-NEXT:    call void @llvm.stackrestore(i8* [[TMP100]])
+// CHECK11-NEXT:    [[TMP101:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK11-NEXT:    ret i32 [[TMP101]]
 //
 //
 // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l108
@@ -7017,11 +6993,11 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
 // CHECK11-NEXT:    store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK11-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK11-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*, i32)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP4]])
+// CHECK11-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*, i32)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP4]])
 // CHECK11-NEXT:    ret void
 //
 //
-// CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..5
+// CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..6
 // CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK11-NEXT:  entry:
 // CHECK11-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -7099,7 +7075,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !25
 // CHECK11-NEXT:    store i32 [[TMP19]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !25
 // CHECK11-NEXT:    [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !25
-// CHECK11-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*, i32)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]], i32 [[TMP20]]), !llvm.access.group !25
+// CHECK11-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*, i32)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]], i32 [[TMP20]]), !llvm.access.group !25
 // CHECK11-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK11:       omp.inner.for.inc:
 // CHECK11-NEXT:    [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25
@@ -7153,7 +7129,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    ret void
 //
 //
-// CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..6
+// CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..7
 // CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK11-NEXT:  entry:
 // CHECK11-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -7270,7 +7246,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK11-LABEL: define {{[^@]+}}@_Z5tmainIiLi10EEiT_
-// CHECK11-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK11-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK11-NEXT:  entry:
 // CHECK11-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[A:%.*]] = alloca [10 x i32], align 4
@@ -7301,7 +7277,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK11-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l86.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.10, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK11-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l86.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.12, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK11-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK11-NEXT:    br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK11:       omp_offload.failed:
@@ -7319,7 +7295,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
 // CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK11-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l91.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.13, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.14, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK11-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l91.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.16, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.17, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK11-NEXT:    [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
 // CHECK11-NEXT:    br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]]
 // CHECK11:       omp_offload.failed5:
@@ -7348,7 +7324,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS8]], i32 0, i32 0
 // CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK11-NEXT:    [[TMP32:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l96.region_id, i32 2, i8** [[TMP30]], i8** [[TMP31]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.17, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.18, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK11-NEXT:    [[TMP32:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l96.region_id, i32 2, i8** [[TMP30]], i8** [[TMP31]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.20, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.21, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK11-NEXT:    [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0
 // CHECK11-NEXT:    br i1 [[TMP33]], label [[OMP_OFFLOAD_FAILED11:%.*]], label [[OMP_OFFLOAD_CONT12:%.*]]
 // CHECK11:       omp_offload.failed11:
@@ -7364,11 +7340,11 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4
 // CHECK11-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4
 // CHECK11-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4
-// CHECK11-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK11-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK11-NEXT:    ret void
 //
 //
-// CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..8
+// CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..10
 // CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK11-NEXT:  entry:
 // CHECK11-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -7414,7 +7390,7 @@ int main (int argc, char **argv) {
 // CHECK11:       omp.inner.for.body:
 // CHECK11-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !31
 // CHECK11-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31
-// CHECK11-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !31
+// CHECK11-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !31
 // CHECK11-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK11:       omp.inner.for.inc:
 // CHECK11-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31
@@ -7436,7 +7412,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    ret void
 //
 //
-// CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..9
+// CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..11
 // CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK11-NEXT:  entry:
 // CHECK11-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -7523,11 +7499,11 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4
 // CHECK11-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4
 // CHECK11-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4
-// CHECK11-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK11-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK11-NEXT:    ret void
 //
 //
-// CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..11
+// CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..14
 // CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK11-NEXT:  entry:
 // CHECK11-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -7573,7 +7549,7 @@ int main (int argc, char **argv) {
 // CHECK11:       omp.inner.for.body:
 // CHECK11-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !37
 // CHECK11-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !37
-// CHECK11-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !37
+// CHECK11-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !37
 // CHECK11-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK11:       omp.inner.for.inc:
 // CHECK11-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37
@@ -7595,7 +7571,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    ret void
 //
 //
-// CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..12
+// CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..15
 // CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK11-NEXT:  entry:
 // CHECK11-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -7691,11 +7667,11 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
 // CHECK11-NEXT:    store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK11-NEXT:    [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK11-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i32)* @.omp_outlined..15 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i32 [[TMP3]])
+// CHECK11-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i32)* @.omp_outlined..18 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i32 [[TMP3]])
 // CHECK11-NEXT:    ret void
 //
 //
-// CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..15
+// CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..18
 // CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK11-NEXT:  entry:
 // CHECK11-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -7747,7 +7723,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !43
 // CHECK11-NEXT:    store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !43
 // CHECK11-NEXT:    [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !43
-// CHECK11-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]), !llvm.access.group !43
+// CHECK11-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]), !llvm.access.group !43
 // CHECK11-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK11:       omp.inner.for.inc:
 // CHECK11-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43
@@ -7790,7 +7766,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    ret void
 //
 //
-// CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..16
+// CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..19
 // CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK11-NEXT:  entry:
 // CHECK11-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -7874,7 +7850,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK11-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK11-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK11-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK11-NEXT:  entry:
 // CHECK11-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK11-NEXT:    ret void
@@ -7930,14 +7906,14 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4
 // CHECK12-NEXT:    [[TMP4:%.*]] = mul nuw i32 [[TMP0]], 4
 // CHECK12-NEXT:    [[TMP5:%.*]] = sext i32 [[TMP4]] to i64
-// CHECK12-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP3]], i32* [[TMP7]], align 4
-// CHECK12-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP3]], i32* [[TMP9]], align 4
-// CHECK12-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK12-NEXT:    store i64 4, i64* [[TMP10]], align 4
+// CHECK12-NEXT:    [[TMP6:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK12-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP6]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes to i8*), i32 24, i1 false)
+// CHECK12-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP3]], i32* [[TMP8]], align 4
+// CHECK12-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP3]], i32* [[TMP10]], align 4
 // CHECK12-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
 // CHECK12-NEXT:    store i8* null, i8** [[TMP11]], align 4
 // CHECK12-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -7946,173 +7922,165 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK12-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i32*
 // CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP15]], align 4
-// CHECK12-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK12-NEXT:    store i64 4, i64* [[TMP16]], align 4
-// CHECK12-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK12-NEXT:    store i8* null, i8** [[TMP17]], align 4
-// CHECK12-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK12-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
-// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP19]], align 4
-// CHECK12-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK12-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32**
-// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP21]], align 4
-// CHECK12-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK12-NEXT:    store i64 [[TMP5]], i64* [[TMP22]], align 4
-// CHECK12-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK12-NEXT:    store i8* null, i8** [[TMP23]], align 4
-// CHECK12-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP27:%.*]] = load i32, i32* [[N]], align 4
-// CHECK12-NEXT:    store i32 [[TMP27]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK12-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK12-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP28]], 0
+// CHECK12-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK12-NEXT:    store i8* null, i8** [[TMP16]], align 4
+// CHECK12-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK12-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32**
+// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP18]], align 4
+// CHECK12-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK12-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32**
+// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP20]], align 4
+// CHECK12-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
+// CHECK12-NEXT:    store i64 [[TMP5]], i64* [[TMP21]], align 4
+// CHECK12-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK12-NEXT:    store i8* null, i8** [[TMP22]], align 4
+// CHECK12-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP26:%.*]] = load i32, i32* [[N]], align 4
+// CHECK12-NEXT:    store i32 [[TMP26]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK12-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK12-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP27]], 0
 // CHECK12-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK12-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK12-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK12-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK12-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP29]], 1
-// CHECK12-NEXT:    [[TMP30:%.*]] = zext i32 [[ADD]] to i64
-// CHECK12-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP30]])
-// CHECK12-NEXT:    [[TMP31:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l108.region_id, i32 3, i8** [[TMP24]], i8** [[TMP25]], i64* [[TMP26]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK12-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
-// CHECK12-NEXT:    br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK12-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK12-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP28]], 1
+// CHECK12-NEXT:    [[TMP29:%.*]] = zext i32 [[ADD]] to i64
+// CHECK12-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP29]])
+// CHECK12-NEXT:    [[TMP30:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l108.region_id, i32 3, i8** [[TMP23]], i8** [[TMP24]], i64* [[TMP25]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK12-NEXT:    [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
+// CHECK12-NEXT:    br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK12:       omp_offload.failed:
 // CHECK12-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l108(i32 [[TMP3]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK12-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK12:       omp_offload.cont:
-// CHECK12-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N]], align 4
-// CHECK12-NEXT:    store i32 [[TMP33]], i32* [[N_CASTED3]], align 4
-// CHECK12-NEXT:    [[TMP34:%.*]] = load i32, i32* [[N_CASTED3]], align 4
-// CHECK12-NEXT:    [[TMP35:%.*]] = mul nuw i32 [[TMP0]], 4
-// CHECK12-NEXT:    [[TMP36:%.*]] = sext i32 [[TMP35]] to i64
+// CHECK12-NEXT:    [[TMP32:%.*]] = load i32, i32* [[N]], align 4
+// CHECK12-NEXT:    store i32 [[TMP32]], i32* [[N_CASTED3]], align 4
+// CHECK12-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N_CASTED3]], align 4
+// CHECK12-NEXT:    [[TMP34:%.*]] = mul nuw i32 [[TMP0]], 4
+// CHECK12-NEXT:    [[TMP35:%.*]] = sext i32 [[TMP34]] to i64
+// CHECK12-NEXT:    [[TMP36:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES7]] to i8*
+// CHECK12-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP36]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes.4 to i8*), i32 24, i1 false)
 // CHECK12-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
 // CHECK12-NEXT:    [[TMP38:%.*]] = bitcast i8** [[TMP37]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP34]], i32* [[TMP38]], align 4
+// CHECK12-NEXT:    store i32 [[TMP33]], i32* [[TMP38]], align 4
 // CHECK12-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
 // CHECK12-NEXT:    [[TMP40:%.*]] = bitcast i8** [[TMP39]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP34]], i32* [[TMP40]], align 4
-// CHECK12-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 0
-// CHECK12-NEXT:    store i64 4, i64* [[TMP41]], align 4
-// CHECK12-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 0
-// CHECK12-NEXT:    store i8* null, i8** [[TMP42]], align 4
-// CHECK12-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 1
-// CHECK12-NEXT:    [[TMP44:%.*]] = bitcast i8** [[TMP43]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP44]], align 4
-// CHECK12-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 1
-// CHECK12-NEXT:    [[TMP46:%.*]] = bitcast i8** [[TMP45]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP46]], align 4
-// CHECK12-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 1
-// CHECK12-NEXT:    store i64 4, i64* [[TMP47]], align 4
-// CHECK12-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 1
-// CHECK12-NEXT:    store i8* null, i8** [[TMP48]], align 4
-// CHECK12-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 2
+// CHECK12-NEXT:    store i32 [[TMP33]], i32* [[TMP40]], align 4
+// CHECK12-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 0
+// CHECK12-NEXT:    store i8* null, i8** [[TMP41]], align 4
+// CHECK12-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 1
+// CHECK12-NEXT:    [[TMP43:%.*]] = bitcast i8** [[TMP42]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP43]], align 4
+// CHECK12-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 1
+// CHECK12-NEXT:    [[TMP45:%.*]] = bitcast i8** [[TMP44]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP45]], align 4
+// CHECK12-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 1
+// CHECK12-NEXT:    store i8* null, i8** [[TMP46]], align 4
+// CHECK12-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 2
+// CHECK12-NEXT:    [[TMP48:%.*]] = bitcast i8** [[TMP47]] to i32**
+// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP48]], align 4
+// CHECK12-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 2
 // CHECK12-NEXT:    [[TMP50:%.*]] = bitcast i8** [[TMP49]] to i32**
 // CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP50]], align 4
-// CHECK12-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 2
-// CHECK12-NEXT:    [[TMP52:%.*]] = bitcast i8** [[TMP51]] to i32**
-// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP52]], align 4
-// CHECK12-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 2
-// CHECK12-NEXT:    store i64 [[TMP36]], i64* [[TMP53]], align 4
-// CHECK12-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 2
-// CHECK12-NEXT:    store i8* null, i8** [[TMP54]], align 4
-// CHECK12-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP58:%.*]] = load i32, i32* [[N]], align 4
-// CHECK12-NEXT:    store i32 [[TMP58]], i32* [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK12-NEXT:    [[TMP59:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK12-NEXT:    [[SUB11:%.*]] = sub nsw i32 [[TMP59]], 0
+// CHECK12-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 2
+// CHECK12-NEXT:    store i64 [[TMP35]], i64* [[TMP51]], align 4
+// CHECK12-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 2
+// CHECK12-NEXT:    store i8* null, i8** [[TMP52]], align 4
+// CHECK12-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP56:%.*]] = load i32, i32* [[N]], align 4
+// CHECK12-NEXT:    store i32 [[TMP56]], i32* [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK12-NEXT:    [[TMP57:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK12-NEXT:    [[SUB11:%.*]] = sub nsw i32 [[TMP57]], 0
 // CHECK12-NEXT:    [[DIV12:%.*]] = sdiv i32 [[SUB11]], 1
 // CHECK12-NEXT:    [[SUB13:%.*]] = sub nsw i32 [[DIV12]], 1
 // CHECK12-NEXT:    store i32 [[SUB13]], i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK12-NEXT:    [[TMP60:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK12-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP60]], 1
-// CHECK12-NEXT:    [[TMP61:%.*]] = zext i32 [[ADD14]] to i64
-// CHECK12-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP61]])
-// CHECK12-NEXT:    [[TMP62:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l113.region_id, i32 3, i8** [[TMP55]], i8** [[TMP56]], i64* [[TMP57]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK12-NEXT:    [[TMP63:%.*]] = icmp ne i32 [[TMP62]], 0
-// CHECK12-NEXT:    br i1 [[TMP63]], label [[OMP_OFFLOAD_FAILED15:%.*]], label [[OMP_OFFLOAD_CONT16:%.*]]
+// CHECK12-NEXT:    [[TMP58:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
+// CHECK12-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP58]], 1
+// CHECK12-NEXT:    [[TMP59:%.*]] = zext i32 [[ADD14]] to i64
+// CHECK12-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP59]])
+// CHECK12-NEXT:    [[TMP60:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l113.region_id, i32 3, i8** [[TMP53]], i8** [[TMP54]], i64* [[TMP55]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.5, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK12-NEXT:    [[TMP61:%.*]] = icmp ne i32 [[TMP60]], 0
+// CHECK12-NEXT:    br i1 [[TMP61]], label [[OMP_OFFLOAD_FAILED15:%.*]], label [[OMP_OFFLOAD_CONT16:%.*]]
 // CHECK12:       omp_offload.failed15:
-// CHECK12-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l113(i32 [[TMP34]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK12-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l113(i32 [[TMP33]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK12-NEXT:    br label [[OMP_OFFLOAD_CONT16]]
 // CHECK12:       omp_offload.cont16:
-// CHECK12-NEXT:    [[TMP64:%.*]] = load i32, i32* [[M]], align 4
-// CHECK12-NEXT:    store i32 [[TMP64]], i32* [[M_CASTED]], align 4
-// CHECK12-NEXT:    [[TMP65:%.*]] = load i32, i32* [[M_CASTED]], align 4
-// CHECK12-NEXT:    [[TMP66:%.*]] = load i32, i32* [[N]], align 4
-// CHECK12-NEXT:    store i32 [[TMP66]], i32* [[N_CASTED17]], align 4
-// CHECK12-NEXT:    [[TMP67:%.*]] = load i32, i32* [[N_CASTED17]], align 4
-// CHECK12-NEXT:    [[TMP68:%.*]] = mul nuw i32 [[TMP0]], 4
-// CHECK12-NEXT:    [[TMP69:%.*]] = sext i32 [[TMP68]] to i64
-// CHECK12-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP71:%.*]] = bitcast i8** [[TMP70]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP65]], i32* [[TMP71]], align 4
-// CHECK12-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP73:%.*]] = bitcast i8** [[TMP72]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP65]], i32* [[TMP73]], align 4
-// CHECK12-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 0
-// CHECK12-NEXT:    store i64 4, i64* [[TMP74]], align 4
-// CHECK12-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 0
-// CHECK12-NEXT:    store i8* null, i8** [[TMP75]], align 4
-// CHECK12-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 1
+// CHECK12-NEXT:    [[TMP62:%.*]] = load i32, i32* [[M]], align 4
+// CHECK12-NEXT:    store i32 [[TMP62]], i32* [[M_CASTED]], align 4
+// CHECK12-NEXT:    [[TMP63:%.*]] = load i32, i32* [[M_CASTED]], align 4
+// CHECK12-NEXT:    [[TMP64:%.*]] = load i32, i32* [[N]], align 4
+// CHECK12-NEXT:    store i32 [[TMP64]], i32* [[N_CASTED17]], align 4
+// CHECK12-NEXT:    [[TMP65:%.*]] = load i32, i32* [[N_CASTED17]], align 4
+// CHECK12-NEXT:    [[TMP66:%.*]] = mul nuw i32 [[TMP0]], 4
+// CHECK12-NEXT:    [[TMP67:%.*]] = sext i32 [[TMP66]] to i64
+// CHECK12-NEXT:    [[TMP68:%.*]] = bitcast [4 x i64]* [[DOTOFFLOAD_SIZES21]] to i8*
+// CHECK12-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP68]], i8* align 4 bitcast ([4 x i64]* @.offload_sizes.8 to i8*), i32 32, i1 false)
+// CHECK12-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP70:%.*]] = bitcast i8** [[TMP69]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP63]], i32* [[TMP70]], align 4
+// CHECK12-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP72:%.*]] = bitcast i8** [[TMP71]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP63]], i32* [[TMP72]], align 4
+// CHECK12-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 0
+// CHECK12-NEXT:    store i8* null, i8** [[TMP73]], align 4
+// CHECK12-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 1
+// CHECK12-NEXT:    [[TMP75:%.*]] = bitcast i8** [[TMP74]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP65]], i32* [[TMP75]], align 4
+// CHECK12-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 1
 // CHECK12-NEXT:    [[TMP77:%.*]] = bitcast i8** [[TMP76]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP67]], i32* [[TMP77]], align 4
-// CHECK12-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 1
-// CHECK12-NEXT:    [[TMP79:%.*]] = bitcast i8** [[TMP78]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP67]], i32* [[TMP79]], align 4
-// CHECK12-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 1
-// CHECK12-NEXT:    store i64 4, i64* [[TMP80]], align 4
-// CHECK12-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 1
-// CHECK12-NEXT:    store i8* null, i8** [[TMP81]], align 4
-// CHECK12-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 2
-// CHECK12-NEXT:    [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP83]], align 4
-// CHECK12-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 2
-// CHECK12-NEXT:    [[TMP85:%.*]] = bitcast i8** [[TMP84]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP85]], align 4
-// CHECK12-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 2
-// CHECK12-NEXT:    store i64 4, i64* [[TMP86]], align 4
-// CHECK12-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 2
-// CHECK12-NEXT:    store i8* null, i8** [[TMP87]], align 4
-// CHECK12-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 3
-// CHECK12-NEXT:    [[TMP89:%.*]] = bitcast i8** [[TMP88]] to i32**
-// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP89]], align 4
-// CHECK12-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 3
-// CHECK12-NEXT:    [[TMP91:%.*]] = bitcast i8** [[TMP90]] to i32**
-// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP91]], align 4
-// CHECK12-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 3
-// CHECK12-NEXT:    store i64 [[TMP69]], i64* [[TMP92]], align 4
-// CHECK12-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 3
-// CHECK12-NEXT:    store i8* null, i8** [[TMP93]], align 4
-// CHECK12-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP97:%.*]] = load i32, i32* [[N]], align 4
-// CHECK12-NEXT:    store i32 [[TMP97]], i32* [[DOTCAPTURE_EXPR_23]], align 4
-// CHECK12-NEXT:    [[TMP98:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_23]], align 4
-// CHECK12-NEXT:    [[SUB25:%.*]] = sub nsw i32 [[TMP98]], 0
+// CHECK12-NEXT:    store i32 [[TMP65]], i32* [[TMP77]], align 4
+// CHECK12-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 1
+// CHECK12-NEXT:    store i8* null, i8** [[TMP78]], align 4
+// CHECK12-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 2
+// CHECK12-NEXT:    [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP80]], align 4
+// CHECK12-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 2
+// CHECK12-NEXT:    [[TMP82:%.*]] = bitcast i8** [[TMP81]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP82]], align 4
+// CHECK12-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 2
+// CHECK12-NEXT:    store i8* null, i8** [[TMP83]], align 4
+// CHECK12-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 3
+// CHECK12-NEXT:    [[TMP85:%.*]] = bitcast i8** [[TMP84]] to i32**
+// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP85]], align 4
+// CHECK12-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 3
+// CHECK12-NEXT:    [[TMP87:%.*]] = bitcast i8** [[TMP86]] to i32**
+// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP87]], align 4
+// CHECK12-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 3
+// CHECK12-NEXT:    store i64 [[TMP67]], i64* [[TMP88]], align 4
+// CHECK12-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 3
+// CHECK12-NEXT:    store i8* null, i8** [[TMP89]], align 4
+// CHECK12-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP93:%.*]] = load i32, i32* [[N]], align 4
+// CHECK12-NEXT:    store i32 [[TMP93]], i32* [[DOTCAPTURE_EXPR_23]], align 4
+// CHECK12-NEXT:    [[TMP94:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_23]], align 4
+// CHECK12-NEXT:    [[SUB25:%.*]] = sub nsw i32 [[TMP94]], 0
 // CHECK12-NEXT:    [[DIV26:%.*]] = sdiv i32 [[SUB25]], 1
 // CHECK12-NEXT:    [[SUB27:%.*]] = sub nsw i32 [[DIV26]], 1
 // CHECK12-NEXT:    store i32 [[SUB27]], i32* [[DOTCAPTURE_EXPR_24]], align 4
-// CHECK12-NEXT:    [[TMP99:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_24]], align 4
-// CHECK12-NEXT:    [[ADD28:%.*]] = add nsw i32 [[TMP99]], 1
-// CHECK12-NEXT:    [[TMP100:%.*]] = zext i32 [[ADD28]] to i64
-// CHECK12-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP100]])
-// CHECK12-NEXT:    [[TMP101:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l118.region_id, i32 4, i8** [[TMP94]], i8** [[TMP95]], i64* [[TMP96]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.7, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK12-NEXT:    [[TMP102:%.*]] = icmp ne i32 [[TMP101]], 0
-// CHECK12-NEXT:    br i1 [[TMP102]], label [[OMP_OFFLOAD_FAILED29:%.*]], label [[OMP_OFFLOAD_CONT30:%.*]]
+// CHECK12-NEXT:    [[TMP95:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_24]], align 4
+// CHECK12-NEXT:    [[ADD28:%.*]] = add nsw i32 [[TMP95]], 1
+// CHECK12-NEXT:    [[TMP96:%.*]] = zext i32 [[ADD28]] to i64
+// CHECK12-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP96]])
+// CHECK12-NEXT:    [[TMP97:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l118.region_id, i32 4, i8** [[TMP90]], i8** [[TMP91]], i64* [[TMP92]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK12-NEXT:    [[TMP98:%.*]] = icmp ne i32 [[TMP97]], 0
+// CHECK12-NEXT:    br i1 [[TMP98]], label [[OMP_OFFLOAD_FAILED29:%.*]], label [[OMP_OFFLOAD_CONT30:%.*]]
 // CHECK12:       omp_offload.failed29:
-// CHECK12-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l118(i32 [[TMP65]], i32 [[TMP67]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK12-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l118(i32 [[TMP63]], i32 [[TMP65]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK12-NEXT:    br label [[OMP_OFFLOAD_CONT30]]
 // CHECK12:       omp_offload.cont30:
-// CHECK12-NEXT:    [[TMP103:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK12-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10EEiT_(i32 noundef [[TMP103]])
+// CHECK12-NEXT:    [[TMP99:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK12-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10EEiT_(i32 noundef [[TMP99]])
 // CHECK12-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK12-NEXT:    [[TMP104:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK12-NEXT:    call void @llvm.stackrestore(i8* [[TMP104]])
-// CHECK12-NEXT:    [[TMP105:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK12-NEXT:    ret i32 [[TMP105]]
+// CHECK12-NEXT:    [[TMP100:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK12-NEXT:    call void @llvm.stackrestore(i8* [[TMP100]])
+// CHECK12-NEXT:    [[TMP101:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK12-NEXT:    ret i32 [[TMP101]]
 //
 //
 // CHECK12-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l108
@@ -8595,11 +8563,11 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
 // CHECK12-NEXT:    store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK12-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK12-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*, i32)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP4]])
+// CHECK12-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*, i32)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP4]])
 // CHECK12-NEXT:    ret void
 //
 //
-// CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..5
+// CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..6
 // CHECK12-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK12-NEXT:  entry:
 // CHECK12-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -8677,7 +8645,7 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !25
 // CHECK12-NEXT:    store i32 [[TMP19]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !25
 // CHECK12-NEXT:    [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !25
-// CHECK12-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*, i32)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]], i32 [[TMP20]]), !llvm.access.group !25
+// CHECK12-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*, i32)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]], i32 [[TMP20]]), !llvm.access.group !25
 // CHECK12-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK12:       omp.inner.for.inc:
 // CHECK12-NEXT:    [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25
@@ -8731,7 +8699,7 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    ret void
 //
 //
-// CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..6
+// CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..7
 // CHECK12-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK12-NEXT:  entry:
 // CHECK12-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -8848,7 +8816,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK12-LABEL: define {{[^@]+}}@_Z5tmainIiLi10EEiT_
-// CHECK12-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK12-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK12-NEXT:  entry:
 // CHECK12-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK12-NEXT:    [[A:%.*]] = alloca [10 x i32], align 4
@@ -8879,7 +8847,7 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK12-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK12-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK12-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l86.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.10, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK12-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l86.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.12, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK12-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK12-NEXT:    br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK12:       omp_offload.failed:
@@ -8897,7 +8865,7 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK12-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
 // CHECK12-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK12-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l91.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.13, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.14, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK12-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l91.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.16, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.17, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK12-NEXT:    [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
 // CHECK12-NEXT:    br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]]
 // CHECK12:       omp_offload.failed5:
@@ -8926,7 +8894,7 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0
 // CHECK12-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS8]], i32 0, i32 0
 // CHECK12-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK12-NEXT:    [[TMP32:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l96.region_id, i32 2, i8** [[TMP30]], i8** [[TMP31]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.17, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.18, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK12-NEXT:    [[TMP32:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l96.region_id, i32 2, i8** [[TMP30]], i8** [[TMP31]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.20, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.21, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK12-NEXT:    [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0
 // CHECK12-NEXT:    br i1 [[TMP33]], label [[OMP_OFFLOAD_FAILED11:%.*]], label [[OMP_OFFLOAD_CONT12:%.*]]
 // CHECK12:       omp_offload.failed11:
@@ -8942,11 +8910,11 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4
 // CHECK12-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4
 // CHECK12-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4
-// CHECK12-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK12-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK12-NEXT:    ret void
 //
 //
-// CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..8
+// CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..10
 // CHECK12-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK12-NEXT:  entry:
 // CHECK12-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -8992,7 +8960,7 @@ int main (int argc, char **argv) {
 // CHECK12:       omp.inner.for.body:
 // CHECK12-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !31
 // CHECK12-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31
-// CHECK12-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !31
+// CHECK12-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !31
 // CHECK12-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK12:       omp.inner.for.inc:
 // CHECK12-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31
@@ -9014,7 +8982,7 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    ret void
 //
 //
-// CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..9
+// CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..11
 // CHECK12-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK12-NEXT:  entry:
 // CHECK12-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -9101,11 +9069,11 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4
 // CHECK12-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4
 // CHECK12-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4
-// CHECK12-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK12-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK12-NEXT:    ret void
 //
 //
-// CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..11
+// CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..14
 // CHECK12-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK12-NEXT:  entry:
 // CHECK12-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -9151,7 +9119,7 @@ int main (int argc, char **argv) {
 // CHECK12:       omp.inner.for.body:
 // CHECK12-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !37
 // CHECK12-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !37
-// CHECK12-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !37
+// CHECK12-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !37
 // CHECK12-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK12:       omp.inner.for.inc:
 // CHECK12-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37
@@ -9173,7 +9141,7 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    ret void
 //
 //
-// CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..12
+// CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..15
 // CHECK12-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK12-NEXT:  entry:
 // CHECK12-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -9269,11 +9237,11 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
 // CHECK12-NEXT:    store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK12-NEXT:    [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK12-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i32)* @.omp_outlined..15 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i32 [[TMP3]])
+// CHECK12-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i32)* @.omp_outlined..18 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i32 [[TMP3]])
 // CHECK12-NEXT:    ret void
 //
 //
-// CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..15
+// CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..18
 // CHECK12-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK12-NEXT:  entry:
 // CHECK12-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -9325,7 +9293,7 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !43
 // CHECK12-NEXT:    store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !43
 // CHECK12-NEXT:    [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !43
-// CHECK12-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]), !llvm.access.group !43
+// CHECK12-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]), !llvm.access.group !43
 // CHECK12-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK12:       omp.inner.for.inc:
 // CHECK12-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43
@@ -9368,7 +9336,7 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    ret void
 //
 //
-// CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..16
+// CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..19
 // CHECK12-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK12-NEXT:  entry:
 // CHECK12-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -9452,7 +9420,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK12-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK12-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK12-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK12-NEXT:  entry:
 // CHECK12-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK12-NEXT:    ret void

diff  --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_schedule_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_schedule_codegen.cpp
index c523576d206d0..8ff44940e7de1 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_schedule_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_schedule_codegen.cpp
@@ -8820,14 +8820,14 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    store i32 [[TMP3]], i32* [[CONV]], align 4
 // CHECK13-NEXT:    [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8
 // CHECK13-NEXT:    [[TMP5:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK13-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK13-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i64*
-// CHECK13-NEXT:    store i64 [[TMP4]], i64* [[TMP7]], align 8
-// CHECK13-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK13-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i64*
-// CHECK13-NEXT:    store i64 [[TMP4]], i64* [[TMP9]], align 8
-// CHECK13-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK13-NEXT:    store i64 4, i64* [[TMP10]], align 8
+// CHECK13-NEXT:    [[TMP6:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK13-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP6]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes to i8*), i64 24, i1 false)
+// CHECK13-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK13-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i64*
+// CHECK13-NEXT:    store i64 [[TMP4]], i64* [[TMP8]], align 8
+// CHECK13-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK13-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i64*
+// CHECK13-NEXT:    store i64 [[TMP4]], i64* [[TMP10]], align 8
 // CHECK13-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
 // CHECK13-NEXT:    store i8* null, i8** [[TMP11]], align 8
 // CHECK13-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -8836,300 +8836,286 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK13-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i64*
 // CHECK13-NEXT:    store i64 [[TMP1]], i64* [[TMP15]], align 8
-// CHECK13-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK13-NEXT:    store i64 8, i64* [[TMP16]], align 8
-// CHECK13-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK13-NEXT:    store i8* null, i8** [[TMP17]], align 8
-// CHECK13-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK13-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
-// CHECK13-NEXT:    store i32* [[VLA]], i32** [[TMP19]], align 8
-// CHECK13-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK13-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32**
-// CHECK13-NEXT:    store i32* [[VLA]], i32** [[TMP21]], align 8
-// CHECK13-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK13-NEXT:    store i64 [[TMP5]], i64* [[TMP22]], align 8
-// CHECK13-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK13-NEXT:    store i8* null, i8** [[TMP23]], align 8
-// CHECK13-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK13-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK13-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK13-NEXT:    [[TMP27:%.*]] = load i32, i32* [[N]], align 4
-// CHECK13-NEXT:    store i32 [[TMP27]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK13-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK13-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP28]], 0
+// CHECK13-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK13-NEXT:    store i8* null, i8** [[TMP16]], align 8
+// CHECK13-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK13-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32**
+// CHECK13-NEXT:    store i32* [[VLA]], i32** [[TMP18]], align 8
+// CHECK13-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK13-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32**
+// CHECK13-NEXT:    store i32* [[VLA]], i32** [[TMP20]], align 8
+// CHECK13-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
+// CHECK13-NEXT:    store i64 [[TMP5]], i64* [[TMP21]], align 8
+// CHECK13-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK13-NEXT:    store i8* null, i8** [[TMP22]], align 8
+// CHECK13-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK13-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK13-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK13-NEXT:    [[TMP26:%.*]] = load i32, i32* [[N]], align 4
+// CHECK13-NEXT:    store i32 [[TMP26]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK13-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK13-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP27]], 0
 // CHECK13-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK13-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK13-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK13-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK13-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP29]], 1
-// CHECK13-NEXT:    [[TMP30:%.*]] = zext i32 [[ADD]] to i64
-// CHECK13-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP30]])
-// CHECK13-NEXT:    [[TMP31:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l154.region_id, i32 3, i8** [[TMP24]], i8** [[TMP25]], i64* [[TMP26]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK13-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
-// CHECK13-NEXT:    br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK13-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK13-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP28]], 1
+// CHECK13-NEXT:    [[TMP29:%.*]] = zext i32 [[ADD]] to i64
+// CHECK13-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP29]])
+// CHECK13-NEXT:    [[TMP30:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l154.region_id, i32 3, i8** [[TMP23]], i8** [[TMP24]], i64* [[TMP25]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK13-NEXT:    [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
+// CHECK13-NEXT:    br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK13:       omp_offload.failed:
 // CHECK13-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l154(i64 [[TMP4]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK13-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK13:       omp_offload.cont:
-// CHECK13-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N]], align 4
+// CHECK13-NEXT:    [[TMP32:%.*]] = load i32, i32* [[N]], align 4
 // CHECK13-NEXT:    [[CONV4:%.*]] = bitcast i64* [[N_CASTED3]] to i32*
-// CHECK13-NEXT:    store i32 [[TMP33]], i32* [[CONV4]], align 4
-// CHECK13-NEXT:    [[TMP34:%.*]] = load i64, i64* [[N_CASTED3]], align 8
-// CHECK13-NEXT:    [[TMP35:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK13-NEXT:    store i32 [[TMP32]], i32* [[CONV4]], align 4
+// CHECK13-NEXT:    [[TMP33:%.*]] = load i64, i64* [[N_CASTED3]], align 8
+// CHECK13-NEXT:    [[TMP34:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK13-NEXT:    [[TMP35:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES8]] to i8*
+// CHECK13-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP35]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes.4 to i8*), i64 24, i1 false)
 // CHECK13-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
 // CHECK13-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i64*
-// CHECK13-NEXT:    store i64 [[TMP34]], i64* [[TMP37]], align 8
+// CHECK13-NEXT:    store i64 [[TMP33]], i64* [[TMP37]], align 8
 // CHECK13-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
 // CHECK13-NEXT:    [[TMP39:%.*]] = bitcast i8** [[TMP38]] to i64*
-// CHECK13-NEXT:    store i64 [[TMP34]], i64* [[TMP39]], align 8
-// CHECK13-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 0
-// CHECK13-NEXT:    store i64 4, i64* [[TMP40]], align 8
-// CHECK13-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 0
-// CHECK13-NEXT:    store i8* null, i8** [[TMP41]], align 8
-// CHECK13-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 1
-// CHECK13-NEXT:    [[TMP43:%.*]] = bitcast i8** [[TMP42]] to i64*
-// CHECK13-NEXT:    store i64 [[TMP1]], i64* [[TMP43]], align 8
-// CHECK13-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 1
-// CHECK13-NEXT:    [[TMP45:%.*]] = bitcast i8** [[TMP44]] to i64*
-// CHECK13-NEXT:    store i64 [[TMP1]], i64* [[TMP45]], align 8
-// CHECK13-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 1
-// CHECK13-NEXT:    store i64 8, i64* [[TMP46]], align 8
-// CHECK13-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 1
-// CHECK13-NEXT:    store i8* null, i8** [[TMP47]], align 8
-// CHECK13-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 2
+// CHECK13-NEXT:    store i64 [[TMP33]], i64* [[TMP39]], align 8
+// CHECK13-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 0
+// CHECK13-NEXT:    store i8* null, i8** [[TMP40]], align 8
+// CHECK13-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 1
+// CHECK13-NEXT:    [[TMP42:%.*]] = bitcast i8** [[TMP41]] to i64*
+// CHECK13-NEXT:    store i64 [[TMP1]], i64* [[TMP42]], align 8
+// CHECK13-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 1
+// CHECK13-NEXT:    [[TMP44:%.*]] = bitcast i8** [[TMP43]] to i64*
+// CHECK13-NEXT:    store i64 [[TMP1]], i64* [[TMP44]], align 8
+// CHECK13-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 1
+// CHECK13-NEXT:    store i8* null, i8** [[TMP45]], align 8
+// CHECK13-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 2
+// CHECK13-NEXT:    [[TMP47:%.*]] = bitcast i8** [[TMP46]] to i32**
+// CHECK13-NEXT:    store i32* [[VLA]], i32** [[TMP47]], align 8
+// CHECK13-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 2
 // CHECK13-NEXT:    [[TMP49:%.*]] = bitcast i8** [[TMP48]] to i32**
 // CHECK13-NEXT:    store i32* [[VLA]], i32** [[TMP49]], align 8
-// CHECK13-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 2
-// CHECK13-NEXT:    [[TMP51:%.*]] = bitcast i8** [[TMP50]] to i32**
-// CHECK13-NEXT:    store i32* [[VLA]], i32** [[TMP51]], align 8
-// CHECK13-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 2
-// CHECK13-NEXT:    store i64 [[TMP35]], i64* [[TMP52]], align 8
-// CHECK13-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 2
-// CHECK13-NEXT:    store i8* null, i8** [[TMP53]], align 8
-// CHECK13-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
-// CHECK13-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
-// CHECK13-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 0
-// CHECK13-NEXT:    [[TMP57:%.*]] = load i32, i32* [[N]], align 4
-// CHECK13-NEXT:    store i32 [[TMP57]], i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK13-NEXT:    [[TMP58:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK13-NEXT:    [[SUB12:%.*]] = sub nsw i32 [[TMP58]], 0
+// CHECK13-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 2
+// CHECK13-NEXT:    store i64 [[TMP34]], i64* [[TMP50]], align 8
+// CHECK13-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 2
+// CHECK13-NEXT:    store i8* null, i8** [[TMP51]], align 8
+// CHECK13-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
+// CHECK13-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
+// CHECK13-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 0
+// CHECK13-NEXT:    [[TMP55:%.*]] = load i32, i32* [[N]], align 4
+// CHECK13-NEXT:    store i32 [[TMP55]], i32* [[DOTCAPTURE_EXPR_10]], align 4
+// CHECK13-NEXT:    [[TMP56:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
+// CHECK13-NEXT:    [[SUB12:%.*]] = sub nsw i32 [[TMP56]], 0
 // CHECK13-NEXT:    [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1
 // CHECK13-NEXT:    [[SUB14:%.*]] = sub nsw i32 [[DIV13]], 1
 // CHECK13-NEXT:    store i32 [[SUB14]], i32* [[DOTCAPTURE_EXPR_11]], align 4
-// CHECK13-NEXT:    [[TMP59:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_11]], align 4
-// CHECK13-NEXT:    [[ADD15:%.*]] = add nsw i32 [[TMP59]], 1
-// CHECK13-NEXT:    [[TMP60:%.*]] = zext i32 [[ADD15]] to i64
-// CHECK13-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP60]])
-// CHECK13-NEXT:    [[TMP61:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l159.region_id, i32 3, i8** [[TMP54]], i8** [[TMP55]], i64* [[TMP56]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK13-NEXT:    [[TMP62:%.*]] = icmp ne i32 [[TMP61]], 0
-// CHECK13-NEXT:    br i1 [[TMP62]], label [[OMP_OFFLOAD_FAILED16:%.*]], label [[OMP_OFFLOAD_CONT17:%.*]]
+// CHECK13-NEXT:    [[TMP57:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_11]], align 4
+// CHECK13-NEXT:    [[ADD15:%.*]] = add nsw i32 [[TMP57]], 1
+// CHECK13-NEXT:    [[TMP58:%.*]] = zext i32 [[ADD15]] to i64
+// CHECK13-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP58]])
+// CHECK13-NEXT:    [[TMP59:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l159.region_id, i32 3, i8** [[TMP52]], i8** [[TMP53]], i64* [[TMP54]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.5, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK13-NEXT:    [[TMP60:%.*]] = icmp ne i32 [[TMP59]], 0
+// CHECK13-NEXT:    br i1 [[TMP60]], label [[OMP_OFFLOAD_FAILED16:%.*]], label [[OMP_OFFLOAD_CONT17:%.*]]
 // CHECK13:       omp_offload.failed16:
-// CHECK13-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l159(i64 [[TMP34]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK13-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l159(i64 [[TMP33]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK13-NEXT:    br label [[OMP_OFFLOAD_CONT17]]
 // CHECK13:       omp_offload.cont17:
-// CHECK13-NEXT:    [[TMP63:%.*]] = load i32, i32* [[M]], align 4
+// CHECK13-NEXT:    [[TMP61:%.*]] = load i32, i32* [[M]], align 4
 // CHECK13-NEXT:    [[CONV18:%.*]] = bitcast i64* [[M_CASTED]] to i32*
-// CHECK13-NEXT:    store i32 [[TMP63]], i32* [[CONV18]], align 4
-// CHECK13-NEXT:    [[TMP64:%.*]] = load i64, i64* [[M_CASTED]], align 8
-// CHECK13-NEXT:    [[TMP65:%.*]] = load i32, i32* [[N]], align 4
+// CHECK13-NEXT:    store i32 [[TMP61]], i32* [[CONV18]], align 4
+// CHECK13-NEXT:    [[TMP62:%.*]] = load i64, i64* [[M_CASTED]], align 8
+// CHECK13-NEXT:    [[TMP63:%.*]] = load i32, i32* [[N]], align 4
 // CHECK13-NEXT:    [[CONV20:%.*]] = bitcast i64* [[N_CASTED19]] to i32*
-// CHECK13-NEXT:    store i32 [[TMP65]], i32* [[CONV20]], align 4
-// CHECK13-NEXT:    [[TMP66:%.*]] = load i64, i64* [[N_CASTED19]], align 8
-// CHECK13-NEXT:    [[TMP67:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK13-NEXT:    [[TMP68:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 0
-// CHECK13-NEXT:    [[TMP69:%.*]] = bitcast i8** [[TMP68]] to i64*
-// CHECK13-NEXT:    store i64 [[TMP64]], i64* [[TMP69]], align 8
-// CHECK13-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 0
-// CHECK13-NEXT:    [[TMP71:%.*]] = bitcast i8** [[TMP70]] to i64*
-// CHECK13-NEXT:    store i64 [[TMP64]], i64* [[TMP71]], align 8
-// CHECK13-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES24]], i32 0, i32 0
-// CHECK13-NEXT:    store i64 4, i64* [[TMP72]], align 8
-// CHECK13-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 0
-// CHECK13-NEXT:    store i8* null, i8** [[TMP73]], align 8
-// CHECK13-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 1
+// CHECK13-NEXT:    store i32 [[TMP63]], i32* [[CONV20]], align 4
+// CHECK13-NEXT:    [[TMP64:%.*]] = load i64, i64* [[N_CASTED19]], align 8
+// CHECK13-NEXT:    [[TMP65:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK13-NEXT:    [[TMP66:%.*]] = bitcast [4 x i64]* [[DOTOFFLOAD_SIZES24]] to i8*
+// CHECK13-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP66]], i8* align 8 bitcast ([4 x i64]* @.offload_sizes.8 to i8*), i64 32, i1 false)
+// CHECK13-NEXT:    [[TMP67:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 0
+// CHECK13-NEXT:    [[TMP68:%.*]] = bitcast i8** [[TMP67]] to i64*
+// CHECK13-NEXT:    store i64 [[TMP62]], i64* [[TMP68]], align 8
+// CHECK13-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 0
+// CHECK13-NEXT:    [[TMP70:%.*]] = bitcast i8** [[TMP69]] to i64*
+// CHECK13-NEXT:    store i64 [[TMP62]], i64* [[TMP70]], align 8
+// CHECK13-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 0
+// CHECK13-NEXT:    store i8* null, i8** [[TMP71]], align 8
+// CHECK13-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 1
+// CHECK13-NEXT:    [[TMP73:%.*]] = bitcast i8** [[TMP72]] to i64*
+// CHECK13-NEXT:    store i64 [[TMP64]], i64* [[TMP73]], align 8
+// CHECK13-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 1
 // CHECK13-NEXT:    [[TMP75:%.*]] = bitcast i8** [[TMP74]] to i64*
-// CHECK13-NEXT:    store i64 [[TMP66]], i64* [[TMP75]], align 8
-// CHECK13-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 1
-// CHECK13-NEXT:    [[TMP77:%.*]] = bitcast i8** [[TMP76]] to i64*
-// CHECK13-NEXT:    store i64 [[TMP66]], i64* [[TMP77]], align 8
-// CHECK13-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES24]], i32 0, i32 1
-// CHECK13-NEXT:    store i64 4, i64* [[TMP78]], align 8
-// CHECK13-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 1
-// CHECK13-NEXT:    store i8* null, i8** [[TMP79]], align 8
-// CHECK13-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 2
-// CHECK13-NEXT:    [[TMP81:%.*]] = bitcast i8** [[TMP80]] to i64*
-// CHECK13-NEXT:    store i64 [[TMP1]], i64* [[TMP81]], align 8
-// CHECK13-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 2
-// CHECK13-NEXT:    [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i64*
-// CHECK13-NEXT:    store i64 [[TMP1]], i64* [[TMP83]], align 8
-// CHECK13-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES24]], i32 0, i32 2
-// CHECK13-NEXT:    store i64 8, i64* [[TMP84]], align 8
-// CHECK13-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 2
-// CHECK13-NEXT:    store i8* null, i8** [[TMP85]], align 8
-// CHECK13-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 3
-// CHECK13-NEXT:    [[TMP87:%.*]] = bitcast i8** [[TMP86]] to i32**
-// CHECK13-NEXT:    store i32* [[VLA]], i32** [[TMP87]], align 8
-// CHECK13-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 3
-// CHECK13-NEXT:    [[TMP89:%.*]] = bitcast i8** [[TMP88]] to i32**
-// CHECK13-NEXT:    store i32* [[VLA]], i32** [[TMP89]], align 8
-// CHECK13-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES24]], i32 0, i32 3
-// CHECK13-NEXT:    store i64 [[TMP67]], i64* [[TMP90]], align 8
-// CHECK13-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 3
-// CHECK13-NEXT:    store i8* null, i8** [[TMP91]], align 8
-// CHECK13-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 0
-// CHECK13-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 0
-// CHECK13-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES24]], i32 0, i32 0
-// CHECK13-NEXT:    [[TMP95:%.*]] = load i32, i32* [[N]], align 4
-// CHECK13-NEXT:    store i32 [[TMP95]], i32* [[DOTCAPTURE_EXPR_26]], align 4
-// CHECK13-NEXT:    [[TMP96:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_26]], align 4
-// CHECK13-NEXT:    [[SUB28:%.*]] = sub nsw i32 [[TMP96]], 0
+// CHECK13-NEXT:    store i64 [[TMP64]], i64* [[TMP75]], align 8
+// CHECK13-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 1
+// CHECK13-NEXT:    store i8* null, i8** [[TMP76]], align 8
+// CHECK13-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 2
+// CHECK13-NEXT:    [[TMP78:%.*]] = bitcast i8** [[TMP77]] to i64*
+// CHECK13-NEXT:    store i64 [[TMP1]], i64* [[TMP78]], align 8
+// CHECK13-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 2
+// CHECK13-NEXT:    [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i64*
+// CHECK13-NEXT:    store i64 [[TMP1]], i64* [[TMP80]], align 8
+// CHECK13-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 2
+// CHECK13-NEXT:    store i8* null, i8** [[TMP81]], align 8
+// CHECK13-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 3
+// CHECK13-NEXT:    [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i32**
+// CHECK13-NEXT:    store i32* [[VLA]], i32** [[TMP83]], align 8
+// CHECK13-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 3
+// CHECK13-NEXT:    [[TMP85:%.*]] = bitcast i8** [[TMP84]] to i32**
+// CHECK13-NEXT:    store i32* [[VLA]], i32** [[TMP85]], align 8
+// CHECK13-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES24]], i32 0, i32 3
+// CHECK13-NEXT:    store i64 [[TMP65]], i64* [[TMP86]], align 8
+// CHECK13-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 3
+// CHECK13-NEXT:    store i8* null, i8** [[TMP87]], align 8
+// CHECK13-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 0
+// CHECK13-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 0
+// CHECK13-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES24]], i32 0, i32 0
+// CHECK13-NEXT:    [[TMP91:%.*]] = load i32, i32* [[N]], align 4
+// CHECK13-NEXT:    store i32 [[TMP91]], i32* [[DOTCAPTURE_EXPR_26]], align 4
+// CHECK13-NEXT:    [[TMP92:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_26]], align 4
+// CHECK13-NEXT:    [[SUB28:%.*]] = sub nsw i32 [[TMP92]], 0
 // CHECK13-NEXT:    [[DIV29:%.*]] = sdiv i32 [[SUB28]], 1
 // CHECK13-NEXT:    [[SUB30:%.*]] = sub nsw i32 [[DIV29]], 1
 // CHECK13-NEXT:    store i32 [[SUB30]], i32* [[DOTCAPTURE_EXPR_27]], align 4
-// CHECK13-NEXT:    [[TMP97:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_27]], align 4
-// CHECK13-NEXT:    [[ADD31:%.*]] = add nsw i32 [[TMP97]], 1
-// CHECK13-NEXT:    [[TMP98:%.*]] = zext i32 [[ADD31]] to i64
-// CHECK13-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP98]])
-// CHECK13-NEXT:    [[TMP99:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l164.region_id, i32 4, i8** [[TMP92]], i8** [[TMP93]], i64* [[TMP94]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.7, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK13-NEXT:    [[TMP100:%.*]] = icmp ne i32 [[TMP99]], 0
-// CHECK13-NEXT:    br i1 [[TMP100]], label [[OMP_OFFLOAD_FAILED32:%.*]], label [[OMP_OFFLOAD_CONT33:%.*]]
+// CHECK13-NEXT:    [[TMP93:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_27]], align 4
+// CHECK13-NEXT:    [[ADD31:%.*]] = add nsw i32 [[TMP93]], 1
+// CHECK13-NEXT:    [[TMP94:%.*]] = zext i32 [[ADD31]] to i64
+// CHECK13-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP94]])
+// CHECK13-NEXT:    [[TMP95:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l164.region_id, i32 4, i8** [[TMP88]], i8** [[TMP89]], i64* [[TMP90]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK13-NEXT:    [[TMP96:%.*]] = icmp ne i32 [[TMP95]], 0
+// CHECK13-NEXT:    br i1 [[TMP96]], label [[OMP_OFFLOAD_FAILED32:%.*]], label [[OMP_OFFLOAD_CONT33:%.*]]
 // CHECK13:       omp_offload.failed32:
-// CHECK13-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l164(i64 [[TMP64]], i64 [[TMP66]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK13-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l164(i64 [[TMP62]], i64 [[TMP64]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK13-NEXT:    br label [[OMP_OFFLOAD_CONT33]]
 // CHECK13:       omp_offload.cont33:
-// CHECK13-NEXT:    [[TMP101:%.*]] = load i32, i32* [[N]], align 4
+// CHECK13-NEXT:    [[TMP97:%.*]] = load i32, i32* [[N]], align 4
 // CHECK13-NEXT:    [[CONV35:%.*]] = bitcast i64* [[N_CASTED34]] to i32*
-// CHECK13-NEXT:    store i32 [[TMP101]], i32* [[CONV35]], align 4
-// CHECK13-NEXT:    [[TMP102:%.*]] = load i64, i64* [[N_CASTED34]], align 8
-// CHECK13-NEXT:    [[TMP103:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK13-NEXT:    [[TMP104:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS36]], i32 0, i32 0
-// CHECK13-NEXT:    [[TMP105:%.*]] = bitcast i8** [[TMP104]] to i64*
-// CHECK13-NEXT:    store i64 [[TMP102]], i64* [[TMP105]], align 8
-// CHECK13-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS37]], i32 0, i32 0
+// CHECK13-NEXT:    store i32 [[TMP97]], i32* [[CONV35]], align 4
+// CHECK13-NEXT:    [[TMP98:%.*]] = load i64, i64* [[N_CASTED34]], align 8
+// CHECK13-NEXT:    [[TMP99:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK13-NEXT:    [[TMP100:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES39]] to i8*
+// CHECK13-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP100]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes.12 to i8*), i64 24, i1 false)
+// CHECK13-NEXT:    [[TMP101:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS36]], i32 0, i32 0
+// CHECK13-NEXT:    [[TMP102:%.*]] = bitcast i8** [[TMP101]] to i64*
+// CHECK13-NEXT:    store i64 [[TMP98]], i64* [[TMP102]], align 8
+// CHECK13-NEXT:    [[TMP103:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS37]], i32 0, i32 0
+// CHECK13-NEXT:    [[TMP104:%.*]] = bitcast i8** [[TMP103]] to i64*
+// CHECK13-NEXT:    store i64 [[TMP98]], i64* [[TMP104]], align 8
+// CHECK13-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS38]], i64 0, i64 0
+// CHECK13-NEXT:    store i8* null, i8** [[TMP105]], align 8
+// CHECK13-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS36]], i32 0, i32 1
 // CHECK13-NEXT:    [[TMP107:%.*]] = bitcast i8** [[TMP106]] to i64*
-// CHECK13-NEXT:    store i64 [[TMP102]], i64* [[TMP107]], align 8
-// CHECK13-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES39]], i32 0, i32 0
-// CHECK13-NEXT:    store i64 4, i64* [[TMP108]], align 8
-// CHECK13-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS38]], i64 0, i64 0
-// CHECK13-NEXT:    store i8* null, i8** [[TMP109]], align 8
-// CHECK13-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS36]], i32 0, i32 1
-// CHECK13-NEXT:    [[TMP111:%.*]] = bitcast i8** [[TMP110]] to i64*
-// CHECK13-NEXT:    store i64 [[TMP1]], i64* [[TMP111]], align 8
-// CHECK13-NEXT:    [[TMP112:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS37]], i32 0, i32 1
-// CHECK13-NEXT:    [[TMP113:%.*]] = bitcast i8** [[TMP112]] to i64*
-// CHECK13-NEXT:    store i64 [[TMP1]], i64* [[TMP113]], align 8
-// CHECK13-NEXT:    [[TMP114:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES39]], i32 0, i32 1
-// CHECK13-NEXT:    store i64 8, i64* [[TMP114]], align 8
-// CHECK13-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS38]], i64 0, i64 1
-// CHECK13-NEXT:    store i8* null, i8** [[TMP115]], align 8
-// CHECK13-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS36]], i32 0, i32 2
-// CHECK13-NEXT:    [[TMP117:%.*]] = bitcast i8** [[TMP116]] to i32**
-// CHECK13-NEXT:    store i32* [[VLA]], i32** [[TMP117]], align 8
-// CHECK13-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS37]], i32 0, i32 2
-// CHECK13-NEXT:    [[TMP119:%.*]] = bitcast i8** [[TMP118]] to i32**
-// CHECK13-NEXT:    store i32* [[VLA]], i32** [[TMP119]], align 8
-// CHECK13-NEXT:    [[TMP120:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES39]], i32 0, i32 2
-// CHECK13-NEXT:    store i64 [[TMP103]], i64* [[TMP120]], align 8
-// CHECK13-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS38]], i64 0, i64 2
-// CHECK13-NEXT:    store i8* null, i8** [[TMP121]], align 8
-// CHECK13-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS36]], i32 0, i32 0
-// CHECK13-NEXT:    [[TMP123:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS37]], i32 0, i32 0
-// CHECK13-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES39]], i32 0, i32 0
-// CHECK13-NEXT:    [[TMP125:%.*]] = load i32, i32* [[N]], align 4
-// CHECK13-NEXT:    store i32 [[TMP125]], i32* [[DOTCAPTURE_EXPR_41]], align 4
-// CHECK13-NEXT:    [[TMP126:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_41]], align 4
-// CHECK13-NEXT:    [[SUB43:%.*]] = sub nsw i32 [[TMP126]], 0
+// CHECK13-NEXT:    store i64 [[TMP1]], i64* [[TMP107]], align 8
+// CHECK13-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS37]], i32 0, i32 1
+// CHECK13-NEXT:    [[TMP109:%.*]] = bitcast i8** [[TMP108]] to i64*
+// CHECK13-NEXT:    store i64 [[TMP1]], i64* [[TMP109]], align 8
+// CHECK13-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS38]], i64 0, i64 1
+// CHECK13-NEXT:    store i8* null, i8** [[TMP110]], align 8
+// CHECK13-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS36]], i32 0, i32 2
+// CHECK13-NEXT:    [[TMP112:%.*]] = bitcast i8** [[TMP111]] to i32**
+// CHECK13-NEXT:    store i32* [[VLA]], i32** [[TMP112]], align 8
+// CHECK13-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS37]], i32 0, i32 2
+// CHECK13-NEXT:    [[TMP114:%.*]] = bitcast i8** [[TMP113]] to i32**
+// CHECK13-NEXT:    store i32* [[VLA]], i32** [[TMP114]], align 8
+// CHECK13-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES39]], i32 0, i32 2
+// CHECK13-NEXT:    store i64 [[TMP99]], i64* [[TMP115]], align 8
+// CHECK13-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS38]], i64 0, i64 2
+// CHECK13-NEXT:    store i8* null, i8** [[TMP116]], align 8
+// CHECK13-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS36]], i32 0, i32 0
+// CHECK13-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS37]], i32 0, i32 0
+// CHECK13-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES39]], i32 0, i32 0
+// CHECK13-NEXT:    [[TMP120:%.*]] = load i32, i32* [[N]], align 4
+// CHECK13-NEXT:    store i32 [[TMP120]], i32* [[DOTCAPTURE_EXPR_41]], align 4
+// CHECK13-NEXT:    [[TMP121:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_41]], align 4
+// CHECK13-NEXT:    [[SUB43:%.*]] = sub nsw i32 [[TMP121]], 0
 // CHECK13-NEXT:    [[DIV44:%.*]] = sdiv i32 [[SUB43]], 1
 // CHECK13-NEXT:    [[SUB45:%.*]] = sub nsw i32 [[DIV44]], 1
 // CHECK13-NEXT:    store i32 [[SUB45]], i32* [[DOTCAPTURE_EXPR_42]], align 4
-// CHECK13-NEXT:    [[TMP127:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_42]], align 4
-// CHECK13-NEXT:    [[ADD46:%.*]] = add nsw i32 [[TMP127]], 1
-// CHECK13-NEXT:    [[TMP128:%.*]] = zext i32 [[ADD46]] to i64
-// CHECK13-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP128]])
-// CHECK13-NEXT:    [[TMP129:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l169.region_id, i32 3, i8** [[TMP122]], i8** [[TMP123]], i64* [[TMP124]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.10, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK13-NEXT:    [[TMP130:%.*]] = icmp ne i32 [[TMP129]], 0
-// CHECK13-NEXT:    br i1 [[TMP130]], label [[OMP_OFFLOAD_FAILED47:%.*]], label [[OMP_OFFLOAD_CONT48:%.*]]
+// CHECK13-NEXT:    [[TMP122:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_42]], align 4
+// CHECK13-NEXT:    [[ADD46:%.*]] = add nsw i32 [[TMP122]], 1
+// CHECK13-NEXT:    [[TMP123:%.*]] = zext i32 [[ADD46]] to i64
+// CHECK13-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP123]])
+// CHECK13-NEXT:    [[TMP124:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l169.region_id, i32 3, i8** [[TMP117]], i8** [[TMP118]], i64* [[TMP119]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK13-NEXT:    [[TMP125:%.*]] = icmp ne i32 [[TMP124]], 0
+// CHECK13-NEXT:    br i1 [[TMP125]], label [[OMP_OFFLOAD_FAILED47:%.*]], label [[OMP_OFFLOAD_CONT48:%.*]]
 // CHECK13:       omp_offload.failed47:
-// CHECK13-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l169(i64 [[TMP102]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK13-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l169(i64 [[TMP98]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK13-NEXT:    br label [[OMP_OFFLOAD_CONT48]]
 // CHECK13:       omp_offload.cont48:
-// CHECK13-NEXT:    [[TMP131:%.*]] = load i32, i32* [[M]], align 4
+// CHECK13-NEXT:    [[TMP126:%.*]] = load i32, i32* [[M]], align 4
 // CHECK13-NEXT:    [[CONV50:%.*]] = bitcast i64* [[M_CASTED49]] to i32*
-// CHECK13-NEXT:    store i32 [[TMP131]], i32* [[CONV50]], align 4
-// CHECK13-NEXT:    [[TMP132:%.*]] = load i64, i64* [[M_CASTED49]], align 8
-// CHECK13-NEXT:    [[TMP133:%.*]] = load i32, i32* [[N]], align 4
+// CHECK13-NEXT:    store i32 [[TMP126]], i32* [[CONV50]], align 4
+// CHECK13-NEXT:    [[TMP127:%.*]] = load i64, i64* [[M_CASTED49]], align 8
+// CHECK13-NEXT:    [[TMP128:%.*]] = load i32, i32* [[N]], align 4
 // CHECK13-NEXT:    [[CONV52:%.*]] = bitcast i64* [[N_CASTED51]] to i32*
-// CHECK13-NEXT:    store i32 [[TMP133]], i32* [[CONV52]], align 4
-// CHECK13-NEXT:    [[TMP134:%.*]] = load i64, i64* [[N_CASTED51]], align 8
-// CHECK13-NEXT:    [[TMP135:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK13-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 0
-// CHECK13-NEXT:    [[TMP137:%.*]] = bitcast i8** [[TMP136]] to i64*
-// CHECK13-NEXT:    store i64 [[TMP132]], i64* [[TMP137]], align 8
-// CHECK13-NEXT:    [[TMP138:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 0
-// CHECK13-NEXT:    [[TMP139:%.*]] = bitcast i8** [[TMP138]] to i64*
-// CHECK13-NEXT:    store i64 [[TMP132]], i64* [[TMP139]], align 8
-// CHECK13-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES56]], i32 0, i32 0
-// CHECK13-NEXT:    store i64 4, i64* [[TMP140]], align 8
-// CHECK13-NEXT:    [[TMP141:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS55]], i64 0, i64 0
+// CHECK13-NEXT:    store i32 [[TMP128]], i32* [[CONV52]], align 4
+// CHECK13-NEXT:    [[TMP129:%.*]] = load i64, i64* [[N_CASTED51]], align 8
+// CHECK13-NEXT:    [[TMP130:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK13-NEXT:    [[TMP131:%.*]] = bitcast [4 x i64]* [[DOTOFFLOAD_SIZES56]] to i8*
+// CHECK13-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP131]], i8* align 8 bitcast ([4 x i64]* @.offload_sizes.16 to i8*), i64 32, i1 false)
+// CHECK13-NEXT:    [[TMP132:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 0
+// CHECK13-NEXT:    [[TMP133:%.*]] = bitcast i8** [[TMP132]] to i64*
+// CHECK13-NEXT:    store i64 [[TMP127]], i64* [[TMP133]], align 8
+// CHECK13-NEXT:    [[TMP134:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 0
+// CHECK13-NEXT:    [[TMP135:%.*]] = bitcast i8** [[TMP134]] to i64*
+// CHECK13-NEXT:    store i64 [[TMP127]], i64* [[TMP135]], align 8
+// CHECK13-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS55]], i64 0, i64 0
+// CHECK13-NEXT:    store i8* null, i8** [[TMP136]], align 8
+// CHECK13-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 1
+// CHECK13-NEXT:    [[TMP138:%.*]] = bitcast i8** [[TMP137]] to i64*
+// CHECK13-NEXT:    store i64 [[TMP129]], i64* [[TMP138]], align 8
+// CHECK13-NEXT:    [[TMP139:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 1
+// CHECK13-NEXT:    [[TMP140:%.*]] = bitcast i8** [[TMP139]] to i64*
+// CHECK13-NEXT:    store i64 [[TMP129]], i64* [[TMP140]], align 8
+// CHECK13-NEXT:    [[TMP141:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS55]], i64 0, i64 1
 // CHECK13-NEXT:    store i8* null, i8** [[TMP141]], align 8
-// CHECK13-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 1
+// CHECK13-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 2
 // CHECK13-NEXT:    [[TMP143:%.*]] = bitcast i8** [[TMP142]] to i64*
-// CHECK13-NEXT:    store i64 [[TMP134]], i64* [[TMP143]], align 8
-// CHECK13-NEXT:    [[TMP144:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 1
+// CHECK13-NEXT:    store i64 [[TMP1]], i64* [[TMP143]], align 8
+// CHECK13-NEXT:    [[TMP144:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 2
 // CHECK13-NEXT:    [[TMP145:%.*]] = bitcast i8** [[TMP144]] to i64*
-// CHECK13-NEXT:    store i64 [[TMP134]], i64* [[TMP145]], align 8
-// CHECK13-NEXT:    [[TMP146:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES56]], i32 0, i32 1
-// CHECK13-NEXT:    store i64 4, i64* [[TMP146]], align 8
-// CHECK13-NEXT:    [[TMP147:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS55]], i64 0, i64 1
-// CHECK13-NEXT:    store i8* null, i8** [[TMP147]], align 8
-// CHECK13-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 2
-// CHECK13-NEXT:    [[TMP149:%.*]] = bitcast i8** [[TMP148]] to i64*
-// CHECK13-NEXT:    store i64 [[TMP1]], i64* [[TMP149]], align 8
-// CHECK13-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 2
-// CHECK13-NEXT:    [[TMP151:%.*]] = bitcast i8** [[TMP150]] to i64*
-// CHECK13-NEXT:    store i64 [[TMP1]], i64* [[TMP151]], align 8
-// CHECK13-NEXT:    [[TMP152:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES56]], i32 0, i32 2
-// CHECK13-NEXT:    store i64 8, i64* [[TMP152]], align 8
-// CHECK13-NEXT:    [[TMP153:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS55]], i64 0, i64 2
-// CHECK13-NEXT:    store i8* null, i8** [[TMP153]], align 8
-// CHECK13-NEXT:    [[TMP154:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 3
-// CHECK13-NEXT:    [[TMP155:%.*]] = bitcast i8** [[TMP154]] to i32**
-// CHECK13-NEXT:    store i32* [[VLA]], i32** [[TMP155]], align 8
-// CHECK13-NEXT:    [[TMP156:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 3
-// CHECK13-NEXT:    [[TMP157:%.*]] = bitcast i8** [[TMP156]] to i32**
-// CHECK13-NEXT:    store i32* [[VLA]], i32** [[TMP157]], align 8
-// CHECK13-NEXT:    [[TMP158:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES56]], i32 0, i32 3
-// CHECK13-NEXT:    store i64 [[TMP135]], i64* [[TMP158]], align 8
-// CHECK13-NEXT:    [[TMP159:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS55]], i64 0, i64 3
-// CHECK13-NEXT:    store i8* null, i8** [[TMP159]], align 8
-// CHECK13-NEXT:    [[TMP160:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 0
-// CHECK13-NEXT:    [[TMP161:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 0
-// CHECK13-NEXT:    [[TMP162:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES56]], i32 0, i32 0
-// CHECK13-NEXT:    [[TMP163:%.*]] = load i32, i32* [[N]], align 4
-// CHECK13-NEXT:    store i32 [[TMP163]], i32* [[DOTCAPTURE_EXPR_58]], align 4
-// CHECK13-NEXT:    [[TMP164:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_58]], align 4
-// CHECK13-NEXT:    [[SUB60:%.*]] = sub nsw i32 [[TMP164]], 0
+// CHECK13-NEXT:    store i64 [[TMP1]], i64* [[TMP145]], align 8
+// CHECK13-NEXT:    [[TMP146:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS55]], i64 0, i64 2
+// CHECK13-NEXT:    store i8* null, i8** [[TMP146]], align 8
+// CHECK13-NEXT:    [[TMP147:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 3
+// CHECK13-NEXT:    [[TMP148:%.*]] = bitcast i8** [[TMP147]] to i32**
+// CHECK13-NEXT:    store i32* [[VLA]], i32** [[TMP148]], align 8
+// CHECK13-NEXT:    [[TMP149:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 3
+// CHECK13-NEXT:    [[TMP150:%.*]] = bitcast i8** [[TMP149]] to i32**
+// CHECK13-NEXT:    store i32* [[VLA]], i32** [[TMP150]], align 8
+// CHECK13-NEXT:    [[TMP151:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES56]], i32 0, i32 3
+// CHECK13-NEXT:    store i64 [[TMP130]], i64* [[TMP151]], align 8
+// CHECK13-NEXT:    [[TMP152:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS55]], i64 0, i64 3
+// CHECK13-NEXT:    store i8* null, i8** [[TMP152]], align 8
+// CHECK13-NEXT:    [[TMP153:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 0
+// CHECK13-NEXT:    [[TMP154:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 0
+// CHECK13-NEXT:    [[TMP155:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES56]], i32 0, i32 0
+// CHECK13-NEXT:    [[TMP156:%.*]] = load i32, i32* [[N]], align 4
+// CHECK13-NEXT:    store i32 [[TMP156]], i32* [[DOTCAPTURE_EXPR_58]], align 4
+// CHECK13-NEXT:    [[TMP157:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_58]], align 4
+// CHECK13-NEXT:    [[SUB60:%.*]] = sub nsw i32 [[TMP157]], 0
 // CHECK13-NEXT:    [[DIV61:%.*]] = sdiv i32 [[SUB60]], 1
 // CHECK13-NEXT:    [[SUB62:%.*]] = sub nsw i32 [[DIV61]], 1
 // CHECK13-NEXT:    store i32 [[SUB62]], i32* [[DOTCAPTURE_EXPR_59]], align 4
-// CHECK13-NEXT:    [[TMP165:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_59]], align 4
-// CHECK13-NEXT:    [[ADD63:%.*]] = add nsw i32 [[TMP165]], 1
-// CHECK13-NEXT:    [[TMP166:%.*]] = zext i32 [[ADD63]] to i64
-// CHECK13-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP166]])
-// CHECK13-NEXT:    [[TMP167:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l174.region_id, i32 4, i8** [[TMP160]], i8** [[TMP161]], i64* [[TMP162]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK13-NEXT:    [[TMP168:%.*]] = icmp ne i32 [[TMP167]], 0
-// CHECK13-NEXT:    br i1 [[TMP168]], label [[OMP_OFFLOAD_FAILED64:%.*]], label [[OMP_OFFLOAD_CONT65:%.*]]
+// CHECK13-NEXT:    [[TMP158:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_59]], align 4
+// CHECK13-NEXT:    [[ADD63:%.*]] = add nsw i32 [[TMP158]], 1
+// CHECK13-NEXT:    [[TMP159:%.*]] = zext i32 [[ADD63]] to i64
+// CHECK13-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP159]])
+// CHECK13-NEXT:    [[TMP160:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l174.region_id, i32 4, i8** [[TMP153]], i8** [[TMP154]], i64* [[TMP155]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.17, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK13-NEXT:    [[TMP161:%.*]] = icmp ne i32 [[TMP160]], 0
+// CHECK13-NEXT:    br i1 [[TMP161]], label [[OMP_OFFLOAD_FAILED64:%.*]], label [[OMP_OFFLOAD_CONT65:%.*]]
 // CHECK13:       omp_offload.failed64:
-// CHECK13-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l174(i64 [[TMP132]], i64 [[TMP134]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK13-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l174(i64 [[TMP127]], i64 [[TMP129]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK13-NEXT:    br label [[OMP_OFFLOAD_CONT65]]
 // CHECK13:       omp_offload.cont65:
-// CHECK13-NEXT:    [[TMP169:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK13-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10EEiT_(i32 noundef signext [[TMP169]])
+// CHECK13-NEXT:    [[TMP162:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK13-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10EEiT_(i32 noundef signext [[TMP162]])
 // CHECK13-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK13-NEXT:    [[TMP170:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK13-NEXT:    call void @llvm.stackrestore(i8* [[TMP170]])
-// CHECK13-NEXT:    [[TMP171:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK13-NEXT:    ret i32 [[TMP171]]
+// CHECK13-NEXT:    [[TMP163:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK13-NEXT:    call void @llvm.stackrestore(i8* [[TMP163]])
+// CHECK13-NEXT:    [[TMP164:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK13-NEXT:    ret i32 [[TMP164]]
 //
 //
 // CHECK13-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l154
@@ -9627,11 +9613,11 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK13-NEXT:    store i32 [[TMP3]], i32* [[CONV2]], align 4
 // CHECK13-NEXT:    [[TMP4:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32* [[CONV1]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP4]])
+// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* [[CONV1]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP4]])
 // CHECK13-NEXT:    ret void
 //
 //
-// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..5
+// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..6
 // CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK13-NEXT:  entry:
 // CHECK13-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -9713,7 +9699,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK13-NEXT:    store i32 [[TMP21]], i32* [[CONV7]], align 4, !llvm.access.group !28
 // CHECK13-NEXT:    [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !28
-// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]], i64 [[TMP22]]), !llvm.access.group !28
+// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*, i64)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]], i64 [[TMP22]]), !llvm.access.group !28
 // CHECK13-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK13:       omp.inner.for.inc:
 // CHECK13-NEXT:    [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28
@@ -9767,7 +9753,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    ret void
 //
 //
-// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..6
+// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..7
 // CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK13-NEXT:  entry:
 // CHECK13-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -9899,11 +9885,11 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32*
 // CHECK13-NEXT:    [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8
 // CHECK13-NEXT:    [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8
-// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), i32* [[CONV]], i64 [[TMP0]], i32* [[TMP1]])
+// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), i32* [[CONV]], i64 [[TMP0]], i32* [[TMP1]])
 // CHECK13-NEXT:    ret void
 //
 //
-// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..8
+// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..10
 // CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] {
 // CHECK13-NEXT:  entry:
 // CHECK13-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -9975,7 +9961,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[TMP17:%.*]] = zext i32 [[TMP16]] to i64
 // CHECK13-NEXT:    [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !34
 // CHECK13-NEXT:    [[TMP19:%.*]] = zext i32 [[TMP18]] to i64
-// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]]), !llvm.access.group !34
+// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]]), !llvm.access.group !34
 // CHECK13-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK13:       omp.inner.for.inc:
 // CHECK13-NEXT:    [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34
@@ -10006,7 +9992,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    ret void
 //
 //
-// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..9
+// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..11
 // CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] {
 // CHECK13-NEXT:  entry:
 // CHECK13-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -10142,11 +10128,11 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK13-NEXT:    store i32 [[TMP3]], i32* [[CONV2]], align 4
 // CHECK13-NEXT:    [[TMP4:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32* [[CONV1]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP4]])
+// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*, i64)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i32* [[CONV1]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP4]])
 // CHECK13-NEXT:    ret void
 //
 //
-// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..11
+// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..14
 // CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK13-NEXT:  entry:
 // CHECK13-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -10226,7 +10212,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK13-NEXT:    store i32 [[TMP20]], i32* [[CONV7]], align 4, !llvm.access.group !40
 // CHECK13-NEXT:    [[TMP21:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !40
-// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*, i64)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]], i64 [[TMP21]]), !llvm.access.group !40
+// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]], i64 [[TMP21]]), !llvm.access.group !40
 // CHECK13-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK13:       omp.inner.for.inc:
 // CHECK13-NEXT:    [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40
@@ -10257,7 +10243,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    ret void
 //
 //
-// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..12
+// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..15
 // CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK13-NEXT:  entry:
 // CHECK13-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -10375,7 +10361,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK13-LABEL: define {{[^@]+}}@_Z5tmainIiLi10EEiT_
-// CHECK13-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK13-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK13-NEXT:  entry:
 // CHECK13-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK13-NEXT:    [[A:%.*]] = alloca [10 x i32], align 4
@@ -10415,7 +10401,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK13-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK13-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK13-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l122.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.16, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK13-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l122.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.20, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.21, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK13-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK13-NEXT:    br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK13:       omp_offload.failed:
@@ -10433,7 +10419,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK13-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
 // CHECK13-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK13-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l127.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.19, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.20, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK13-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l127.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.24, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.25, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK13-NEXT:    [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
 // CHECK13-NEXT:    br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]]
 // CHECK13:       omp_offload.failed5:
@@ -10463,7 +10449,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0
 // CHECK13-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS8]], i32 0, i32 0
 // CHECK13-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK13-NEXT:    [[TMP32:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l132.region_id, i32 2, i8** [[TMP30]], i8** [[TMP31]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.23, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.24, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK13-NEXT:    [[TMP32:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l132.region_id, i32 2, i8** [[TMP30]], i8** [[TMP31]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.28, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.29, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK13-NEXT:    [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0
 // CHECK13-NEXT:    br i1 [[TMP33]], label [[OMP_OFFLOAD_FAILED11:%.*]], label [[OMP_OFFLOAD_CONT12:%.*]]
 // CHECK13:       omp_offload.failed11:
@@ -10481,7 +10467,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0
 // CHECK13-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 0
 // CHECK13-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK13-NEXT:    [[TMP41:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l137.region_id, i32 1, i8** [[TMP39]], i8** [[TMP40]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.27, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.28, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK13-NEXT:    [[TMP41:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l137.region_id, i32 1, i8** [[TMP39]], i8** [[TMP40]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.32, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.33, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK13-NEXT:    [[TMP42:%.*]] = icmp ne i32 [[TMP41]], 0
 // CHECK13-NEXT:    br i1 [[TMP42]], label [[OMP_OFFLOAD_FAILED17:%.*]], label [[OMP_OFFLOAD_CONT18:%.*]]
 // CHECK13:       omp_offload.failed17:
@@ -10511,7 +10497,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 0
 // CHECK13-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 0
 // CHECK13-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK13-NEXT:    [[TMP57:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l142.region_id, i32 2, i8** [[TMP55]], i8** [[TMP56]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.31, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.32, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK13-NEXT:    [[TMP57:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l142.region_id, i32 2, i8** [[TMP55]], i8** [[TMP56]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.36, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.37, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK13-NEXT:    [[TMP58:%.*]] = icmp ne i32 [[TMP57]], 0
 // CHECK13-NEXT:    br i1 [[TMP58]], label [[OMP_OFFLOAD_FAILED25:%.*]], label [[OMP_OFFLOAD_CONT26:%.*]]
 // CHECK13:       omp_offload.failed25:
@@ -10527,11 +10513,11 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8
 // CHECK13-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8
 // CHECK13-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8
-// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK13-NEXT:    ret void
 //
 //
-// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..14
+// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..18
 // CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK13-NEXT:  entry:
 // CHECK13-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -10579,7 +10565,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
 // CHECK13-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !46
 // CHECK13-NEXT:    [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
-// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !46
+// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !46
 // CHECK13-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK13:       omp.inner.for.inc:
 // CHECK13-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46
@@ -10601,7 +10587,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    ret void
 //
 //
-// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..15
+// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..19
 // CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK13-NEXT:  entry:
 // CHECK13-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -10691,11 +10677,11 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8
 // CHECK13-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8
 // CHECK13-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8
-// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..17 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..22 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK13-NEXT:    ret void
 //
 //
-// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..17
+// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..22
 // CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK13-NEXT:  entry:
 // CHECK13-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -10743,7 +10729,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
 // CHECK13-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !52
 // CHECK13-NEXT:    [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
-// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !52
+// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..23 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !52
 // CHECK13-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK13:       omp.inner.for.inc:
 // CHECK13-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52
@@ -10765,7 +10751,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    ret void
 //
 //
-// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..18
+// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..23
 // CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK13-NEXT:  entry:
 // CHECK13-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -10866,11 +10852,11 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK13-NEXT:    store i32 [[TMP2]], i32* [[CONV1]], align 4
 // CHECK13-NEXT:    [[TMP3:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i64)* @.omp_outlined..21 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i64 [[TMP3]])
+// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i64)* @.omp_outlined..26 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i64 [[TMP3]])
 // CHECK13-NEXT:    ret void
 //
 //
-// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..21
+// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..26
 // CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK13-NEXT:  entry:
 // CHECK13-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -10926,7 +10912,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK13-NEXT:    store i32 [[TMP12]], i32* [[CONV2]], align 4, !llvm.access.group !58
 // CHECK13-NEXT:    [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !58
-// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..22 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]), !llvm.access.group !58
+// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..27 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]), !llvm.access.group !58
 // CHECK13-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK13:       omp.inner.for.inc:
 // CHECK13-NEXT:    [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58
@@ -10948,7 +10934,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    ret void
 //
 //
-// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..22
+// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..27
 // CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK13-NEXT:  entry:
 // CHECK13-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -11063,11 +11049,11 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8
 // CHECK13-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8
 // CHECK13-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8
-// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..25 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK13-NEXT:    ret void
 //
 //
-// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..25
+// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..30
 // CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK13-NEXT:  entry:
 // CHECK13-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -11115,7 +11101,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
 // CHECK13-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !64
 // CHECK13-NEXT:    [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
-// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !64
+// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !64
 // CHECK13-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK13:       omp.inner.for.inc:
 // CHECK13-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !64
@@ -11137,7 +11123,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    ret void
 //
 //
-// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..26
+// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..31
 // CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK13-NEXT:  entry:
 // CHECK13-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -11236,11 +11222,11 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK13-NEXT:    store i32 [[TMP2]], i32* [[CONV1]], align 4
 // CHECK13-NEXT:    [[TMP3:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i64)* @.omp_outlined..29 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i64 [[TMP3]])
+// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i64)* @.omp_outlined..34 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i64 [[TMP3]])
 // CHECK13-NEXT:    ret void
 //
 //
-// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..29
+// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..34
 // CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK13-NEXT:  entry:
 // CHECK13-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -11296,7 +11282,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK13-NEXT:    store i32 [[TMP12]], i32* [[CONV2]], align 4, !llvm.access.group !70
 // CHECK13-NEXT:    [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !70
-// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]), !llvm.access.group !70
+// CHECK13-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]), !llvm.access.group !70
 // CHECK13-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK13:       omp.inner.for.inc:
 // CHECK13-NEXT:    [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !70
@@ -11318,7 +11304,7 @@ int main (int argc, char **argv) {
 // CHECK13-NEXT:    ret void
 //
 //
-// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..30
+// CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..35
 // CHECK13-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK13-NEXT:  entry:
 // CHECK13-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -11405,7 +11391,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK13-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK13-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK13-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK13-NEXT:  entry:
 // CHECK13-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK13-NEXT:    ret void
@@ -11479,14 +11465,14 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    store i32 [[TMP3]], i32* [[CONV]], align 4
 // CHECK14-NEXT:    [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8
 // CHECK14-NEXT:    [[TMP5:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK14-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK14-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i64*
-// CHECK14-NEXT:    store i64 [[TMP4]], i64* [[TMP7]], align 8
-// CHECK14-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK14-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i64*
-// CHECK14-NEXT:    store i64 [[TMP4]], i64* [[TMP9]], align 8
-// CHECK14-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK14-NEXT:    store i64 4, i64* [[TMP10]], align 8
+// CHECK14-NEXT:    [[TMP6:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK14-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP6]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes to i8*), i64 24, i1 false)
+// CHECK14-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK14-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i64*
+// CHECK14-NEXT:    store i64 [[TMP4]], i64* [[TMP8]], align 8
+// CHECK14-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK14-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i64*
+// CHECK14-NEXT:    store i64 [[TMP4]], i64* [[TMP10]], align 8
 // CHECK14-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
 // CHECK14-NEXT:    store i8* null, i8** [[TMP11]], align 8
 // CHECK14-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -11495,300 +11481,286 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK14-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i64*
 // CHECK14-NEXT:    store i64 [[TMP1]], i64* [[TMP15]], align 8
-// CHECK14-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK14-NEXT:    store i64 8, i64* [[TMP16]], align 8
-// CHECK14-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK14-NEXT:    store i8* null, i8** [[TMP17]], align 8
-// CHECK14-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK14-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
-// CHECK14-NEXT:    store i32* [[VLA]], i32** [[TMP19]], align 8
-// CHECK14-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK14-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32**
-// CHECK14-NEXT:    store i32* [[VLA]], i32** [[TMP21]], align 8
-// CHECK14-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK14-NEXT:    store i64 [[TMP5]], i64* [[TMP22]], align 8
-// CHECK14-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK14-NEXT:    store i8* null, i8** [[TMP23]], align 8
-// CHECK14-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK14-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK14-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK14-NEXT:    [[TMP27:%.*]] = load i32, i32* [[N]], align 4
-// CHECK14-NEXT:    store i32 [[TMP27]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK14-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK14-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP28]], 0
+// CHECK14-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK14-NEXT:    store i8* null, i8** [[TMP16]], align 8
+// CHECK14-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK14-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32**
+// CHECK14-NEXT:    store i32* [[VLA]], i32** [[TMP18]], align 8
+// CHECK14-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK14-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32**
+// CHECK14-NEXT:    store i32* [[VLA]], i32** [[TMP20]], align 8
+// CHECK14-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
+// CHECK14-NEXT:    store i64 [[TMP5]], i64* [[TMP21]], align 8
+// CHECK14-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK14-NEXT:    store i8* null, i8** [[TMP22]], align 8
+// CHECK14-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK14-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK14-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK14-NEXT:    [[TMP26:%.*]] = load i32, i32* [[N]], align 4
+// CHECK14-NEXT:    store i32 [[TMP26]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK14-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK14-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP27]], 0
 // CHECK14-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK14-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK14-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK14-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK14-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP29]], 1
-// CHECK14-NEXT:    [[TMP30:%.*]] = zext i32 [[ADD]] to i64
-// CHECK14-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP30]])
-// CHECK14-NEXT:    [[TMP31:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l154.region_id, i32 3, i8** [[TMP24]], i8** [[TMP25]], i64* [[TMP26]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK14-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
-// CHECK14-NEXT:    br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK14-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK14-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP28]], 1
+// CHECK14-NEXT:    [[TMP29:%.*]] = zext i32 [[ADD]] to i64
+// CHECK14-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP29]])
+// CHECK14-NEXT:    [[TMP30:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l154.region_id, i32 3, i8** [[TMP23]], i8** [[TMP24]], i64* [[TMP25]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK14-NEXT:    [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
+// CHECK14-NEXT:    br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK14:       omp_offload.failed:
 // CHECK14-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l154(i64 [[TMP4]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK14-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK14:       omp_offload.cont:
-// CHECK14-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N]], align 4
+// CHECK14-NEXT:    [[TMP32:%.*]] = load i32, i32* [[N]], align 4
 // CHECK14-NEXT:    [[CONV4:%.*]] = bitcast i64* [[N_CASTED3]] to i32*
-// CHECK14-NEXT:    store i32 [[TMP33]], i32* [[CONV4]], align 4
-// CHECK14-NEXT:    [[TMP34:%.*]] = load i64, i64* [[N_CASTED3]], align 8
-// CHECK14-NEXT:    [[TMP35:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK14-NEXT:    store i32 [[TMP32]], i32* [[CONV4]], align 4
+// CHECK14-NEXT:    [[TMP33:%.*]] = load i64, i64* [[N_CASTED3]], align 8
+// CHECK14-NEXT:    [[TMP34:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK14-NEXT:    [[TMP35:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES8]] to i8*
+// CHECK14-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP35]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes.4 to i8*), i64 24, i1 false)
 // CHECK14-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
 // CHECK14-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i64*
-// CHECK14-NEXT:    store i64 [[TMP34]], i64* [[TMP37]], align 8
+// CHECK14-NEXT:    store i64 [[TMP33]], i64* [[TMP37]], align 8
 // CHECK14-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
 // CHECK14-NEXT:    [[TMP39:%.*]] = bitcast i8** [[TMP38]] to i64*
-// CHECK14-NEXT:    store i64 [[TMP34]], i64* [[TMP39]], align 8
-// CHECK14-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 0
-// CHECK14-NEXT:    store i64 4, i64* [[TMP40]], align 8
-// CHECK14-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 0
-// CHECK14-NEXT:    store i8* null, i8** [[TMP41]], align 8
-// CHECK14-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 1
-// CHECK14-NEXT:    [[TMP43:%.*]] = bitcast i8** [[TMP42]] to i64*
-// CHECK14-NEXT:    store i64 [[TMP1]], i64* [[TMP43]], align 8
-// CHECK14-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 1
-// CHECK14-NEXT:    [[TMP45:%.*]] = bitcast i8** [[TMP44]] to i64*
-// CHECK14-NEXT:    store i64 [[TMP1]], i64* [[TMP45]], align 8
-// CHECK14-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 1
-// CHECK14-NEXT:    store i64 8, i64* [[TMP46]], align 8
-// CHECK14-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 1
-// CHECK14-NEXT:    store i8* null, i8** [[TMP47]], align 8
-// CHECK14-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 2
+// CHECK14-NEXT:    store i64 [[TMP33]], i64* [[TMP39]], align 8
+// CHECK14-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 0
+// CHECK14-NEXT:    store i8* null, i8** [[TMP40]], align 8
+// CHECK14-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 1
+// CHECK14-NEXT:    [[TMP42:%.*]] = bitcast i8** [[TMP41]] to i64*
+// CHECK14-NEXT:    store i64 [[TMP1]], i64* [[TMP42]], align 8
+// CHECK14-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 1
+// CHECK14-NEXT:    [[TMP44:%.*]] = bitcast i8** [[TMP43]] to i64*
+// CHECK14-NEXT:    store i64 [[TMP1]], i64* [[TMP44]], align 8
+// CHECK14-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 1
+// CHECK14-NEXT:    store i8* null, i8** [[TMP45]], align 8
+// CHECK14-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 2
+// CHECK14-NEXT:    [[TMP47:%.*]] = bitcast i8** [[TMP46]] to i32**
+// CHECK14-NEXT:    store i32* [[VLA]], i32** [[TMP47]], align 8
+// CHECK14-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 2
 // CHECK14-NEXT:    [[TMP49:%.*]] = bitcast i8** [[TMP48]] to i32**
 // CHECK14-NEXT:    store i32* [[VLA]], i32** [[TMP49]], align 8
-// CHECK14-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 2
-// CHECK14-NEXT:    [[TMP51:%.*]] = bitcast i8** [[TMP50]] to i32**
-// CHECK14-NEXT:    store i32* [[VLA]], i32** [[TMP51]], align 8
-// CHECK14-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 2
-// CHECK14-NEXT:    store i64 [[TMP35]], i64* [[TMP52]], align 8
-// CHECK14-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 2
-// CHECK14-NEXT:    store i8* null, i8** [[TMP53]], align 8
-// CHECK14-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
-// CHECK14-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
-// CHECK14-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 0
-// CHECK14-NEXT:    [[TMP57:%.*]] = load i32, i32* [[N]], align 4
-// CHECK14-NEXT:    store i32 [[TMP57]], i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK14-NEXT:    [[TMP58:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK14-NEXT:    [[SUB12:%.*]] = sub nsw i32 [[TMP58]], 0
+// CHECK14-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 2
+// CHECK14-NEXT:    store i64 [[TMP34]], i64* [[TMP50]], align 8
+// CHECK14-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 2
+// CHECK14-NEXT:    store i8* null, i8** [[TMP51]], align 8
+// CHECK14-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
+// CHECK14-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
+// CHECK14-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 0
+// CHECK14-NEXT:    [[TMP55:%.*]] = load i32, i32* [[N]], align 4
+// CHECK14-NEXT:    store i32 [[TMP55]], i32* [[DOTCAPTURE_EXPR_10]], align 4
+// CHECK14-NEXT:    [[TMP56:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
+// CHECK14-NEXT:    [[SUB12:%.*]] = sub nsw i32 [[TMP56]], 0
 // CHECK14-NEXT:    [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1
 // CHECK14-NEXT:    [[SUB14:%.*]] = sub nsw i32 [[DIV13]], 1
 // CHECK14-NEXT:    store i32 [[SUB14]], i32* [[DOTCAPTURE_EXPR_11]], align 4
-// CHECK14-NEXT:    [[TMP59:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_11]], align 4
-// CHECK14-NEXT:    [[ADD15:%.*]] = add nsw i32 [[TMP59]], 1
-// CHECK14-NEXT:    [[TMP60:%.*]] = zext i32 [[ADD15]] to i64
-// CHECK14-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP60]])
-// CHECK14-NEXT:    [[TMP61:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l159.region_id, i32 3, i8** [[TMP54]], i8** [[TMP55]], i64* [[TMP56]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK14-NEXT:    [[TMP62:%.*]] = icmp ne i32 [[TMP61]], 0
-// CHECK14-NEXT:    br i1 [[TMP62]], label [[OMP_OFFLOAD_FAILED16:%.*]], label [[OMP_OFFLOAD_CONT17:%.*]]
+// CHECK14-NEXT:    [[TMP57:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_11]], align 4
+// CHECK14-NEXT:    [[ADD15:%.*]] = add nsw i32 [[TMP57]], 1
+// CHECK14-NEXT:    [[TMP58:%.*]] = zext i32 [[ADD15]] to i64
+// CHECK14-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP58]])
+// CHECK14-NEXT:    [[TMP59:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l159.region_id, i32 3, i8** [[TMP52]], i8** [[TMP53]], i64* [[TMP54]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.5, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK14-NEXT:    [[TMP60:%.*]] = icmp ne i32 [[TMP59]], 0
+// CHECK14-NEXT:    br i1 [[TMP60]], label [[OMP_OFFLOAD_FAILED16:%.*]], label [[OMP_OFFLOAD_CONT17:%.*]]
 // CHECK14:       omp_offload.failed16:
-// CHECK14-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l159(i64 [[TMP34]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK14-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l159(i64 [[TMP33]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK14-NEXT:    br label [[OMP_OFFLOAD_CONT17]]
 // CHECK14:       omp_offload.cont17:
-// CHECK14-NEXT:    [[TMP63:%.*]] = load i32, i32* [[M]], align 4
+// CHECK14-NEXT:    [[TMP61:%.*]] = load i32, i32* [[M]], align 4
 // CHECK14-NEXT:    [[CONV18:%.*]] = bitcast i64* [[M_CASTED]] to i32*
-// CHECK14-NEXT:    store i32 [[TMP63]], i32* [[CONV18]], align 4
-// CHECK14-NEXT:    [[TMP64:%.*]] = load i64, i64* [[M_CASTED]], align 8
-// CHECK14-NEXT:    [[TMP65:%.*]] = load i32, i32* [[N]], align 4
+// CHECK14-NEXT:    store i32 [[TMP61]], i32* [[CONV18]], align 4
+// CHECK14-NEXT:    [[TMP62:%.*]] = load i64, i64* [[M_CASTED]], align 8
+// CHECK14-NEXT:    [[TMP63:%.*]] = load i32, i32* [[N]], align 4
 // CHECK14-NEXT:    [[CONV20:%.*]] = bitcast i64* [[N_CASTED19]] to i32*
-// CHECK14-NEXT:    store i32 [[TMP65]], i32* [[CONV20]], align 4
-// CHECK14-NEXT:    [[TMP66:%.*]] = load i64, i64* [[N_CASTED19]], align 8
-// CHECK14-NEXT:    [[TMP67:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK14-NEXT:    [[TMP68:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 0
-// CHECK14-NEXT:    [[TMP69:%.*]] = bitcast i8** [[TMP68]] to i64*
-// CHECK14-NEXT:    store i64 [[TMP64]], i64* [[TMP69]], align 8
-// CHECK14-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 0
-// CHECK14-NEXT:    [[TMP71:%.*]] = bitcast i8** [[TMP70]] to i64*
-// CHECK14-NEXT:    store i64 [[TMP64]], i64* [[TMP71]], align 8
-// CHECK14-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES24]], i32 0, i32 0
-// CHECK14-NEXT:    store i64 4, i64* [[TMP72]], align 8
-// CHECK14-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 0
-// CHECK14-NEXT:    store i8* null, i8** [[TMP73]], align 8
-// CHECK14-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 1
+// CHECK14-NEXT:    store i32 [[TMP63]], i32* [[CONV20]], align 4
+// CHECK14-NEXT:    [[TMP64:%.*]] = load i64, i64* [[N_CASTED19]], align 8
+// CHECK14-NEXT:    [[TMP65:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK14-NEXT:    [[TMP66:%.*]] = bitcast [4 x i64]* [[DOTOFFLOAD_SIZES24]] to i8*
+// CHECK14-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP66]], i8* align 8 bitcast ([4 x i64]* @.offload_sizes.8 to i8*), i64 32, i1 false)
+// CHECK14-NEXT:    [[TMP67:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 0
+// CHECK14-NEXT:    [[TMP68:%.*]] = bitcast i8** [[TMP67]] to i64*
+// CHECK14-NEXT:    store i64 [[TMP62]], i64* [[TMP68]], align 8
+// CHECK14-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 0
+// CHECK14-NEXT:    [[TMP70:%.*]] = bitcast i8** [[TMP69]] to i64*
+// CHECK14-NEXT:    store i64 [[TMP62]], i64* [[TMP70]], align 8
+// CHECK14-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 0
+// CHECK14-NEXT:    store i8* null, i8** [[TMP71]], align 8
+// CHECK14-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 1
+// CHECK14-NEXT:    [[TMP73:%.*]] = bitcast i8** [[TMP72]] to i64*
+// CHECK14-NEXT:    store i64 [[TMP64]], i64* [[TMP73]], align 8
+// CHECK14-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 1
 // CHECK14-NEXT:    [[TMP75:%.*]] = bitcast i8** [[TMP74]] to i64*
-// CHECK14-NEXT:    store i64 [[TMP66]], i64* [[TMP75]], align 8
-// CHECK14-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 1
-// CHECK14-NEXT:    [[TMP77:%.*]] = bitcast i8** [[TMP76]] to i64*
-// CHECK14-NEXT:    store i64 [[TMP66]], i64* [[TMP77]], align 8
-// CHECK14-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES24]], i32 0, i32 1
-// CHECK14-NEXT:    store i64 4, i64* [[TMP78]], align 8
-// CHECK14-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 1
-// CHECK14-NEXT:    store i8* null, i8** [[TMP79]], align 8
-// CHECK14-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 2
-// CHECK14-NEXT:    [[TMP81:%.*]] = bitcast i8** [[TMP80]] to i64*
-// CHECK14-NEXT:    store i64 [[TMP1]], i64* [[TMP81]], align 8
-// CHECK14-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 2
-// CHECK14-NEXT:    [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i64*
-// CHECK14-NEXT:    store i64 [[TMP1]], i64* [[TMP83]], align 8
-// CHECK14-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES24]], i32 0, i32 2
-// CHECK14-NEXT:    store i64 8, i64* [[TMP84]], align 8
-// CHECK14-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 2
-// CHECK14-NEXT:    store i8* null, i8** [[TMP85]], align 8
-// CHECK14-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 3
-// CHECK14-NEXT:    [[TMP87:%.*]] = bitcast i8** [[TMP86]] to i32**
-// CHECK14-NEXT:    store i32* [[VLA]], i32** [[TMP87]], align 8
-// CHECK14-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 3
-// CHECK14-NEXT:    [[TMP89:%.*]] = bitcast i8** [[TMP88]] to i32**
-// CHECK14-NEXT:    store i32* [[VLA]], i32** [[TMP89]], align 8
-// CHECK14-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES24]], i32 0, i32 3
-// CHECK14-NEXT:    store i64 [[TMP67]], i64* [[TMP90]], align 8
-// CHECK14-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 3
-// CHECK14-NEXT:    store i8* null, i8** [[TMP91]], align 8
-// CHECK14-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 0
-// CHECK14-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 0
-// CHECK14-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES24]], i32 0, i32 0
-// CHECK14-NEXT:    [[TMP95:%.*]] = load i32, i32* [[N]], align 4
-// CHECK14-NEXT:    store i32 [[TMP95]], i32* [[DOTCAPTURE_EXPR_26]], align 4
-// CHECK14-NEXT:    [[TMP96:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_26]], align 4
-// CHECK14-NEXT:    [[SUB28:%.*]] = sub nsw i32 [[TMP96]], 0
+// CHECK14-NEXT:    store i64 [[TMP64]], i64* [[TMP75]], align 8
+// CHECK14-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 1
+// CHECK14-NEXT:    store i8* null, i8** [[TMP76]], align 8
+// CHECK14-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 2
+// CHECK14-NEXT:    [[TMP78:%.*]] = bitcast i8** [[TMP77]] to i64*
+// CHECK14-NEXT:    store i64 [[TMP1]], i64* [[TMP78]], align 8
+// CHECK14-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 2
+// CHECK14-NEXT:    [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i64*
+// CHECK14-NEXT:    store i64 [[TMP1]], i64* [[TMP80]], align 8
+// CHECK14-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 2
+// CHECK14-NEXT:    store i8* null, i8** [[TMP81]], align 8
+// CHECK14-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 3
+// CHECK14-NEXT:    [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i32**
+// CHECK14-NEXT:    store i32* [[VLA]], i32** [[TMP83]], align 8
+// CHECK14-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 3
+// CHECK14-NEXT:    [[TMP85:%.*]] = bitcast i8** [[TMP84]] to i32**
+// CHECK14-NEXT:    store i32* [[VLA]], i32** [[TMP85]], align 8
+// CHECK14-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES24]], i32 0, i32 3
+// CHECK14-NEXT:    store i64 [[TMP65]], i64* [[TMP86]], align 8
+// CHECK14-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 3
+// CHECK14-NEXT:    store i8* null, i8** [[TMP87]], align 8
+// CHECK14-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 0
+// CHECK14-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 0
+// CHECK14-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES24]], i32 0, i32 0
+// CHECK14-NEXT:    [[TMP91:%.*]] = load i32, i32* [[N]], align 4
+// CHECK14-NEXT:    store i32 [[TMP91]], i32* [[DOTCAPTURE_EXPR_26]], align 4
+// CHECK14-NEXT:    [[TMP92:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_26]], align 4
+// CHECK14-NEXT:    [[SUB28:%.*]] = sub nsw i32 [[TMP92]], 0
 // CHECK14-NEXT:    [[DIV29:%.*]] = sdiv i32 [[SUB28]], 1
 // CHECK14-NEXT:    [[SUB30:%.*]] = sub nsw i32 [[DIV29]], 1
 // CHECK14-NEXT:    store i32 [[SUB30]], i32* [[DOTCAPTURE_EXPR_27]], align 4
-// CHECK14-NEXT:    [[TMP97:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_27]], align 4
-// CHECK14-NEXT:    [[ADD31:%.*]] = add nsw i32 [[TMP97]], 1
-// CHECK14-NEXT:    [[TMP98:%.*]] = zext i32 [[ADD31]] to i64
-// CHECK14-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP98]])
-// CHECK14-NEXT:    [[TMP99:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l164.region_id, i32 4, i8** [[TMP92]], i8** [[TMP93]], i64* [[TMP94]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.7, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK14-NEXT:    [[TMP100:%.*]] = icmp ne i32 [[TMP99]], 0
-// CHECK14-NEXT:    br i1 [[TMP100]], label [[OMP_OFFLOAD_FAILED32:%.*]], label [[OMP_OFFLOAD_CONT33:%.*]]
+// CHECK14-NEXT:    [[TMP93:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_27]], align 4
+// CHECK14-NEXT:    [[ADD31:%.*]] = add nsw i32 [[TMP93]], 1
+// CHECK14-NEXT:    [[TMP94:%.*]] = zext i32 [[ADD31]] to i64
+// CHECK14-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP94]])
+// CHECK14-NEXT:    [[TMP95:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l164.region_id, i32 4, i8** [[TMP88]], i8** [[TMP89]], i64* [[TMP90]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK14-NEXT:    [[TMP96:%.*]] = icmp ne i32 [[TMP95]], 0
+// CHECK14-NEXT:    br i1 [[TMP96]], label [[OMP_OFFLOAD_FAILED32:%.*]], label [[OMP_OFFLOAD_CONT33:%.*]]
 // CHECK14:       omp_offload.failed32:
-// CHECK14-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l164(i64 [[TMP64]], i64 [[TMP66]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK14-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l164(i64 [[TMP62]], i64 [[TMP64]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK14-NEXT:    br label [[OMP_OFFLOAD_CONT33]]
 // CHECK14:       omp_offload.cont33:
-// CHECK14-NEXT:    [[TMP101:%.*]] = load i32, i32* [[N]], align 4
+// CHECK14-NEXT:    [[TMP97:%.*]] = load i32, i32* [[N]], align 4
 // CHECK14-NEXT:    [[CONV35:%.*]] = bitcast i64* [[N_CASTED34]] to i32*
-// CHECK14-NEXT:    store i32 [[TMP101]], i32* [[CONV35]], align 4
-// CHECK14-NEXT:    [[TMP102:%.*]] = load i64, i64* [[N_CASTED34]], align 8
-// CHECK14-NEXT:    [[TMP103:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK14-NEXT:    [[TMP104:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS36]], i32 0, i32 0
-// CHECK14-NEXT:    [[TMP105:%.*]] = bitcast i8** [[TMP104]] to i64*
-// CHECK14-NEXT:    store i64 [[TMP102]], i64* [[TMP105]], align 8
-// CHECK14-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS37]], i32 0, i32 0
+// CHECK14-NEXT:    store i32 [[TMP97]], i32* [[CONV35]], align 4
+// CHECK14-NEXT:    [[TMP98:%.*]] = load i64, i64* [[N_CASTED34]], align 8
+// CHECK14-NEXT:    [[TMP99:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK14-NEXT:    [[TMP100:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES39]] to i8*
+// CHECK14-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP100]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes.12 to i8*), i64 24, i1 false)
+// CHECK14-NEXT:    [[TMP101:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS36]], i32 0, i32 0
+// CHECK14-NEXT:    [[TMP102:%.*]] = bitcast i8** [[TMP101]] to i64*
+// CHECK14-NEXT:    store i64 [[TMP98]], i64* [[TMP102]], align 8
+// CHECK14-NEXT:    [[TMP103:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS37]], i32 0, i32 0
+// CHECK14-NEXT:    [[TMP104:%.*]] = bitcast i8** [[TMP103]] to i64*
+// CHECK14-NEXT:    store i64 [[TMP98]], i64* [[TMP104]], align 8
+// CHECK14-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS38]], i64 0, i64 0
+// CHECK14-NEXT:    store i8* null, i8** [[TMP105]], align 8
+// CHECK14-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS36]], i32 0, i32 1
 // CHECK14-NEXT:    [[TMP107:%.*]] = bitcast i8** [[TMP106]] to i64*
-// CHECK14-NEXT:    store i64 [[TMP102]], i64* [[TMP107]], align 8
-// CHECK14-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES39]], i32 0, i32 0
-// CHECK14-NEXT:    store i64 4, i64* [[TMP108]], align 8
-// CHECK14-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS38]], i64 0, i64 0
-// CHECK14-NEXT:    store i8* null, i8** [[TMP109]], align 8
-// CHECK14-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS36]], i32 0, i32 1
-// CHECK14-NEXT:    [[TMP111:%.*]] = bitcast i8** [[TMP110]] to i64*
-// CHECK14-NEXT:    store i64 [[TMP1]], i64* [[TMP111]], align 8
-// CHECK14-NEXT:    [[TMP112:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS37]], i32 0, i32 1
-// CHECK14-NEXT:    [[TMP113:%.*]] = bitcast i8** [[TMP112]] to i64*
-// CHECK14-NEXT:    store i64 [[TMP1]], i64* [[TMP113]], align 8
-// CHECK14-NEXT:    [[TMP114:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES39]], i32 0, i32 1
-// CHECK14-NEXT:    store i64 8, i64* [[TMP114]], align 8
-// CHECK14-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS38]], i64 0, i64 1
-// CHECK14-NEXT:    store i8* null, i8** [[TMP115]], align 8
-// CHECK14-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS36]], i32 0, i32 2
-// CHECK14-NEXT:    [[TMP117:%.*]] = bitcast i8** [[TMP116]] to i32**
-// CHECK14-NEXT:    store i32* [[VLA]], i32** [[TMP117]], align 8
-// CHECK14-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS37]], i32 0, i32 2
-// CHECK14-NEXT:    [[TMP119:%.*]] = bitcast i8** [[TMP118]] to i32**
-// CHECK14-NEXT:    store i32* [[VLA]], i32** [[TMP119]], align 8
-// CHECK14-NEXT:    [[TMP120:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES39]], i32 0, i32 2
-// CHECK14-NEXT:    store i64 [[TMP103]], i64* [[TMP120]], align 8
-// CHECK14-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS38]], i64 0, i64 2
-// CHECK14-NEXT:    store i8* null, i8** [[TMP121]], align 8
-// CHECK14-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS36]], i32 0, i32 0
-// CHECK14-NEXT:    [[TMP123:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS37]], i32 0, i32 0
-// CHECK14-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES39]], i32 0, i32 0
-// CHECK14-NEXT:    [[TMP125:%.*]] = load i32, i32* [[N]], align 4
-// CHECK14-NEXT:    store i32 [[TMP125]], i32* [[DOTCAPTURE_EXPR_41]], align 4
-// CHECK14-NEXT:    [[TMP126:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_41]], align 4
-// CHECK14-NEXT:    [[SUB43:%.*]] = sub nsw i32 [[TMP126]], 0
+// CHECK14-NEXT:    store i64 [[TMP1]], i64* [[TMP107]], align 8
+// CHECK14-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS37]], i32 0, i32 1
+// CHECK14-NEXT:    [[TMP109:%.*]] = bitcast i8** [[TMP108]] to i64*
+// CHECK14-NEXT:    store i64 [[TMP1]], i64* [[TMP109]], align 8
+// CHECK14-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS38]], i64 0, i64 1
+// CHECK14-NEXT:    store i8* null, i8** [[TMP110]], align 8
+// CHECK14-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS36]], i32 0, i32 2
+// CHECK14-NEXT:    [[TMP112:%.*]] = bitcast i8** [[TMP111]] to i32**
+// CHECK14-NEXT:    store i32* [[VLA]], i32** [[TMP112]], align 8
+// CHECK14-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS37]], i32 0, i32 2
+// CHECK14-NEXT:    [[TMP114:%.*]] = bitcast i8** [[TMP113]] to i32**
+// CHECK14-NEXT:    store i32* [[VLA]], i32** [[TMP114]], align 8
+// CHECK14-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES39]], i32 0, i32 2
+// CHECK14-NEXT:    store i64 [[TMP99]], i64* [[TMP115]], align 8
+// CHECK14-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS38]], i64 0, i64 2
+// CHECK14-NEXT:    store i8* null, i8** [[TMP116]], align 8
+// CHECK14-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS36]], i32 0, i32 0
+// CHECK14-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS37]], i32 0, i32 0
+// CHECK14-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES39]], i32 0, i32 0
+// CHECK14-NEXT:    [[TMP120:%.*]] = load i32, i32* [[N]], align 4
+// CHECK14-NEXT:    store i32 [[TMP120]], i32* [[DOTCAPTURE_EXPR_41]], align 4
+// CHECK14-NEXT:    [[TMP121:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_41]], align 4
+// CHECK14-NEXT:    [[SUB43:%.*]] = sub nsw i32 [[TMP121]], 0
 // CHECK14-NEXT:    [[DIV44:%.*]] = sdiv i32 [[SUB43]], 1
 // CHECK14-NEXT:    [[SUB45:%.*]] = sub nsw i32 [[DIV44]], 1
 // CHECK14-NEXT:    store i32 [[SUB45]], i32* [[DOTCAPTURE_EXPR_42]], align 4
-// CHECK14-NEXT:    [[TMP127:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_42]], align 4
-// CHECK14-NEXT:    [[ADD46:%.*]] = add nsw i32 [[TMP127]], 1
-// CHECK14-NEXT:    [[TMP128:%.*]] = zext i32 [[ADD46]] to i64
-// CHECK14-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP128]])
-// CHECK14-NEXT:    [[TMP129:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l169.region_id, i32 3, i8** [[TMP122]], i8** [[TMP123]], i64* [[TMP124]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.10, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK14-NEXT:    [[TMP130:%.*]] = icmp ne i32 [[TMP129]], 0
-// CHECK14-NEXT:    br i1 [[TMP130]], label [[OMP_OFFLOAD_FAILED47:%.*]], label [[OMP_OFFLOAD_CONT48:%.*]]
+// CHECK14-NEXT:    [[TMP122:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_42]], align 4
+// CHECK14-NEXT:    [[ADD46:%.*]] = add nsw i32 [[TMP122]], 1
+// CHECK14-NEXT:    [[TMP123:%.*]] = zext i32 [[ADD46]] to i64
+// CHECK14-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP123]])
+// CHECK14-NEXT:    [[TMP124:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l169.region_id, i32 3, i8** [[TMP117]], i8** [[TMP118]], i64* [[TMP119]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK14-NEXT:    [[TMP125:%.*]] = icmp ne i32 [[TMP124]], 0
+// CHECK14-NEXT:    br i1 [[TMP125]], label [[OMP_OFFLOAD_FAILED47:%.*]], label [[OMP_OFFLOAD_CONT48:%.*]]
 // CHECK14:       omp_offload.failed47:
-// CHECK14-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l169(i64 [[TMP102]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK14-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l169(i64 [[TMP98]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK14-NEXT:    br label [[OMP_OFFLOAD_CONT48]]
 // CHECK14:       omp_offload.cont48:
-// CHECK14-NEXT:    [[TMP131:%.*]] = load i32, i32* [[M]], align 4
+// CHECK14-NEXT:    [[TMP126:%.*]] = load i32, i32* [[M]], align 4
 // CHECK14-NEXT:    [[CONV50:%.*]] = bitcast i64* [[M_CASTED49]] to i32*
-// CHECK14-NEXT:    store i32 [[TMP131]], i32* [[CONV50]], align 4
-// CHECK14-NEXT:    [[TMP132:%.*]] = load i64, i64* [[M_CASTED49]], align 8
-// CHECK14-NEXT:    [[TMP133:%.*]] = load i32, i32* [[N]], align 4
+// CHECK14-NEXT:    store i32 [[TMP126]], i32* [[CONV50]], align 4
+// CHECK14-NEXT:    [[TMP127:%.*]] = load i64, i64* [[M_CASTED49]], align 8
+// CHECK14-NEXT:    [[TMP128:%.*]] = load i32, i32* [[N]], align 4
 // CHECK14-NEXT:    [[CONV52:%.*]] = bitcast i64* [[N_CASTED51]] to i32*
-// CHECK14-NEXT:    store i32 [[TMP133]], i32* [[CONV52]], align 4
-// CHECK14-NEXT:    [[TMP134:%.*]] = load i64, i64* [[N_CASTED51]], align 8
-// CHECK14-NEXT:    [[TMP135:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK14-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 0
-// CHECK14-NEXT:    [[TMP137:%.*]] = bitcast i8** [[TMP136]] to i64*
-// CHECK14-NEXT:    store i64 [[TMP132]], i64* [[TMP137]], align 8
-// CHECK14-NEXT:    [[TMP138:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 0
-// CHECK14-NEXT:    [[TMP139:%.*]] = bitcast i8** [[TMP138]] to i64*
-// CHECK14-NEXT:    store i64 [[TMP132]], i64* [[TMP139]], align 8
-// CHECK14-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES56]], i32 0, i32 0
-// CHECK14-NEXT:    store i64 4, i64* [[TMP140]], align 8
-// CHECK14-NEXT:    [[TMP141:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS55]], i64 0, i64 0
+// CHECK14-NEXT:    store i32 [[TMP128]], i32* [[CONV52]], align 4
+// CHECK14-NEXT:    [[TMP129:%.*]] = load i64, i64* [[N_CASTED51]], align 8
+// CHECK14-NEXT:    [[TMP130:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK14-NEXT:    [[TMP131:%.*]] = bitcast [4 x i64]* [[DOTOFFLOAD_SIZES56]] to i8*
+// CHECK14-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP131]], i8* align 8 bitcast ([4 x i64]* @.offload_sizes.16 to i8*), i64 32, i1 false)
+// CHECK14-NEXT:    [[TMP132:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 0
+// CHECK14-NEXT:    [[TMP133:%.*]] = bitcast i8** [[TMP132]] to i64*
+// CHECK14-NEXT:    store i64 [[TMP127]], i64* [[TMP133]], align 8
+// CHECK14-NEXT:    [[TMP134:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 0
+// CHECK14-NEXT:    [[TMP135:%.*]] = bitcast i8** [[TMP134]] to i64*
+// CHECK14-NEXT:    store i64 [[TMP127]], i64* [[TMP135]], align 8
+// CHECK14-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS55]], i64 0, i64 0
+// CHECK14-NEXT:    store i8* null, i8** [[TMP136]], align 8
+// CHECK14-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 1
+// CHECK14-NEXT:    [[TMP138:%.*]] = bitcast i8** [[TMP137]] to i64*
+// CHECK14-NEXT:    store i64 [[TMP129]], i64* [[TMP138]], align 8
+// CHECK14-NEXT:    [[TMP139:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 1
+// CHECK14-NEXT:    [[TMP140:%.*]] = bitcast i8** [[TMP139]] to i64*
+// CHECK14-NEXT:    store i64 [[TMP129]], i64* [[TMP140]], align 8
+// CHECK14-NEXT:    [[TMP141:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS55]], i64 0, i64 1
 // CHECK14-NEXT:    store i8* null, i8** [[TMP141]], align 8
-// CHECK14-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 1
+// CHECK14-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 2
 // CHECK14-NEXT:    [[TMP143:%.*]] = bitcast i8** [[TMP142]] to i64*
-// CHECK14-NEXT:    store i64 [[TMP134]], i64* [[TMP143]], align 8
-// CHECK14-NEXT:    [[TMP144:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 1
+// CHECK14-NEXT:    store i64 [[TMP1]], i64* [[TMP143]], align 8
+// CHECK14-NEXT:    [[TMP144:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 2
 // CHECK14-NEXT:    [[TMP145:%.*]] = bitcast i8** [[TMP144]] to i64*
-// CHECK14-NEXT:    store i64 [[TMP134]], i64* [[TMP145]], align 8
-// CHECK14-NEXT:    [[TMP146:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES56]], i32 0, i32 1
-// CHECK14-NEXT:    store i64 4, i64* [[TMP146]], align 8
-// CHECK14-NEXT:    [[TMP147:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS55]], i64 0, i64 1
-// CHECK14-NEXT:    store i8* null, i8** [[TMP147]], align 8
-// CHECK14-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 2
-// CHECK14-NEXT:    [[TMP149:%.*]] = bitcast i8** [[TMP148]] to i64*
-// CHECK14-NEXT:    store i64 [[TMP1]], i64* [[TMP149]], align 8
-// CHECK14-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 2
-// CHECK14-NEXT:    [[TMP151:%.*]] = bitcast i8** [[TMP150]] to i64*
-// CHECK14-NEXT:    store i64 [[TMP1]], i64* [[TMP151]], align 8
-// CHECK14-NEXT:    [[TMP152:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES56]], i32 0, i32 2
-// CHECK14-NEXT:    store i64 8, i64* [[TMP152]], align 8
-// CHECK14-NEXT:    [[TMP153:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS55]], i64 0, i64 2
-// CHECK14-NEXT:    store i8* null, i8** [[TMP153]], align 8
-// CHECK14-NEXT:    [[TMP154:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 3
-// CHECK14-NEXT:    [[TMP155:%.*]] = bitcast i8** [[TMP154]] to i32**
-// CHECK14-NEXT:    store i32* [[VLA]], i32** [[TMP155]], align 8
-// CHECK14-NEXT:    [[TMP156:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 3
-// CHECK14-NEXT:    [[TMP157:%.*]] = bitcast i8** [[TMP156]] to i32**
-// CHECK14-NEXT:    store i32* [[VLA]], i32** [[TMP157]], align 8
-// CHECK14-NEXT:    [[TMP158:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES56]], i32 0, i32 3
-// CHECK14-NEXT:    store i64 [[TMP135]], i64* [[TMP158]], align 8
-// CHECK14-NEXT:    [[TMP159:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS55]], i64 0, i64 3
-// CHECK14-NEXT:    store i8* null, i8** [[TMP159]], align 8
-// CHECK14-NEXT:    [[TMP160:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 0
-// CHECK14-NEXT:    [[TMP161:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 0
-// CHECK14-NEXT:    [[TMP162:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES56]], i32 0, i32 0
-// CHECK14-NEXT:    [[TMP163:%.*]] = load i32, i32* [[N]], align 4
-// CHECK14-NEXT:    store i32 [[TMP163]], i32* [[DOTCAPTURE_EXPR_58]], align 4
-// CHECK14-NEXT:    [[TMP164:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_58]], align 4
-// CHECK14-NEXT:    [[SUB60:%.*]] = sub nsw i32 [[TMP164]], 0
+// CHECK14-NEXT:    store i64 [[TMP1]], i64* [[TMP145]], align 8
+// CHECK14-NEXT:    [[TMP146:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS55]], i64 0, i64 2
+// CHECK14-NEXT:    store i8* null, i8** [[TMP146]], align 8
+// CHECK14-NEXT:    [[TMP147:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 3
+// CHECK14-NEXT:    [[TMP148:%.*]] = bitcast i8** [[TMP147]] to i32**
+// CHECK14-NEXT:    store i32* [[VLA]], i32** [[TMP148]], align 8
+// CHECK14-NEXT:    [[TMP149:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 3
+// CHECK14-NEXT:    [[TMP150:%.*]] = bitcast i8** [[TMP149]] to i32**
+// CHECK14-NEXT:    store i32* [[VLA]], i32** [[TMP150]], align 8
+// CHECK14-NEXT:    [[TMP151:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES56]], i32 0, i32 3
+// CHECK14-NEXT:    store i64 [[TMP130]], i64* [[TMP151]], align 8
+// CHECK14-NEXT:    [[TMP152:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS55]], i64 0, i64 3
+// CHECK14-NEXT:    store i8* null, i8** [[TMP152]], align 8
+// CHECK14-NEXT:    [[TMP153:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 0
+// CHECK14-NEXT:    [[TMP154:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 0
+// CHECK14-NEXT:    [[TMP155:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES56]], i32 0, i32 0
+// CHECK14-NEXT:    [[TMP156:%.*]] = load i32, i32* [[N]], align 4
+// CHECK14-NEXT:    store i32 [[TMP156]], i32* [[DOTCAPTURE_EXPR_58]], align 4
+// CHECK14-NEXT:    [[TMP157:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_58]], align 4
+// CHECK14-NEXT:    [[SUB60:%.*]] = sub nsw i32 [[TMP157]], 0
 // CHECK14-NEXT:    [[DIV61:%.*]] = sdiv i32 [[SUB60]], 1
 // CHECK14-NEXT:    [[SUB62:%.*]] = sub nsw i32 [[DIV61]], 1
 // CHECK14-NEXT:    store i32 [[SUB62]], i32* [[DOTCAPTURE_EXPR_59]], align 4
-// CHECK14-NEXT:    [[TMP165:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_59]], align 4
-// CHECK14-NEXT:    [[ADD63:%.*]] = add nsw i32 [[TMP165]], 1
-// CHECK14-NEXT:    [[TMP166:%.*]] = zext i32 [[ADD63]] to i64
-// CHECK14-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP166]])
-// CHECK14-NEXT:    [[TMP167:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l174.region_id, i32 4, i8** [[TMP160]], i8** [[TMP161]], i64* [[TMP162]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK14-NEXT:    [[TMP168:%.*]] = icmp ne i32 [[TMP167]], 0
-// CHECK14-NEXT:    br i1 [[TMP168]], label [[OMP_OFFLOAD_FAILED64:%.*]], label [[OMP_OFFLOAD_CONT65:%.*]]
+// CHECK14-NEXT:    [[TMP158:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_59]], align 4
+// CHECK14-NEXT:    [[ADD63:%.*]] = add nsw i32 [[TMP158]], 1
+// CHECK14-NEXT:    [[TMP159:%.*]] = zext i32 [[ADD63]] to i64
+// CHECK14-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP159]])
+// CHECK14-NEXT:    [[TMP160:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l174.region_id, i32 4, i8** [[TMP153]], i8** [[TMP154]], i64* [[TMP155]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.17, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK14-NEXT:    [[TMP161:%.*]] = icmp ne i32 [[TMP160]], 0
+// CHECK14-NEXT:    br i1 [[TMP161]], label [[OMP_OFFLOAD_FAILED64:%.*]], label [[OMP_OFFLOAD_CONT65:%.*]]
 // CHECK14:       omp_offload.failed64:
-// CHECK14-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l174(i64 [[TMP132]], i64 [[TMP134]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK14-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l174(i64 [[TMP127]], i64 [[TMP129]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK14-NEXT:    br label [[OMP_OFFLOAD_CONT65]]
 // CHECK14:       omp_offload.cont65:
-// CHECK14-NEXT:    [[TMP169:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK14-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10EEiT_(i32 noundef signext [[TMP169]])
+// CHECK14-NEXT:    [[TMP162:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK14-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10EEiT_(i32 noundef signext [[TMP162]])
 // CHECK14-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK14-NEXT:    [[TMP170:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK14-NEXT:    call void @llvm.stackrestore(i8* [[TMP170]])
-// CHECK14-NEXT:    [[TMP171:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK14-NEXT:    ret i32 [[TMP171]]
+// CHECK14-NEXT:    [[TMP163:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK14-NEXT:    call void @llvm.stackrestore(i8* [[TMP163]])
+// CHECK14-NEXT:    [[TMP164:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK14-NEXT:    ret i32 [[TMP164]]
 //
 //
 // CHECK14-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l154
@@ -12286,11 +12258,11 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK14-NEXT:    store i32 [[TMP3]], i32* [[CONV2]], align 4
 // CHECK14-NEXT:    [[TMP4:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32* [[CONV1]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP4]])
+// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* [[CONV1]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP4]])
 // CHECK14-NEXT:    ret void
 //
 //
-// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..5
+// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..6
 // CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK14-NEXT:  entry:
 // CHECK14-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -12372,7 +12344,7 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK14-NEXT:    store i32 [[TMP21]], i32* [[CONV7]], align 4, !llvm.access.group !28
 // CHECK14-NEXT:    [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !28
-// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]], i64 [[TMP22]]), !llvm.access.group !28
+// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*, i64)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]], i64 [[TMP22]]), !llvm.access.group !28
 // CHECK14-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK14:       omp.inner.for.inc:
 // CHECK14-NEXT:    [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28
@@ -12426,7 +12398,7 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    ret void
 //
 //
-// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..6
+// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..7
 // CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK14-NEXT:  entry:
 // CHECK14-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -12558,11 +12530,11 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32*
 // CHECK14-NEXT:    [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8
 // CHECK14-NEXT:    [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8
-// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), i32* [[CONV]], i64 [[TMP0]], i32* [[TMP1]])
+// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), i32* [[CONV]], i64 [[TMP0]], i32* [[TMP1]])
 // CHECK14-NEXT:    ret void
 //
 //
-// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..8
+// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..10
 // CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] {
 // CHECK14-NEXT:  entry:
 // CHECK14-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -12634,7 +12606,7 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    [[TMP17:%.*]] = zext i32 [[TMP16]] to i64
 // CHECK14-NEXT:    [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !34
 // CHECK14-NEXT:    [[TMP19:%.*]] = zext i32 [[TMP18]] to i64
-// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]]), !llvm.access.group !34
+// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]]), !llvm.access.group !34
 // CHECK14-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK14:       omp.inner.for.inc:
 // CHECK14-NEXT:    [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34
@@ -12665,7 +12637,7 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    ret void
 //
 //
-// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..9
+// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..11
 // CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] {
 // CHECK14-NEXT:  entry:
 // CHECK14-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -12801,11 +12773,11 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK14-NEXT:    store i32 [[TMP3]], i32* [[CONV2]], align 4
 // CHECK14-NEXT:    [[TMP4:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32* [[CONV1]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP4]])
+// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*, i64)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i32* [[CONV1]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP4]])
 // CHECK14-NEXT:    ret void
 //
 //
-// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..11
+// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..14
 // CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK14-NEXT:  entry:
 // CHECK14-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -12885,7 +12857,7 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK14-NEXT:    store i32 [[TMP20]], i32* [[CONV7]], align 4, !llvm.access.group !40
 // CHECK14-NEXT:    [[TMP21:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !40
-// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*, i64)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]], i64 [[TMP21]]), !llvm.access.group !40
+// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]], i64 [[TMP21]]), !llvm.access.group !40
 // CHECK14-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK14:       omp.inner.for.inc:
 // CHECK14-NEXT:    [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40
@@ -12916,7 +12888,7 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    ret void
 //
 //
-// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..12
+// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..15
 // CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK14-NEXT:  entry:
 // CHECK14-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -13034,7 +13006,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK14-LABEL: define {{[^@]+}}@_Z5tmainIiLi10EEiT_
-// CHECK14-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK14-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK14-NEXT:  entry:
 // CHECK14-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK14-NEXT:    [[A:%.*]] = alloca [10 x i32], align 4
@@ -13074,7 +13046,7 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK14-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK14-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK14-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l122.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.16, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK14-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l122.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.20, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.21, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK14-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK14-NEXT:    br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK14:       omp_offload.failed:
@@ -13092,7 +13064,7 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK14-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
 // CHECK14-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK14-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l127.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.19, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.20, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK14-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l127.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.24, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.25, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK14-NEXT:    [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
 // CHECK14-NEXT:    br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]]
 // CHECK14:       omp_offload.failed5:
@@ -13122,7 +13094,7 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0
 // CHECK14-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS8]], i32 0, i32 0
 // CHECK14-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK14-NEXT:    [[TMP32:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l132.region_id, i32 2, i8** [[TMP30]], i8** [[TMP31]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.23, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.24, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK14-NEXT:    [[TMP32:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l132.region_id, i32 2, i8** [[TMP30]], i8** [[TMP31]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.28, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.29, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK14-NEXT:    [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0
 // CHECK14-NEXT:    br i1 [[TMP33]], label [[OMP_OFFLOAD_FAILED11:%.*]], label [[OMP_OFFLOAD_CONT12:%.*]]
 // CHECK14:       omp_offload.failed11:
@@ -13140,7 +13112,7 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0
 // CHECK14-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 0
 // CHECK14-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK14-NEXT:    [[TMP41:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l137.region_id, i32 1, i8** [[TMP39]], i8** [[TMP40]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.27, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.28, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK14-NEXT:    [[TMP41:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l137.region_id, i32 1, i8** [[TMP39]], i8** [[TMP40]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.32, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.33, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK14-NEXT:    [[TMP42:%.*]] = icmp ne i32 [[TMP41]], 0
 // CHECK14-NEXT:    br i1 [[TMP42]], label [[OMP_OFFLOAD_FAILED17:%.*]], label [[OMP_OFFLOAD_CONT18:%.*]]
 // CHECK14:       omp_offload.failed17:
@@ -13170,7 +13142,7 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 0
 // CHECK14-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 0
 // CHECK14-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK14-NEXT:    [[TMP57:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l142.region_id, i32 2, i8** [[TMP55]], i8** [[TMP56]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.31, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.32, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK14-NEXT:    [[TMP57:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l142.region_id, i32 2, i8** [[TMP55]], i8** [[TMP56]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.36, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.37, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK14-NEXT:    [[TMP58:%.*]] = icmp ne i32 [[TMP57]], 0
 // CHECK14-NEXT:    br i1 [[TMP58]], label [[OMP_OFFLOAD_FAILED25:%.*]], label [[OMP_OFFLOAD_CONT26:%.*]]
 // CHECK14:       omp_offload.failed25:
@@ -13186,11 +13158,11 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8
 // CHECK14-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8
 // CHECK14-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8
-// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK14-NEXT:    ret void
 //
 //
-// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..14
+// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..18
 // CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK14-NEXT:  entry:
 // CHECK14-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -13238,7 +13210,7 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
 // CHECK14-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !46
 // CHECK14-NEXT:    [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
-// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !46
+// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !46
 // CHECK14-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK14:       omp.inner.for.inc:
 // CHECK14-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46
@@ -13260,7 +13232,7 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    ret void
 //
 //
-// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..15
+// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..19
 // CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK14-NEXT:  entry:
 // CHECK14-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -13350,11 +13322,11 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8
 // CHECK14-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8
 // CHECK14-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8
-// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..17 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..22 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK14-NEXT:    ret void
 //
 //
-// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..17
+// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..22
 // CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK14-NEXT:  entry:
 // CHECK14-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -13402,7 +13374,7 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
 // CHECK14-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !52
 // CHECK14-NEXT:    [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
-// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !52
+// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..23 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !52
 // CHECK14-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK14:       omp.inner.for.inc:
 // CHECK14-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52
@@ -13424,7 +13396,7 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    ret void
 //
 //
-// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..18
+// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..23
 // CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK14-NEXT:  entry:
 // CHECK14-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -13525,11 +13497,11 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK14-NEXT:    store i32 [[TMP2]], i32* [[CONV1]], align 4
 // CHECK14-NEXT:    [[TMP3:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i64)* @.omp_outlined..21 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i64 [[TMP3]])
+// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i64)* @.omp_outlined..26 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i64 [[TMP3]])
 // CHECK14-NEXT:    ret void
 //
 //
-// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..21
+// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..26
 // CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK14-NEXT:  entry:
 // CHECK14-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -13585,7 +13557,7 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK14-NEXT:    store i32 [[TMP12]], i32* [[CONV2]], align 4, !llvm.access.group !58
 // CHECK14-NEXT:    [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !58
-// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..22 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]), !llvm.access.group !58
+// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..27 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]), !llvm.access.group !58
 // CHECK14-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK14:       omp.inner.for.inc:
 // CHECK14-NEXT:    [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58
@@ -13607,7 +13579,7 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    ret void
 //
 //
-// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..22
+// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..27
 // CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK14-NEXT:  entry:
 // CHECK14-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -13722,11 +13694,11 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8
 // CHECK14-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8
 // CHECK14-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8
-// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..25 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK14-NEXT:    ret void
 //
 //
-// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..25
+// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..30
 // CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK14-NEXT:  entry:
 // CHECK14-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -13774,7 +13746,7 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
 // CHECK14-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !64
 // CHECK14-NEXT:    [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
-// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !64
+// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !64
 // CHECK14-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK14:       omp.inner.for.inc:
 // CHECK14-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !64
@@ -13796,7 +13768,7 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    ret void
 //
 //
-// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..26
+// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..31
 // CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK14-NEXT:  entry:
 // CHECK14-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -13895,11 +13867,11 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK14-NEXT:    store i32 [[TMP2]], i32* [[CONV1]], align 4
 // CHECK14-NEXT:    [[TMP3:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i64)* @.omp_outlined..29 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i64 [[TMP3]])
+// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i64)* @.omp_outlined..34 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i64 [[TMP3]])
 // CHECK14-NEXT:    ret void
 //
 //
-// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..29
+// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..34
 // CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK14-NEXT:  entry:
 // CHECK14-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -13955,7 +13927,7 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK14-NEXT:    store i32 [[TMP12]], i32* [[CONV2]], align 4, !llvm.access.group !70
 // CHECK14-NEXT:    [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !70
-// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]), !llvm.access.group !70
+// CHECK14-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]), !llvm.access.group !70
 // CHECK14-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK14:       omp.inner.for.inc:
 // CHECK14-NEXT:    [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !70
@@ -13977,7 +13949,7 @@ int main (int argc, char **argv) {
 // CHECK14-NEXT:    ret void
 //
 //
-// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..30
+// CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..35
 // CHECK14-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK14-NEXT:  entry:
 // CHECK14-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -14064,7 +14036,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK14-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK14-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK14-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK14-NEXT:  entry:
 // CHECK14-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK14-NEXT:    ret void
@@ -14138,14 +14110,14 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    store i32 [[TMP3]], i32* [[CONV]], align 4
 // CHECK15-NEXT:    [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8
 // CHECK15-NEXT:    [[TMP5:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK15-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK15-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i64*
-// CHECK15-NEXT:    store i64 [[TMP4]], i64* [[TMP7]], align 8
-// CHECK15-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK15-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i64*
-// CHECK15-NEXT:    store i64 [[TMP4]], i64* [[TMP9]], align 8
-// CHECK15-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK15-NEXT:    store i64 4, i64* [[TMP10]], align 8
+// CHECK15-NEXT:    [[TMP6:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK15-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP6]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes to i8*), i64 24, i1 false)
+// CHECK15-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK15-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i64*
+// CHECK15-NEXT:    store i64 [[TMP4]], i64* [[TMP8]], align 8
+// CHECK15-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK15-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i64*
+// CHECK15-NEXT:    store i64 [[TMP4]], i64* [[TMP10]], align 8
 // CHECK15-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
 // CHECK15-NEXT:    store i8* null, i8** [[TMP11]], align 8
 // CHECK15-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -14154,300 +14126,286 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK15-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i64*
 // CHECK15-NEXT:    store i64 [[TMP1]], i64* [[TMP15]], align 8
-// CHECK15-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK15-NEXT:    store i64 8, i64* [[TMP16]], align 8
-// CHECK15-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK15-NEXT:    store i8* null, i8** [[TMP17]], align 8
-// CHECK15-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK15-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
-// CHECK15-NEXT:    store i32* [[VLA]], i32** [[TMP19]], align 8
-// CHECK15-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK15-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32**
-// CHECK15-NEXT:    store i32* [[VLA]], i32** [[TMP21]], align 8
-// CHECK15-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK15-NEXT:    store i64 [[TMP5]], i64* [[TMP22]], align 8
-// CHECK15-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK15-NEXT:    store i8* null, i8** [[TMP23]], align 8
-// CHECK15-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK15-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK15-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK15-NEXT:    [[TMP27:%.*]] = load i32, i32* [[N]], align 4
-// CHECK15-NEXT:    store i32 [[TMP27]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK15-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK15-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP28]], 0
+// CHECK15-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK15-NEXT:    store i8* null, i8** [[TMP16]], align 8
+// CHECK15-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK15-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32**
+// CHECK15-NEXT:    store i32* [[VLA]], i32** [[TMP18]], align 8
+// CHECK15-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK15-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32**
+// CHECK15-NEXT:    store i32* [[VLA]], i32** [[TMP20]], align 8
+// CHECK15-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
+// CHECK15-NEXT:    store i64 [[TMP5]], i64* [[TMP21]], align 8
+// CHECK15-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK15-NEXT:    store i8* null, i8** [[TMP22]], align 8
+// CHECK15-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK15-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK15-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK15-NEXT:    [[TMP26:%.*]] = load i32, i32* [[N]], align 4
+// CHECK15-NEXT:    store i32 [[TMP26]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK15-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK15-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP27]], 0
 // CHECK15-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK15-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK15-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK15-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK15-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP29]], 1
-// CHECK15-NEXT:    [[TMP30:%.*]] = zext i32 [[ADD]] to i64
-// CHECK15-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP30]])
-// CHECK15-NEXT:    [[TMP31:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l154.region_id, i32 3, i8** [[TMP24]], i8** [[TMP25]], i64* [[TMP26]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK15-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
-// CHECK15-NEXT:    br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK15-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK15-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP28]], 1
+// CHECK15-NEXT:    [[TMP29:%.*]] = zext i32 [[ADD]] to i64
+// CHECK15-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP29]])
+// CHECK15-NEXT:    [[TMP30:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l154.region_id, i32 3, i8** [[TMP23]], i8** [[TMP24]], i64* [[TMP25]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK15-NEXT:    [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
+// CHECK15-NEXT:    br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK15:       omp_offload.failed:
 // CHECK15-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l154(i64 [[TMP4]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK15-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK15:       omp_offload.cont:
-// CHECK15-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N]], align 4
+// CHECK15-NEXT:    [[TMP32:%.*]] = load i32, i32* [[N]], align 4
 // CHECK15-NEXT:    [[CONV4:%.*]] = bitcast i64* [[N_CASTED3]] to i32*
-// CHECK15-NEXT:    store i32 [[TMP33]], i32* [[CONV4]], align 4
-// CHECK15-NEXT:    [[TMP34:%.*]] = load i64, i64* [[N_CASTED3]], align 8
-// CHECK15-NEXT:    [[TMP35:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK15-NEXT:    store i32 [[TMP32]], i32* [[CONV4]], align 4
+// CHECK15-NEXT:    [[TMP33:%.*]] = load i64, i64* [[N_CASTED3]], align 8
+// CHECK15-NEXT:    [[TMP34:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK15-NEXT:    [[TMP35:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES8]] to i8*
+// CHECK15-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP35]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes.4 to i8*), i64 24, i1 false)
 // CHECK15-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
 // CHECK15-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i64*
-// CHECK15-NEXT:    store i64 [[TMP34]], i64* [[TMP37]], align 8
+// CHECK15-NEXT:    store i64 [[TMP33]], i64* [[TMP37]], align 8
 // CHECK15-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
 // CHECK15-NEXT:    [[TMP39:%.*]] = bitcast i8** [[TMP38]] to i64*
-// CHECK15-NEXT:    store i64 [[TMP34]], i64* [[TMP39]], align 8
-// CHECK15-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 0
-// CHECK15-NEXT:    store i64 4, i64* [[TMP40]], align 8
-// CHECK15-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 0
-// CHECK15-NEXT:    store i8* null, i8** [[TMP41]], align 8
-// CHECK15-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 1
-// CHECK15-NEXT:    [[TMP43:%.*]] = bitcast i8** [[TMP42]] to i64*
-// CHECK15-NEXT:    store i64 [[TMP1]], i64* [[TMP43]], align 8
-// CHECK15-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 1
-// CHECK15-NEXT:    [[TMP45:%.*]] = bitcast i8** [[TMP44]] to i64*
-// CHECK15-NEXT:    store i64 [[TMP1]], i64* [[TMP45]], align 8
-// CHECK15-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 1
-// CHECK15-NEXT:    store i64 8, i64* [[TMP46]], align 8
-// CHECK15-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 1
-// CHECK15-NEXT:    store i8* null, i8** [[TMP47]], align 8
-// CHECK15-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 2
+// CHECK15-NEXT:    store i64 [[TMP33]], i64* [[TMP39]], align 8
+// CHECK15-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 0
+// CHECK15-NEXT:    store i8* null, i8** [[TMP40]], align 8
+// CHECK15-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 1
+// CHECK15-NEXT:    [[TMP42:%.*]] = bitcast i8** [[TMP41]] to i64*
+// CHECK15-NEXT:    store i64 [[TMP1]], i64* [[TMP42]], align 8
+// CHECK15-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 1
+// CHECK15-NEXT:    [[TMP44:%.*]] = bitcast i8** [[TMP43]] to i64*
+// CHECK15-NEXT:    store i64 [[TMP1]], i64* [[TMP44]], align 8
+// CHECK15-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 1
+// CHECK15-NEXT:    store i8* null, i8** [[TMP45]], align 8
+// CHECK15-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 2
+// CHECK15-NEXT:    [[TMP47:%.*]] = bitcast i8** [[TMP46]] to i32**
+// CHECK15-NEXT:    store i32* [[VLA]], i32** [[TMP47]], align 8
+// CHECK15-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 2
 // CHECK15-NEXT:    [[TMP49:%.*]] = bitcast i8** [[TMP48]] to i32**
 // CHECK15-NEXT:    store i32* [[VLA]], i32** [[TMP49]], align 8
-// CHECK15-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 2
-// CHECK15-NEXT:    [[TMP51:%.*]] = bitcast i8** [[TMP50]] to i32**
-// CHECK15-NEXT:    store i32* [[VLA]], i32** [[TMP51]], align 8
-// CHECK15-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 2
-// CHECK15-NEXT:    store i64 [[TMP35]], i64* [[TMP52]], align 8
-// CHECK15-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 2
-// CHECK15-NEXT:    store i8* null, i8** [[TMP53]], align 8
-// CHECK15-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
-// CHECK15-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
-// CHECK15-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 0
-// CHECK15-NEXT:    [[TMP57:%.*]] = load i32, i32* [[N]], align 4
-// CHECK15-NEXT:    store i32 [[TMP57]], i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK15-NEXT:    [[TMP58:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK15-NEXT:    [[SUB12:%.*]] = sub nsw i32 [[TMP58]], 0
+// CHECK15-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 2
+// CHECK15-NEXT:    store i64 [[TMP34]], i64* [[TMP50]], align 8
+// CHECK15-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 2
+// CHECK15-NEXT:    store i8* null, i8** [[TMP51]], align 8
+// CHECK15-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
+// CHECK15-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
+// CHECK15-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 0
+// CHECK15-NEXT:    [[TMP55:%.*]] = load i32, i32* [[N]], align 4
+// CHECK15-NEXT:    store i32 [[TMP55]], i32* [[DOTCAPTURE_EXPR_10]], align 4
+// CHECK15-NEXT:    [[TMP56:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
+// CHECK15-NEXT:    [[SUB12:%.*]] = sub nsw i32 [[TMP56]], 0
 // CHECK15-NEXT:    [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1
 // CHECK15-NEXT:    [[SUB14:%.*]] = sub nsw i32 [[DIV13]], 1
 // CHECK15-NEXT:    store i32 [[SUB14]], i32* [[DOTCAPTURE_EXPR_11]], align 4
-// CHECK15-NEXT:    [[TMP59:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_11]], align 4
-// CHECK15-NEXT:    [[ADD15:%.*]] = add nsw i32 [[TMP59]], 1
-// CHECK15-NEXT:    [[TMP60:%.*]] = zext i32 [[ADD15]] to i64
-// CHECK15-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP60]])
-// CHECK15-NEXT:    [[TMP61:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l159.region_id, i32 3, i8** [[TMP54]], i8** [[TMP55]], i64* [[TMP56]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK15-NEXT:    [[TMP62:%.*]] = icmp ne i32 [[TMP61]], 0
-// CHECK15-NEXT:    br i1 [[TMP62]], label [[OMP_OFFLOAD_FAILED16:%.*]], label [[OMP_OFFLOAD_CONT17:%.*]]
+// CHECK15-NEXT:    [[TMP57:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_11]], align 4
+// CHECK15-NEXT:    [[ADD15:%.*]] = add nsw i32 [[TMP57]], 1
+// CHECK15-NEXT:    [[TMP58:%.*]] = zext i32 [[ADD15]] to i64
+// CHECK15-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP58]])
+// CHECK15-NEXT:    [[TMP59:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l159.region_id, i32 3, i8** [[TMP52]], i8** [[TMP53]], i64* [[TMP54]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.5, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK15-NEXT:    [[TMP60:%.*]] = icmp ne i32 [[TMP59]], 0
+// CHECK15-NEXT:    br i1 [[TMP60]], label [[OMP_OFFLOAD_FAILED16:%.*]], label [[OMP_OFFLOAD_CONT17:%.*]]
 // CHECK15:       omp_offload.failed16:
-// CHECK15-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l159(i64 [[TMP34]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK15-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l159(i64 [[TMP33]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK15-NEXT:    br label [[OMP_OFFLOAD_CONT17]]
 // CHECK15:       omp_offload.cont17:
-// CHECK15-NEXT:    [[TMP63:%.*]] = load i32, i32* [[M]], align 4
+// CHECK15-NEXT:    [[TMP61:%.*]] = load i32, i32* [[M]], align 4
 // CHECK15-NEXT:    [[CONV18:%.*]] = bitcast i64* [[M_CASTED]] to i32*
-// CHECK15-NEXT:    store i32 [[TMP63]], i32* [[CONV18]], align 4
-// CHECK15-NEXT:    [[TMP64:%.*]] = load i64, i64* [[M_CASTED]], align 8
-// CHECK15-NEXT:    [[TMP65:%.*]] = load i32, i32* [[N]], align 4
+// CHECK15-NEXT:    store i32 [[TMP61]], i32* [[CONV18]], align 4
+// CHECK15-NEXT:    [[TMP62:%.*]] = load i64, i64* [[M_CASTED]], align 8
+// CHECK15-NEXT:    [[TMP63:%.*]] = load i32, i32* [[N]], align 4
 // CHECK15-NEXT:    [[CONV20:%.*]] = bitcast i64* [[N_CASTED19]] to i32*
-// CHECK15-NEXT:    store i32 [[TMP65]], i32* [[CONV20]], align 4
-// CHECK15-NEXT:    [[TMP66:%.*]] = load i64, i64* [[N_CASTED19]], align 8
-// CHECK15-NEXT:    [[TMP67:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK15-NEXT:    [[TMP68:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 0
-// CHECK15-NEXT:    [[TMP69:%.*]] = bitcast i8** [[TMP68]] to i64*
-// CHECK15-NEXT:    store i64 [[TMP64]], i64* [[TMP69]], align 8
-// CHECK15-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 0
-// CHECK15-NEXT:    [[TMP71:%.*]] = bitcast i8** [[TMP70]] to i64*
-// CHECK15-NEXT:    store i64 [[TMP64]], i64* [[TMP71]], align 8
-// CHECK15-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES24]], i32 0, i32 0
-// CHECK15-NEXT:    store i64 4, i64* [[TMP72]], align 8
-// CHECK15-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 0
-// CHECK15-NEXT:    store i8* null, i8** [[TMP73]], align 8
-// CHECK15-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 1
+// CHECK15-NEXT:    store i32 [[TMP63]], i32* [[CONV20]], align 4
+// CHECK15-NEXT:    [[TMP64:%.*]] = load i64, i64* [[N_CASTED19]], align 8
+// CHECK15-NEXT:    [[TMP65:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK15-NEXT:    [[TMP66:%.*]] = bitcast [4 x i64]* [[DOTOFFLOAD_SIZES24]] to i8*
+// CHECK15-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP66]], i8* align 8 bitcast ([4 x i64]* @.offload_sizes.8 to i8*), i64 32, i1 false)
+// CHECK15-NEXT:    [[TMP67:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 0
+// CHECK15-NEXT:    [[TMP68:%.*]] = bitcast i8** [[TMP67]] to i64*
+// CHECK15-NEXT:    store i64 [[TMP62]], i64* [[TMP68]], align 8
+// CHECK15-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 0
+// CHECK15-NEXT:    [[TMP70:%.*]] = bitcast i8** [[TMP69]] to i64*
+// CHECK15-NEXT:    store i64 [[TMP62]], i64* [[TMP70]], align 8
+// CHECK15-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 0
+// CHECK15-NEXT:    store i8* null, i8** [[TMP71]], align 8
+// CHECK15-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 1
+// CHECK15-NEXT:    [[TMP73:%.*]] = bitcast i8** [[TMP72]] to i64*
+// CHECK15-NEXT:    store i64 [[TMP64]], i64* [[TMP73]], align 8
+// CHECK15-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 1
 // CHECK15-NEXT:    [[TMP75:%.*]] = bitcast i8** [[TMP74]] to i64*
-// CHECK15-NEXT:    store i64 [[TMP66]], i64* [[TMP75]], align 8
-// CHECK15-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 1
-// CHECK15-NEXT:    [[TMP77:%.*]] = bitcast i8** [[TMP76]] to i64*
-// CHECK15-NEXT:    store i64 [[TMP66]], i64* [[TMP77]], align 8
-// CHECK15-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES24]], i32 0, i32 1
-// CHECK15-NEXT:    store i64 4, i64* [[TMP78]], align 8
-// CHECK15-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 1
-// CHECK15-NEXT:    store i8* null, i8** [[TMP79]], align 8
-// CHECK15-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 2
-// CHECK15-NEXT:    [[TMP81:%.*]] = bitcast i8** [[TMP80]] to i64*
-// CHECK15-NEXT:    store i64 [[TMP1]], i64* [[TMP81]], align 8
-// CHECK15-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 2
-// CHECK15-NEXT:    [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i64*
-// CHECK15-NEXT:    store i64 [[TMP1]], i64* [[TMP83]], align 8
-// CHECK15-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES24]], i32 0, i32 2
-// CHECK15-NEXT:    store i64 8, i64* [[TMP84]], align 8
-// CHECK15-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 2
-// CHECK15-NEXT:    store i8* null, i8** [[TMP85]], align 8
-// CHECK15-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 3
-// CHECK15-NEXT:    [[TMP87:%.*]] = bitcast i8** [[TMP86]] to i32**
-// CHECK15-NEXT:    store i32* [[VLA]], i32** [[TMP87]], align 8
-// CHECK15-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 3
-// CHECK15-NEXT:    [[TMP89:%.*]] = bitcast i8** [[TMP88]] to i32**
-// CHECK15-NEXT:    store i32* [[VLA]], i32** [[TMP89]], align 8
-// CHECK15-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES24]], i32 0, i32 3
-// CHECK15-NEXT:    store i64 [[TMP67]], i64* [[TMP90]], align 8
-// CHECK15-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 3
-// CHECK15-NEXT:    store i8* null, i8** [[TMP91]], align 8
-// CHECK15-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 0
-// CHECK15-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 0
-// CHECK15-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES24]], i32 0, i32 0
-// CHECK15-NEXT:    [[TMP95:%.*]] = load i32, i32* [[N]], align 4
-// CHECK15-NEXT:    store i32 [[TMP95]], i32* [[DOTCAPTURE_EXPR_26]], align 4
-// CHECK15-NEXT:    [[TMP96:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_26]], align 4
-// CHECK15-NEXT:    [[SUB28:%.*]] = sub nsw i32 [[TMP96]], 0
+// CHECK15-NEXT:    store i64 [[TMP64]], i64* [[TMP75]], align 8
+// CHECK15-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 1
+// CHECK15-NEXT:    store i8* null, i8** [[TMP76]], align 8
+// CHECK15-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 2
+// CHECK15-NEXT:    [[TMP78:%.*]] = bitcast i8** [[TMP77]] to i64*
+// CHECK15-NEXT:    store i64 [[TMP1]], i64* [[TMP78]], align 8
+// CHECK15-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 2
+// CHECK15-NEXT:    [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i64*
+// CHECK15-NEXT:    store i64 [[TMP1]], i64* [[TMP80]], align 8
+// CHECK15-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 2
+// CHECK15-NEXT:    store i8* null, i8** [[TMP81]], align 8
+// CHECK15-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 3
+// CHECK15-NEXT:    [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i32**
+// CHECK15-NEXT:    store i32* [[VLA]], i32** [[TMP83]], align 8
+// CHECK15-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 3
+// CHECK15-NEXT:    [[TMP85:%.*]] = bitcast i8** [[TMP84]] to i32**
+// CHECK15-NEXT:    store i32* [[VLA]], i32** [[TMP85]], align 8
+// CHECK15-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES24]], i32 0, i32 3
+// CHECK15-NEXT:    store i64 [[TMP65]], i64* [[TMP86]], align 8
+// CHECK15-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 3
+// CHECK15-NEXT:    store i8* null, i8** [[TMP87]], align 8
+// CHECK15-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 0
+// CHECK15-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 0
+// CHECK15-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES24]], i32 0, i32 0
+// CHECK15-NEXT:    [[TMP91:%.*]] = load i32, i32* [[N]], align 4
+// CHECK15-NEXT:    store i32 [[TMP91]], i32* [[DOTCAPTURE_EXPR_26]], align 4
+// CHECK15-NEXT:    [[TMP92:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_26]], align 4
+// CHECK15-NEXT:    [[SUB28:%.*]] = sub nsw i32 [[TMP92]], 0
 // CHECK15-NEXT:    [[DIV29:%.*]] = sdiv i32 [[SUB28]], 1
 // CHECK15-NEXT:    [[SUB30:%.*]] = sub nsw i32 [[DIV29]], 1
 // CHECK15-NEXT:    store i32 [[SUB30]], i32* [[DOTCAPTURE_EXPR_27]], align 4
-// CHECK15-NEXT:    [[TMP97:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_27]], align 4
-// CHECK15-NEXT:    [[ADD31:%.*]] = add nsw i32 [[TMP97]], 1
-// CHECK15-NEXT:    [[TMP98:%.*]] = zext i32 [[ADD31]] to i64
-// CHECK15-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP98]])
-// CHECK15-NEXT:    [[TMP99:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l164.region_id, i32 4, i8** [[TMP92]], i8** [[TMP93]], i64* [[TMP94]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.7, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK15-NEXT:    [[TMP100:%.*]] = icmp ne i32 [[TMP99]], 0
-// CHECK15-NEXT:    br i1 [[TMP100]], label [[OMP_OFFLOAD_FAILED32:%.*]], label [[OMP_OFFLOAD_CONT33:%.*]]
+// CHECK15-NEXT:    [[TMP93:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_27]], align 4
+// CHECK15-NEXT:    [[ADD31:%.*]] = add nsw i32 [[TMP93]], 1
+// CHECK15-NEXT:    [[TMP94:%.*]] = zext i32 [[ADD31]] to i64
+// CHECK15-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP94]])
+// CHECK15-NEXT:    [[TMP95:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l164.region_id, i32 4, i8** [[TMP88]], i8** [[TMP89]], i64* [[TMP90]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK15-NEXT:    [[TMP96:%.*]] = icmp ne i32 [[TMP95]], 0
+// CHECK15-NEXT:    br i1 [[TMP96]], label [[OMP_OFFLOAD_FAILED32:%.*]], label [[OMP_OFFLOAD_CONT33:%.*]]
 // CHECK15:       omp_offload.failed32:
-// CHECK15-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l164(i64 [[TMP64]], i64 [[TMP66]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK15-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l164(i64 [[TMP62]], i64 [[TMP64]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK15-NEXT:    br label [[OMP_OFFLOAD_CONT33]]
 // CHECK15:       omp_offload.cont33:
-// CHECK15-NEXT:    [[TMP101:%.*]] = load i32, i32* [[N]], align 4
+// CHECK15-NEXT:    [[TMP97:%.*]] = load i32, i32* [[N]], align 4
 // CHECK15-NEXT:    [[CONV35:%.*]] = bitcast i64* [[N_CASTED34]] to i32*
-// CHECK15-NEXT:    store i32 [[TMP101]], i32* [[CONV35]], align 4
-// CHECK15-NEXT:    [[TMP102:%.*]] = load i64, i64* [[N_CASTED34]], align 8
-// CHECK15-NEXT:    [[TMP103:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK15-NEXT:    [[TMP104:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS36]], i32 0, i32 0
-// CHECK15-NEXT:    [[TMP105:%.*]] = bitcast i8** [[TMP104]] to i64*
-// CHECK15-NEXT:    store i64 [[TMP102]], i64* [[TMP105]], align 8
-// CHECK15-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS37]], i32 0, i32 0
+// CHECK15-NEXT:    store i32 [[TMP97]], i32* [[CONV35]], align 4
+// CHECK15-NEXT:    [[TMP98:%.*]] = load i64, i64* [[N_CASTED34]], align 8
+// CHECK15-NEXT:    [[TMP99:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK15-NEXT:    [[TMP100:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES39]] to i8*
+// CHECK15-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP100]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes.12 to i8*), i64 24, i1 false)
+// CHECK15-NEXT:    [[TMP101:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS36]], i32 0, i32 0
+// CHECK15-NEXT:    [[TMP102:%.*]] = bitcast i8** [[TMP101]] to i64*
+// CHECK15-NEXT:    store i64 [[TMP98]], i64* [[TMP102]], align 8
+// CHECK15-NEXT:    [[TMP103:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS37]], i32 0, i32 0
+// CHECK15-NEXT:    [[TMP104:%.*]] = bitcast i8** [[TMP103]] to i64*
+// CHECK15-NEXT:    store i64 [[TMP98]], i64* [[TMP104]], align 8
+// CHECK15-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS38]], i64 0, i64 0
+// CHECK15-NEXT:    store i8* null, i8** [[TMP105]], align 8
+// CHECK15-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS36]], i32 0, i32 1
 // CHECK15-NEXT:    [[TMP107:%.*]] = bitcast i8** [[TMP106]] to i64*
-// CHECK15-NEXT:    store i64 [[TMP102]], i64* [[TMP107]], align 8
-// CHECK15-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES39]], i32 0, i32 0
-// CHECK15-NEXT:    store i64 4, i64* [[TMP108]], align 8
-// CHECK15-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS38]], i64 0, i64 0
-// CHECK15-NEXT:    store i8* null, i8** [[TMP109]], align 8
-// CHECK15-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS36]], i32 0, i32 1
-// CHECK15-NEXT:    [[TMP111:%.*]] = bitcast i8** [[TMP110]] to i64*
-// CHECK15-NEXT:    store i64 [[TMP1]], i64* [[TMP111]], align 8
-// CHECK15-NEXT:    [[TMP112:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS37]], i32 0, i32 1
-// CHECK15-NEXT:    [[TMP113:%.*]] = bitcast i8** [[TMP112]] to i64*
-// CHECK15-NEXT:    store i64 [[TMP1]], i64* [[TMP113]], align 8
-// CHECK15-NEXT:    [[TMP114:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES39]], i32 0, i32 1
-// CHECK15-NEXT:    store i64 8, i64* [[TMP114]], align 8
-// CHECK15-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS38]], i64 0, i64 1
-// CHECK15-NEXT:    store i8* null, i8** [[TMP115]], align 8
-// CHECK15-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS36]], i32 0, i32 2
-// CHECK15-NEXT:    [[TMP117:%.*]] = bitcast i8** [[TMP116]] to i32**
-// CHECK15-NEXT:    store i32* [[VLA]], i32** [[TMP117]], align 8
-// CHECK15-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS37]], i32 0, i32 2
-// CHECK15-NEXT:    [[TMP119:%.*]] = bitcast i8** [[TMP118]] to i32**
-// CHECK15-NEXT:    store i32* [[VLA]], i32** [[TMP119]], align 8
-// CHECK15-NEXT:    [[TMP120:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES39]], i32 0, i32 2
-// CHECK15-NEXT:    store i64 [[TMP103]], i64* [[TMP120]], align 8
-// CHECK15-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS38]], i64 0, i64 2
-// CHECK15-NEXT:    store i8* null, i8** [[TMP121]], align 8
-// CHECK15-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS36]], i32 0, i32 0
-// CHECK15-NEXT:    [[TMP123:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS37]], i32 0, i32 0
-// CHECK15-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES39]], i32 0, i32 0
-// CHECK15-NEXT:    [[TMP125:%.*]] = load i32, i32* [[N]], align 4
-// CHECK15-NEXT:    store i32 [[TMP125]], i32* [[DOTCAPTURE_EXPR_41]], align 4
-// CHECK15-NEXT:    [[TMP126:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_41]], align 4
-// CHECK15-NEXT:    [[SUB43:%.*]] = sub nsw i32 [[TMP126]], 0
+// CHECK15-NEXT:    store i64 [[TMP1]], i64* [[TMP107]], align 8
+// CHECK15-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS37]], i32 0, i32 1
+// CHECK15-NEXT:    [[TMP109:%.*]] = bitcast i8** [[TMP108]] to i64*
+// CHECK15-NEXT:    store i64 [[TMP1]], i64* [[TMP109]], align 8
+// CHECK15-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS38]], i64 0, i64 1
+// CHECK15-NEXT:    store i8* null, i8** [[TMP110]], align 8
+// CHECK15-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS36]], i32 0, i32 2
+// CHECK15-NEXT:    [[TMP112:%.*]] = bitcast i8** [[TMP111]] to i32**
+// CHECK15-NEXT:    store i32* [[VLA]], i32** [[TMP112]], align 8
+// CHECK15-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS37]], i32 0, i32 2
+// CHECK15-NEXT:    [[TMP114:%.*]] = bitcast i8** [[TMP113]] to i32**
+// CHECK15-NEXT:    store i32* [[VLA]], i32** [[TMP114]], align 8
+// CHECK15-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES39]], i32 0, i32 2
+// CHECK15-NEXT:    store i64 [[TMP99]], i64* [[TMP115]], align 8
+// CHECK15-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS38]], i64 0, i64 2
+// CHECK15-NEXT:    store i8* null, i8** [[TMP116]], align 8
+// CHECK15-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS36]], i32 0, i32 0
+// CHECK15-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS37]], i32 0, i32 0
+// CHECK15-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES39]], i32 0, i32 0
+// CHECK15-NEXT:    [[TMP120:%.*]] = load i32, i32* [[N]], align 4
+// CHECK15-NEXT:    store i32 [[TMP120]], i32* [[DOTCAPTURE_EXPR_41]], align 4
+// CHECK15-NEXT:    [[TMP121:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_41]], align 4
+// CHECK15-NEXT:    [[SUB43:%.*]] = sub nsw i32 [[TMP121]], 0
 // CHECK15-NEXT:    [[DIV44:%.*]] = sdiv i32 [[SUB43]], 1
 // CHECK15-NEXT:    [[SUB45:%.*]] = sub nsw i32 [[DIV44]], 1
 // CHECK15-NEXT:    store i32 [[SUB45]], i32* [[DOTCAPTURE_EXPR_42]], align 4
-// CHECK15-NEXT:    [[TMP127:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_42]], align 4
-// CHECK15-NEXT:    [[ADD46:%.*]] = add nsw i32 [[TMP127]], 1
-// CHECK15-NEXT:    [[TMP128:%.*]] = zext i32 [[ADD46]] to i64
-// CHECK15-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP128]])
-// CHECK15-NEXT:    [[TMP129:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l169.region_id, i32 3, i8** [[TMP122]], i8** [[TMP123]], i64* [[TMP124]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.10, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK15-NEXT:    [[TMP130:%.*]] = icmp ne i32 [[TMP129]], 0
-// CHECK15-NEXT:    br i1 [[TMP130]], label [[OMP_OFFLOAD_FAILED47:%.*]], label [[OMP_OFFLOAD_CONT48:%.*]]
+// CHECK15-NEXT:    [[TMP122:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_42]], align 4
+// CHECK15-NEXT:    [[ADD46:%.*]] = add nsw i32 [[TMP122]], 1
+// CHECK15-NEXT:    [[TMP123:%.*]] = zext i32 [[ADD46]] to i64
+// CHECK15-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP123]])
+// CHECK15-NEXT:    [[TMP124:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l169.region_id, i32 3, i8** [[TMP117]], i8** [[TMP118]], i64* [[TMP119]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK15-NEXT:    [[TMP125:%.*]] = icmp ne i32 [[TMP124]], 0
+// CHECK15-NEXT:    br i1 [[TMP125]], label [[OMP_OFFLOAD_FAILED47:%.*]], label [[OMP_OFFLOAD_CONT48:%.*]]
 // CHECK15:       omp_offload.failed47:
-// CHECK15-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l169(i64 [[TMP102]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK15-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l169(i64 [[TMP98]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK15-NEXT:    br label [[OMP_OFFLOAD_CONT48]]
 // CHECK15:       omp_offload.cont48:
-// CHECK15-NEXT:    [[TMP131:%.*]] = load i32, i32* [[M]], align 4
+// CHECK15-NEXT:    [[TMP126:%.*]] = load i32, i32* [[M]], align 4
 // CHECK15-NEXT:    [[CONV50:%.*]] = bitcast i64* [[M_CASTED49]] to i32*
-// CHECK15-NEXT:    store i32 [[TMP131]], i32* [[CONV50]], align 4
-// CHECK15-NEXT:    [[TMP132:%.*]] = load i64, i64* [[M_CASTED49]], align 8
-// CHECK15-NEXT:    [[TMP133:%.*]] = load i32, i32* [[N]], align 4
+// CHECK15-NEXT:    store i32 [[TMP126]], i32* [[CONV50]], align 4
+// CHECK15-NEXT:    [[TMP127:%.*]] = load i64, i64* [[M_CASTED49]], align 8
+// CHECK15-NEXT:    [[TMP128:%.*]] = load i32, i32* [[N]], align 4
 // CHECK15-NEXT:    [[CONV52:%.*]] = bitcast i64* [[N_CASTED51]] to i32*
-// CHECK15-NEXT:    store i32 [[TMP133]], i32* [[CONV52]], align 4
-// CHECK15-NEXT:    [[TMP134:%.*]] = load i64, i64* [[N_CASTED51]], align 8
-// CHECK15-NEXT:    [[TMP135:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK15-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 0
-// CHECK15-NEXT:    [[TMP137:%.*]] = bitcast i8** [[TMP136]] to i64*
-// CHECK15-NEXT:    store i64 [[TMP132]], i64* [[TMP137]], align 8
-// CHECK15-NEXT:    [[TMP138:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 0
-// CHECK15-NEXT:    [[TMP139:%.*]] = bitcast i8** [[TMP138]] to i64*
-// CHECK15-NEXT:    store i64 [[TMP132]], i64* [[TMP139]], align 8
-// CHECK15-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES56]], i32 0, i32 0
-// CHECK15-NEXT:    store i64 4, i64* [[TMP140]], align 8
-// CHECK15-NEXT:    [[TMP141:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS55]], i64 0, i64 0
+// CHECK15-NEXT:    store i32 [[TMP128]], i32* [[CONV52]], align 4
+// CHECK15-NEXT:    [[TMP129:%.*]] = load i64, i64* [[N_CASTED51]], align 8
+// CHECK15-NEXT:    [[TMP130:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK15-NEXT:    [[TMP131:%.*]] = bitcast [4 x i64]* [[DOTOFFLOAD_SIZES56]] to i8*
+// CHECK15-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP131]], i8* align 8 bitcast ([4 x i64]* @.offload_sizes.16 to i8*), i64 32, i1 false)
+// CHECK15-NEXT:    [[TMP132:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 0
+// CHECK15-NEXT:    [[TMP133:%.*]] = bitcast i8** [[TMP132]] to i64*
+// CHECK15-NEXT:    store i64 [[TMP127]], i64* [[TMP133]], align 8
+// CHECK15-NEXT:    [[TMP134:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 0
+// CHECK15-NEXT:    [[TMP135:%.*]] = bitcast i8** [[TMP134]] to i64*
+// CHECK15-NEXT:    store i64 [[TMP127]], i64* [[TMP135]], align 8
+// CHECK15-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS55]], i64 0, i64 0
+// CHECK15-NEXT:    store i8* null, i8** [[TMP136]], align 8
+// CHECK15-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 1
+// CHECK15-NEXT:    [[TMP138:%.*]] = bitcast i8** [[TMP137]] to i64*
+// CHECK15-NEXT:    store i64 [[TMP129]], i64* [[TMP138]], align 8
+// CHECK15-NEXT:    [[TMP139:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 1
+// CHECK15-NEXT:    [[TMP140:%.*]] = bitcast i8** [[TMP139]] to i64*
+// CHECK15-NEXT:    store i64 [[TMP129]], i64* [[TMP140]], align 8
+// CHECK15-NEXT:    [[TMP141:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS55]], i64 0, i64 1
 // CHECK15-NEXT:    store i8* null, i8** [[TMP141]], align 8
-// CHECK15-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 1
+// CHECK15-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 2
 // CHECK15-NEXT:    [[TMP143:%.*]] = bitcast i8** [[TMP142]] to i64*
-// CHECK15-NEXT:    store i64 [[TMP134]], i64* [[TMP143]], align 8
-// CHECK15-NEXT:    [[TMP144:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 1
+// CHECK15-NEXT:    store i64 [[TMP1]], i64* [[TMP143]], align 8
+// CHECK15-NEXT:    [[TMP144:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 2
 // CHECK15-NEXT:    [[TMP145:%.*]] = bitcast i8** [[TMP144]] to i64*
-// CHECK15-NEXT:    store i64 [[TMP134]], i64* [[TMP145]], align 8
-// CHECK15-NEXT:    [[TMP146:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES56]], i32 0, i32 1
-// CHECK15-NEXT:    store i64 4, i64* [[TMP146]], align 8
-// CHECK15-NEXT:    [[TMP147:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS55]], i64 0, i64 1
-// CHECK15-NEXT:    store i8* null, i8** [[TMP147]], align 8
-// CHECK15-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 2
-// CHECK15-NEXT:    [[TMP149:%.*]] = bitcast i8** [[TMP148]] to i64*
-// CHECK15-NEXT:    store i64 [[TMP1]], i64* [[TMP149]], align 8
-// CHECK15-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 2
-// CHECK15-NEXT:    [[TMP151:%.*]] = bitcast i8** [[TMP150]] to i64*
-// CHECK15-NEXT:    store i64 [[TMP1]], i64* [[TMP151]], align 8
-// CHECK15-NEXT:    [[TMP152:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES56]], i32 0, i32 2
-// CHECK15-NEXT:    store i64 8, i64* [[TMP152]], align 8
-// CHECK15-NEXT:    [[TMP153:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS55]], i64 0, i64 2
-// CHECK15-NEXT:    store i8* null, i8** [[TMP153]], align 8
-// CHECK15-NEXT:    [[TMP154:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 3
-// CHECK15-NEXT:    [[TMP155:%.*]] = bitcast i8** [[TMP154]] to i32**
-// CHECK15-NEXT:    store i32* [[VLA]], i32** [[TMP155]], align 8
-// CHECK15-NEXT:    [[TMP156:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 3
-// CHECK15-NEXT:    [[TMP157:%.*]] = bitcast i8** [[TMP156]] to i32**
-// CHECK15-NEXT:    store i32* [[VLA]], i32** [[TMP157]], align 8
-// CHECK15-NEXT:    [[TMP158:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES56]], i32 0, i32 3
-// CHECK15-NEXT:    store i64 [[TMP135]], i64* [[TMP158]], align 8
-// CHECK15-NEXT:    [[TMP159:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS55]], i64 0, i64 3
-// CHECK15-NEXT:    store i8* null, i8** [[TMP159]], align 8
-// CHECK15-NEXT:    [[TMP160:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 0
-// CHECK15-NEXT:    [[TMP161:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 0
-// CHECK15-NEXT:    [[TMP162:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES56]], i32 0, i32 0
-// CHECK15-NEXT:    [[TMP163:%.*]] = load i32, i32* [[N]], align 4
-// CHECK15-NEXT:    store i32 [[TMP163]], i32* [[DOTCAPTURE_EXPR_58]], align 4
-// CHECK15-NEXT:    [[TMP164:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_58]], align 4
-// CHECK15-NEXT:    [[SUB60:%.*]] = sub nsw i32 [[TMP164]], 0
+// CHECK15-NEXT:    store i64 [[TMP1]], i64* [[TMP145]], align 8
+// CHECK15-NEXT:    [[TMP146:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS55]], i64 0, i64 2
+// CHECK15-NEXT:    store i8* null, i8** [[TMP146]], align 8
+// CHECK15-NEXT:    [[TMP147:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 3
+// CHECK15-NEXT:    [[TMP148:%.*]] = bitcast i8** [[TMP147]] to i32**
+// CHECK15-NEXT:    store i32* [[VLA]], i32** [[TMP148]], align 8
+// CHECK15-NEXT:    [[TMP149:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 3
+// CHECK15-NEXT:    [[TMP150:%.*]] = bitcast i8** [[TMP149]] to i32**
+// CHECK15-NEXT:    store i32* [[VLA]], i32** [[TMP150]], align 8
+// CHECK15-NEXT:    [[TMP151:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES56]], i32 0, i32 3
+// CHECK15-NEXT:    store i64 [[TMP130]], i64* [[TMP151]], align 8
+// CHECK15-NEXT:    [[TMP152:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS55]], i64 0, i64 3
+// CHECK15-NEXT:    store i8* null, i8** [[TMP152]], align 8
+// CHECK15-NEXT:    [[TMP153:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 0
+// CHECK15-NEXT:    [[TMP154:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 0
+// CHECK15-NEXT:    [[TMP155:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES56]], i32 0, i32 0
+// CHECK15-NEXT:    [[TMP156:%.*]] = load i32, i32* [[N]], align 4
+// CHECK15-NEXT:    store i32 [[TMP156]], i32* [[DOTCAPTURE_EXPR_58]], align 4
+// CHECK15-NEXT:    [[TMP157:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_58]], align 4
+// CHECK15-NEXT:    [[SUB60:%.*]] = sub nsw i32 [[TMP157]], 0
 // CHECK15-NEXT:    [[DIV61:%.*]] = sdiv i32 [[SUB60]], 1
 // CHECK15-NEXT:    [[SUB62:%.*]] = sub nsw i32 [[DIV61]], 1
 // CHECK15-NEXT:    store i32 [[SUB62]], i32* [[DOTCAPTURE_EXPR_59]], align 4
-// CHECK15-NEXT:    [[TMP165:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_59]], align 4
-// CHECK15-NEXT:    [[ADD63:%.*]] = add nsw i32 [[TMP165]], 1
-// CHECK15-NEXT:    [[TMP166:%.*]] = zext i32 [[ADD63]] to i64
-// CHECK15-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP166]])
-// CHECK15-NEXT:    [[TMP167:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l174.region_id, i32 4, i8** [[TMP160]], i8** [[TMP161]], i64* [[TMP162]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK15-NEXT:    [[TMP168:%.*]] = icmp ne i32 [[TMP167]], 0
-// CHECK15-NEXT:    br i1 [[TMP168]], label [[OMP_OFFLOAD_FAILED64:%.*]], label [[OMP_OFFLOAD_CONT65:%.*]]
+// CHECK15-NEXT:    [[TMP158:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_59]], align 4
+// CHECK15-NEXT:    [[ADD63:%.*]] = add nsw i32 [[TMP158]], 1
+// CHECK15-NEXT:    [[TMP159:%.*]] = zext i32 [[ADD63]] to i64
+// CHECK15-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP159]])
+// CHECK15-NEXT:    [[TMP160:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l174.region_id, i32 4, i8** [[TMP153]], i8** [[TMP154]], i64* [[TMP155]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.17, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK15-NEXT:    [[TMP161:%.*]] = icmp ne i32 [[TMP160]], 0
+// CHECK15-NEXT:    br i1 [[TMP161]], label [[OMP_OFFLOAD_FAILED64:%.*]], label [[OMP_OFFLOAD_CONT65:%.*]]
 // CHECK15:       omp_offload.failed64:
-// CHECK15-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l174(i64 [[TMP132]], i64 [[TMP134]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK15-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l174(i64 [[TMP127]], i64 [[TMP129]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK15-NEXT:    br label [[OMP_OFFLOAD_CONT65]]
 // CHECK15:       omp_offload.cont65:
-// CHECK15-NEXT:    [[TMP169:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK15-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10EEiT_(i32 noundef signext [[TMP169]])
+// CHECK15-NEXT:    [[TMP162:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK15-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10EEiT_(i32 noundef signext [[TMP162]])
 // CHECK15-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK15-NEXT:    [[TMP170:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK15-NEXT:    call void @llvm.stackrestore(i8* [[TMP170]])
-// CHECK15-NEXT:    [[TMP171:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK15-NEXT:    ret i32 [[TMP171]]
+// CHECK15-NEXT:    [[TMP163:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK15-NEXT:    call void @llvm.stackrestore(i8* [[TMP163]])
+// CHECK15-NEXT:    [[TMP164:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK15-NEXT:    ret i32 [[TMP164]]
 //
 //
 // CHECK15-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l154
@@ -14945,11 +14903,11 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK15-NEXT:    store i32 [[TMP3]], i32* [[CONV2]], align 4
 // CHECK15-NEXT:    [[TMP4:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32* [[CONV1]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP4]])
+// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* [[CONV1]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP4]])
 // CHECK15-NEXT:    ret void
 //
 //
-// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..5
+// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..6
 // CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK15-NEXT:  entry:
 // CHECK15-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -15031,7 +14989,7 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK15-NEXT:    store i32 [[TMP21]], i32* [[CONV7]], align 4, !llvm.access.group !28
 // CHECK15-NEXT:    [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !28
-// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]], i64 [[TMP22]]), !llvm.access.group !28
+// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*, i64)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]], i64 [[TMP22]]), !llvm.access.group !28
 // CHECK15-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK15:       omp.inner.for.inc:
 // CHECK15-NEXT:    [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28
@@ -15085,7 +15043,7 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    ret void
 //
 //
-// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..6
+// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..7
 // CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK15-NEXT:  entry:
 // CHECK15-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -15217,11 +15175,11 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32*
 // CHECK15-NEXT:    [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8
 // CHECK15-NEXT:    [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8
-// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), i32* [[CONV]], i64 [[TMP0]], i32* [[TMP1]])
+// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), i32* [[CONV]], i64 [[TMP0]], i32* [[TMP1]])
 // CHECK15-NEXT:    ret void
 //
 //
-// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..8
+// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..10
 // CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] {
 // CHECK15-NEXT:  entry:
 // CHECK15-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -15293,7 +15251,7 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[TMP17:%.*]] = zext i32 [[TMP16]] to i64
 // CHECK15-NEXT:    [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !34
 // CHECK15-NEXT:    [[TMP19:%.*]] = zext i32 [[TMP18]] to i64
-// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]]), !llvm.access.group !34
+// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]]), !llvm.access.group !34
 // CHECK15-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK15:       omp.inner.for.inc:
 // CHECK15-NEXT:    [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34
@@ -15324,7 +15282,7 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    ret void
 //
 //
-// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..9
+// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..11
 // CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] {
 // CHECK15-NEXT:  entry:
 // CHECK15-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -15460,11 +15418,11 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK15-NEXT:    store i32 [[TMP3]], i32* [[CONV2]], align 4
 // CHECK15-NEXT:    [[TMP4:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32* [[CONV1]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP4]])
+// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*, i64)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i32* [[CONV1]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP4]])
 // CHECK15-NEXT:    ret void
 //
 //
-// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..11
+// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..14
 // CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK15-NEXT:  entry:
 // CHECK15-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -15544,7 +15502,7 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK15-NEXT:    store i32 [[TMP20]], i32* [[CONV7]], align 4, !llvm.access.group !40
 // CHECK15-NEXT:    [[TMP21:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !40
-// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*, i64)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]], i64 [[TMP21]]), !llvm.access.group !40
+// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]], i64 [[TMP21]]), !llvm.access.group !40
 // CHECK15-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK15:       omp.inner.for.inc:
 // CHECK15-NEXT:    [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40
@@ -15575,7 +15533,7 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    ret void
 //
 //
-// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..12
+// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..15
 // CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK15-NEXT:  entry:
 // CHECK15-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -15693,7 +15651,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK15-LABEL: define {{[^@]+}}@_Z5tmainIiLi10EEiT_
-// CHECK15-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK15-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK15-NEXT:  entry:
 // CHECK15-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK15-NEXT:    [[A:%.*]] = alloca [10 x i32], align 4
@@ -15733,7 +15691,7 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK15-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK15-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK15-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l122.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.16, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK15-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l122.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.20, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.21, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK15-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK15-NEXT:    br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK15:       omp_offload.failed:
@@ -15751,7 +15709,7 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK15-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
 // CHECK15-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK15-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l127.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.19, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.20, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK15-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l127.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.24, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.25, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK15-NEXT:    [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
 // CHECK15-NEXT:    br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]]
 // CHECK15:       omp_offload.failed5:
@@ -15781,7 +15739,7 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0
 // CHECK15-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS8]], i32 0, i32 0
 // CHECK15-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK15-NEXT:    [[TMP32:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l132.region_id, i32 2, i8** [[TMP30]], i8** [[TMP31]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.23, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.24, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK15-NEXT:    [[TMP32:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l132.region_id, i32 2, i8** [[TMP30]], i8** [[TMP31]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.28, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.29, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK15-NEXT:    [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0
 // CHECK15-NEXT:    br i1 [[TMP33]], label [[OMP_OFFLOAD_FAILED11:%.*]], label [[OMP_OFFLOAD_CONT12:%.*]]
 // CHECK15:       omp_offload.failed11:
@@ -15799,7 +15757,7 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0
 // CHECK15-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 0
 // CHECK15-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK15-NEXT:    [[TMP41:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l137.region_id, i32 1, i8** [[TMP39]], i8** [[TMP40]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.27, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.28, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK15-NEXT:    [[TMP41:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l137.region_id, i32 1, i8** [[TMP39]], i8** [[TMP40]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.32, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.33, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK15-NEXT:    [[TMP42:%.*]] = icmp ne i32 [[TMP41]], 0
 // CHECK15-NEXT:    br i1 [[TMP42]], label [[OMP_OFFLOAD_FAILED17:%.*]], label [[OMP_OFFLOAD_CONT18:%.*]]
 // CHECK15:       omp_offload.failed17:
@@ -15829,7 +15787,7 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 0
 // CHECK15-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 0
 // CHECK15-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK15-NEXT:    [[TMP57:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l142.region_id, i32 2, i8** [[TMP55]], i8** [[TMP56]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.31, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.32, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK15-NEXT:    [[TMP57:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l142.region_id, i32 2, i8** [[TMP55]], i8** [[TMP56]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.36, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.37, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK15-NEXT:    [[TMP58:%.*]] = icmp ne i32 [[TMP57]], 0
 // CHECK15-NEXT:    br i1 [[TMP58]], label [[OMP_OFFLOAD_FAILED25:%.*]], label [[OMP_OFFLOAD_CONT26:%.*]]
 // CHECK15:       omp_offload.failed25:
@@ -15845,11 +15803,11 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8
 // CHECK15-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8
 // CHECK15-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8
-// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK15-NEXT:    ret void
 //
 //
-// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..14
+// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..18
 // CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK15-NEXT:  entry:
 // CHECK15-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -15897,7 +15855,7 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
 // CHECK15-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !46
 // CHECK15-NEXT:    [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
-// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !46
+// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !46
 // CHECK15-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK15:       omp.inner.for.inc:
 // CHECK15-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46
@@ -15919,7 +15877,7 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    ret void
 //
 //
-// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..15
+// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..19
 // CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK15-NEXT:  entry:
 // CHECK15-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -16009,11 +15967,11 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8
 // CHECK15-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8
 // CHECK15-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8
-// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..17 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..22 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK15-NEXT:    ret void
 //
 //
-// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..17
+// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..22
 // CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK15-NEXT:  entry:
 // CHECK15-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -16061,7 +16019,7 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
 // CHECK15-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !52
 // CHECK15-NEXT:    [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
-// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !52
+// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..23 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !52
 // CHECK15-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK15:       omp.inner.for.inc:
 // CHECK15-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52
@@ -16083,7 +16041,7 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    ret void
 //
 //
-// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..18
+// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..23
 // CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK15-NEXT:  entry:
 // CHECK15-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -16184,11 +16142,11 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK15-NEXT:    store i32 [[TMP2]], i32* [[CONV1]], align 4
 // CHECK15-NEXT:    [[TMP3:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i64)* @.omp_outlined..21 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i64 [[TMP3]])
+// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i64)* @.omp_outlined..26 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i64 [[TMP3]])
 // CHECK15-NEXT:    ret void
 //
 //
-// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..21
+// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..26
 // CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK15-NEXT:  entry:
 // CHECK15-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -16244,7 +16202,7 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK15-NEXT:    store i32 [[TMP12]], i32* [[CONV2]], align 4, !llvm.access.group !58
 // CHECK15-NEXT:    [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !58
-// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..22 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]), !llvm.access.group !58
+// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..27 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]), !llvm.access.group !58
 // CHECK15-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK15:       omp.inner.for.inc:
 // CHECK15-NEXT:    [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58
@@ -16266,7 +16224,7 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    ret void
 //
 //
-// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..22
+// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..27
 // CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK15-NEXT:  entry:
 // CHECK15-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -16381,11 +16339,11 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8
 // CHECK15-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8
 // CHECK15-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8
-// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..25 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK15-NEXT:    ret void
 //
 //
-// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..25
+// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..30
 // CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK15-NEXT:  entry:
 // CHECK15-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -16433,7 +16391,7 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
 // CHECK15-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !64
 // CHECK15-NEXT:    [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
-// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !64
+// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !64
 // CHECK15-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK15:       omp.inner.for.inc:
 // CHECK15-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !64
@@ -16455,7 +16413,7 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    ret void
 //
 //
-// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..26
+// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..31
 // CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK15-NEXT:  entry:
 // CHECK15-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -16554,11 +16512,11 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK15-NEXT:    store i32 [[TMP2]], i32* [[CONV1]], align 4
 // CHECK15-NEXT:    [[TMP3:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i64)* @.omp_outlined..29 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i64 [[TMP3]])
+// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i64)* @.omp_outlined..34 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i64 [[TMP3]])
 // CHECK15-NEXT:    ret void
 //
 //
-// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..29
+// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..34
 // CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK15-NEXT:  entry:
 // CHECK15-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -16614,7 +16572,7 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK15-NEXT:    store i32 [[TMP12]], i32* [[CONV2]], align 4, !llvm.access.group !70
 // CHECK15-NEXT:    [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !70
-// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]), !llvm.access.group !70
+// CHECK15-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]), !llvm.access.group !70
 // CHECK15-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK15:       omp.inner.for.inc:
 // CHECK15-NEXT:    [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !70
@@ -16636,7 +16594,7 @@ int main (int argc, char **argv) {
 // CHECK15-NEXT:    ret void
 //
 //
-// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..30
+// CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..35
 // CHECK15-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK15-NEXT:  entry:
 // CHECK15-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -16723,7 +16681,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK15-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK15-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK15-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK15-NEXT:  entry:
 // CHECK15-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK15-NEXT:    ret void
@@ -16797,14 +16755,14 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    store i32 [[TMP3]], i32* [[CONV]], align 4
 // CHECK16-NEXT:    [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8
 // CHECK16-NEXT:    [[TMP5:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK16-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK16-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i64*
-// CHECK16-NEXT:    store i64 [[TMP4]], i64* [[TMP7]], align 8
-// CHECK16-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK16-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i64*
-// CHECK16-NEXT:    store i64 [[TMP4]], i64* [[TMP9]], align 8
-// CHECK16-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK16-NEXT:    store i64 4, i64* [[TMP10]], align 8
+// CHECK16-NEXT:    [[TMP6:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK16-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP6]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes to i8*), i64 24, i1 false)
+// CHECK16-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK16-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i64*
+// CHECK16-NEXT:    store i64 [[TMP4]], i64* [[TMP8]], align 8
+// CHECK16-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK16-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i64*
+// CHECK16-NEXT:    store i64 [[TMP4]], i64* [[TMP10]], align 8
 // CHECK16-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
 // CHECK16-NEXT:    store i8* null, i8** [[TMP11]], align 8
 // CHECK16-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -16813,300 +16771,286 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK16-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i64*
 // CHECK16-NEXT:    store i64 [[TMP1]], i64* [[TMP15]], align 8
-// CHECK16-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK16-NEXT:    store i64 8, i64* [[TMP16]], align 8
-// CHECK16-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK16-NEXT:    store i8* null, i8** [[TMP17]], align 8
-// CHECK16-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK16-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
-// CHECK16-NEXT:    store i32* [[VLA]], i32** [[TMP19]], align 8
-// CHECK16-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK16-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32**
-// CHECK16-NEXT:    store i32* [[VLA]], i32** [[TMP21]], align 8
-// CHECK16-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK16-NEXT:    store i64 [[TMP5]], i64* [[TMP22]], align 8
-// CHECK16-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK16-NEXT:    store i8* null, i8** [[TMP23]], align 8
-// CHECK16-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK16-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK16-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK16-NEXT:    [[TMP27:%.*]] = load i32, i32* [[N]], align 4
-// CHECK16-NEXT:    store i32 [[TMP27]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK16-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK16-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP28]], 0
+// CHECK16-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK16-NEXT:    store i8* null, i8** [[TMP16]], align 8
+// CHECK16-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK16-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32**
+// CHECK16-NEXT:    store i32* [[VLA]], i32** [[TMP18]], align 8
+// CHECK16-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK16-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32**
+// CHECK16-NEXT:    store i32* [[VLA]], i32** [[TMP20]], align 8
+// CHECK16-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
+// CHECK16-NEXT:    store i64 [[TMP5]], i64* [[TMP21]], align 8
+// CHECK16-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK16-NEXT:    store i8* null, i8** [[TMP22]], align 8
+// CHECK16-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK16-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK16-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK16-NEXT:    [[TMP26:%.*]] = load i32, i32* [[N]], align 4
+// CHECK16-NEXT:    store i32 [[TMP26]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK16-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK16-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP27]], 0
 // CHECK16-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK16-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK16-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK16-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK16-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP29]], 1
-// CHECK16-NEXT:    [[TMP30:%.*]] = zext i32 [[ADD]] to i64
-// CHECK16-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP30]])
-// CHECK16-NEXT:    [[TMP31:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l154.region_id, i32 3, i8** [[TMP24]], i8** [[TMP25]], i64* [[TMP26]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK16-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
-// CHECK16-NEXT:    br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK16-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK16-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP28]], 1
+// CHECK16-NEXT:    [[TMP29:%.*]] = zext i32 [[ADD]] to i64
+// CHECK16-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP29]])
+// CHECK16-NEXT:    [[TMP30:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l154.region_id, i32 3, i8** [[TMP23]], i8** [[TMP24]], i64* [[TMP25]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK16-NEXT:    [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
+// CHECK16-NEXT:    br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK16:       omp_offload.failed:
 // CHECK16-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l154(i64 [[TMP4]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK16-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK16:       omp_offload.cont:
-// CHECK16-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N]], align 4
+// CHECK16-NEXT:    [[TMP32:%.*]] = load i32, i32* [[N]], align 4
 // CHECK16-NEXT:    [[CONV4:%.*]] = bitcast i64* [[N_CASTED3]] to i32*
-// CHECK16-NEXT:    store i32 [[TMP33]], i32* [[CONV4]], align 4
-// CHECK16-NEXT:    [[TMP34:%.*]] = load i64, i64* [[N_CASTED3]], align 8
-// CHECK16-NEXT:    [[TMP35:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK16-NEXT:    store i32 [[TMP32]], i32* [[CONV4]], align 4
+// CHECK16-NEXT:    [[TMP33:%.*]] = load i64, i64* [[N_CASTED3]], align 8
+// CHECK16-NEXT:    [[TMP34:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK16-NEXT:    [[TMP35:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES8]] to i8*
+// CHECK16-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP35]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes.4 to i8*), i64 24, i1 false)
 // CHECK16-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
 // CHECK16-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i64*
-// CHECK16-NEXT:    store i64 [[TMP34]], i64* [[TMP37]], align 8
+// CHECK16-NEXT:    store i64 [[TMP33]], i64* [[TMP37]], align 8
 // CHECK16-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
 // CHECK16-NEXT:    [[TMP39:%.*]] = bitcast i8** [[TMP38]] to i64*
-// CHECK16-NEXT:    store i64 [[TMP34]], i64* [[TMP39]], align 8
-// CHECK16-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 0
-// CHECK16-NEXT:    store i64 4, i64* [[TMP40]], align 8
-// CHECK16-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 0
-// CHECK16-NEXT:    store i8* null, i8** [[TMP41]], align 8
-// CHECK16-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 1
-// CHECK16-NEXT:    [[TMP43:%.*]] = bitcast i8** [[TMP42]] to i64*
-// CHECK16-NEXT:    store i64 [[TMP1]], i64* [[TMP43]], align 8
-// CHECK16-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 1
-// CHECK16-NEXT:    [[TMP45:%.*]] = bitcast i8** [[TMP44]] to i64*
-// CHECK16-NEXT:    store i64 [[TMP1]], i64* [[TMP45]], align 8
-// CHECK16-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 1
-// CHECK16-NEXT:    store i64 8, i64* [[TMP46]], align 8
-// CHECK16-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 1
-// CHECK16-NEXT:    store i8* null, i8** [[TMP47]], align 8
-// CHECK16-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 2
+// CHECK16-NEXT:    store i64 [[TMP33]], i64* [[TMP39]], align 8
+// CHECK16-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 0
+// CHECK16-NEXT:    store i8* null, i8** [[TMP40]], align 8
+// CHECK16-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 1
+// CHECK16-NEXT:    [[TMP42:%.*]] = bitcast i8** [[TMP41]] to i64*
+// CHECK16-NEXT:    store i64 [[TMP1]], i64* [[TMP42]], align 8
+// CHECK16-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 1
+// CHECK16-NEXT:    [[TMP44:%.*]] = bitcast i8** [[TMP43]] to i64*
+// CHECK16-NEXT:    store i64 [[TMP1]], i64* [[TMP44]], align 8
+// CHECK16-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 1
+// CHECK16-NEXT:    store i8* null, i8** [[TMP45]], align 8
+// CHECK16-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 2
+// CHECK16-NEXT:    [[TMP47:%.*]] = bitcast i8** [[TMP46]] to i32**
+// CHECK16-NEXT:    store i32* [[VLA]], i32** [[TMP47]], align 8
+// CHECK16-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 2
 // CHECK16-NEXT:    [[TMP49:%.*]] = bitcast i8** [[TMP48]] to i32**
 // CHECK16-NEXT:    store i32* [[VLA]], i32** [[TMP49]], align 8
-// CHECK16-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 2
-// CHECK16-NEXT:    [[TMP51:%.*]] = bitcast i8** [[TMP50]] to i32**
-// CHECK16-NEXT:    store i32* [[VLA]], i32** [[TMP51]], align 8
-// CHECK16-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 2
-// CHECK16-NEXT:    store i64 [[TMP35]], i64* [[TMP52]], align 8
-// CHECK16-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 2
-// CHECK16-NEXT:    store i8* null, i8** [[TMP53]], align 8
-// CHECK16-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
-// CHECK16-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
-// CHECK16-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 0
-// CHECK16-NEXT:    [[TMP57:%.*]] = load i32, i32* [[N]], align 4
-// CHECK16-NEXT:    store i32 [[TMP57]], i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK16-NEXT:    [[TMP58:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK16-NEXT:    [[SUB12:%.*]] = sub nsw i32 [[TMP58]], 0
+// CHECK16-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 2
+// CHECK16-NEXT:    store i64 [[TMP34]], i64* [[TMP50]], align 8
+// CHECK16-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 2
+// CHECK16-NEXT:    store i8* null, i8** [[TMP51]], align 8
+// CHECK16-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
+// CHECK16-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
+// CHECK16-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 0
+// CHECK16-NEXT:    [[TMP55:%.*]] = load i32, i32* [[N]], align 4
+// CHECK16-NEXT:    store i32 [[TMP55]], i32* [[DOTCAPTURE_EXPR_10]], align 4
+// CHECK16-NEXT:    [[TMP56:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
+// CHECK16-NEXT:    [[SUB12:%.*]] = sub nsw i32 [[TMP56]], 0
 // CHECK16-NEXT:    [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1
 // CHECK16-NEXT:    [[SUB14:%.*]] = sub nsw i32 [[DIV13]], 1
 // CHECK16-NEXT:    store i32 [[SUB14]], i32* [[DOTCAPTURE_EXPR_11]], align 4
-// CHECK16-NEXT:    [[TMP59:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_11]], align 4
-// CHECK16-NEXT:    [[ADD15:%.*]] = add nsw i32 [[TMP59]], 1
-// CHECK16-NEXT:    [[TMP60:%.*]] = zext i32 [[ADD15]] to i64
-// CHECK16-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP60]])
-// CHECK16-NEXT:    [[TMP61:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l159.region_id, i32 3, i8** [[TMP54]], i8** [[TMP55]], i64* [[TMP56]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK16-NEXT:    [[TMP62:%.*]] = icmp ne i32 [[TMP61]], 0
-// CHECK16-NEXT:    br i1 [[TMP62]], label [[OMP_OFFLOAD_FAILED16:%.*]], label [[OMP_OFFLOAD_CONT17:%.*]]
+// CHECK16-NEXT:    [[TMP57:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_11]], align 4
+// CHECK16-NEXT:    [[ADD15:%.*]] = add nsw i32 [[TMP57]], 1
+// CHECK16-NEXT:    [[TMP58:%.*]] = zext i32 [[ADD15]] to i64
+// CHECK16-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP58]])
+// CHECK16-NEXT:    [[TMP59:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l159.region_id, i32 3, i8** [[TMP52]], i8** [[TMP53]], i64* [[TMP54]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.5, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK16-NEXT:    [[TMP60:%.*]] = icmp ne i32 [[TMP59]], 0
+// CHECK16-NEXT:    br i1 [[TMP60]], label [[OMP_OFFLOAD_FAILED16:%.*]], label [[OMP_OFFLOAD_CONT17:%.*]]
 // CHECK16:       omp_offload.failed16:
-// CHECK16-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l159(i64 [[TMP34]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK16-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l159(i64 [[TMP33]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK16-NEXT:    br label [[OMP_OFFLOAD_CONT17]]
 // CHECK16:       omp_offload.cont17:
-// CHECK16-NEXT:    [[TMP63:%.*]] = load i32, i32* [[M]], align 4
+// CHECK16-NEXT:    [[TMP61:%.*]] = load i32, i32* [[M]], align 4
 // CHECK16-NEXT:    [[CONV18:%.*]] = bitcast i64* [[M_CASTED]] to i32*
-// CHECK16-NEXT:    store i32 [[TMP63]], i32* [[CONV18]], align 4
-// CHECK16-NEXT:    [[TMP64:%.*]] = load i64, i64* [[M_CASTED]], align 8
-// CHECK16-NEXT:    [[TMP65:%.*]] = load i32, i32* [[N]], align 4
+// CHECK16-NEXT:    store i32 [[TMP61]], i32* [[CONV18]], align 4
+// CHECK16-NEXT:    [[TMP62:%.*]] = load i64, i64* [[M_CASTED]], align 8
+// CHECK16-NEXT:    [[TMP63:%.*]] = load i32, i32* [[N]], align 4
 // CHECK16-NEXT:    [[CONV20:%.*]] = bitcast i64* [[N_CASTED19]] to i32*
-// CHECK16-NEXT:    store i32 [[TMP65]], i32* [[CONV20]], align 4
-// CHECK16-NEXT:    [[TMP66:%.*]] = load i64, i64* [[N_CASTED19]], align 8
-// CHECK16-NEXT:    [[TMP67:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK16-NEXT:    [[TMP68:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 0
-// CHECK16-NEXT:    [[TMP69:%.*]] = bitcast i8** [[TMP68]] to i64*
-// CHECK16-NEXT:    store i64 [[TMP64]], i64* [[TMP69]], align 8
-// CHECK16-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 0
-// CHECK16-NEXT:    [[TMP71:%.*]] = bitcast i8** [[TMP70]] to i64*
-// CHECK16-NEXT:    store i64 [[TMP64]], i64* [[TMP71]], align 8
-// CHECK16-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES24]], i32 0, i32 0
-// CHECK16-NEXT:    store i64 4, i64* [[TMP72]], align 8
-// CHECK16-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 0
-// CHECK16-NEXT:    store i8* null, i8** [[TMP73]], align 8
-// CHECK16-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 1
+// CHECK16-NEXT:    store i32 [[TMP63]], i32* [[CONV20]], align 4
+// CHECK16-NEXT:    [[TMP64:%.*]] = load i64, i64* [[N_CASTED19]], align 8
+// CHECK16-NEXT:    [[TMP65:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK16-NEXT:    [[TMP66:%.*]] = bitcast [4 x i64]* [[DOTOFFLOAD_SIZES24]] to i8*
+// CHECK16-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP66]], i8* align 8 bitcast ([4 x i64]* @.offload_sizes.8 to i8*), i64 32, i1 false)
+// CHECK16-NEXT:    [[TMP67:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 0
+// CHECK16-NEXT:    [[TMP68:%.*]] = bitcast i8** [[TMP67]] to i64*
+// CHECK16-NEXT:    store i64 [[TMP62]], i64* [[TMP68]], align 8
+// CHECK16-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 0
+// CHECK16-NEXT:    [[TMP70:%.*]] = bitcast i8** [[TMP69]] to i64*
+// CHECK16-NEXT:    store i64 [[TMP62]], i64* [[TMP70]], align 8
+// CHECK16-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 0
+// CHECK16-NEXT:    store i8* null, i8** [[TMP71]], align 8
+// CHECK16-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 1
+// CHECK16-NEXT:    [[TMP73:%.*]] = bitcast i8** [[TMP72]] to i64*
+// CHECK16-NEXT:    store i64 [[TMP64]], i64* [[TMP73]], align 8
+// CHECK16-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 1
 // CHECK16-NEXT:    [[TMP75:%.*]] = bitcast i8** [[TMP74]] to i64*
-// CHECK16-NEXT:    store i64 [[TMP66]], i64* [[TMP75]], align 8
-// CHECK16-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 1
-// CHECK16-NEXT:    [[TMP77:%.*]] = bitcast i8** [[TMP76]] to i64*
-// CHECK16-NEXT:    store i64 [[TMP66]], i64* [[TMP77]], align 8
-// CHECK16-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES24]], i32 0, i32 1
-// CHECK16-NEXT:    store i64 4, i64* [[TMP78]], align 8
-// CHECK16-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 1
-// CHECK16-NEXT:    store i8* null, i8** [[TMP79]], align 8
-// CHECK16-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 2
-// CHECK16-NEXT:    [[TMP81:%.*]] = bitcast i8** [[TMP80]] to i64*
-// CHECK16-NEXT:    store i64 [[TMP1]], i64* [[TMP81]], align 8
-// CHECK16-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 2
-// CHECK16-NEXT:    [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i64*
-// CHECK16-NEXT:    store i64 [[TMP1]], i64* [[TMP83]], align 8
-// CHECK16-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES24]], i32 0, i32 2
-// CHECK16-NEXT:    store i64 8, i64* [[TMP84]], align 8
-// CHECK16-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 2
-// CHECK16-NEXT:    store i8* null, i8** [[TMP85]], align 8
-// CHECK16-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 3
-// CHECK16-NEXT:    [[TMP87:%.*]] = bitcast i8** [[TMP86]] to i32**
-// CHECK16-NEXT:    store i32* [[VLA]], i32** [[TMP87]], align 8
-// CHECK16-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 3
-// CHECK16-NEXT:    [[TMP89:%.*]] = bitcast i8** [[TMP88]] to i32**
-// CHECK16-NEXT:    store i32* [[VLA]], i32** [[TMP89]], align 8
-// CHECK16-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES24]], i32 0, i32 3
-// CHECK16-NEXT:    store i64 [[TMP67]], i64* [[TMP90]], align 8
-// CHECK16-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 3
-// CHECK16-NEXT:    store i8* null, i8** [[TMP91]], align 8
-// CHECK16-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 0
-// CHECK16-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 0
-// CHECK16-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES24]], i32 0, i32 0
-// CHECK16-NEXT:    [[TMP95:%.*]] = load i32, i32* [[N]], align 4
-// CHECK16-NEXT:    store i32 [[TMP95]], i32* [[DOTCAPTURE_EXPR_26]], align 4
-// CHECK16-NEXT:    [[TMP96:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_26]], align 4
-// CHECK16-NEXT:    [[SUB28:%.*]] = sub nsw i32 [[TMP96]], 0
+// CHECK16-NEXT:    store i64 [[TMP64]], i64* [[TMP75]], align 8
+// CHECK16-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 1
+// CHECK16-NEXT:    store i8* null, i8** [[TMP76]], align 8
+// CHECK16-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 2
+// CHECK16-NEXT:    [[TMP78:%.*]] = bitcast i8** [[TMP77]] to i64*
+// CHECK16-NEXT:    store i64 [[TMP1]], i64* [[TMP78]], align 8
+// CHECK16-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 2
+// CHECK16-NEXT:    [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i64*
+// CHECK16-NEXT:    store i64 [[TMP1]], i64* [[TMP80]], align 8
+// CHECK16-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 2
+// CHECK16-NEXT:    store i8* null, i8** [[TMP81]], align 8
+// CHECK16-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 3
+// CHECK16-NEXT:    [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i32**
+// CHECK16-NEXT:    store i32* [[VLA]], i32** [[TMP83]], align 8
+// CHECK16-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 3
+// CHECK16-NEXT:    [[TMP85:%.*]] = bitcast i8** [[TMP84]] to i32**
+// CHECK16-NEXT:    store i32* [[VLA]], i32** [[TMP85]], align 8
+// CHECK16-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES24]], i32 0, i32 3
+// CHECK16-NEXT:    store i64 [[TMP65]], i64* [[TMP86]], align 8
+// CHECK16-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS23]], i64 0, i64 3
+// CHECK16-NEXT:    store i8* null, i8** [[TMP87]], align 8
+// CHECK16-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 0
+// CHECK16-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 0
+// CHECK16-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES24]], i32 0, i32 0
+// CHECK16-NEXT:    [[TMP91:%.*]] = load i32, i32* [[N]], align 4
+// CHECK16-NEXT:    store i32 [[TMP91]], i32* [[DOTCAPTURE_EXPR_26]], align 4
+// CHECK16-NEXT:    [[TMP92:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_26]], align 4
+// CHECK16-NEXT:    [[SUB28:%.*]] = sub nsw i32 [[TMP92]], 0
 // CHECK16-NEXT:    [[DIV29:%.*]] = sdiv i32 [[SUB28]], 1
 // CHECK16-NEXT:    [[SUB30:%.*]] = sub nsw i32 [[DIV29]], 1
 // CHECK16-NEXT:    store i32 [[SUB30]], i32* [[DOTCAPTURE_EXPR_27]], align 4
-// CHECK16-NEXT:    [[TMP97:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_27]], align 4
-// CHECK16-NEXT:    [[ADD31:%.*]] = add nsw i32 [[TMP97]], 1
-// CHECK16-NEXT:    [[TMP98:%.*]] = zext i32 [[ADD31]] to i64
-// CHECK16-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP98]])
-// CHECK16-NEXT:    [[TMP99:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l164.region_id, i32 4, i8** [[TMP92]], i8** [[TMP93]], i64* [[TMP94]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.7, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK16-NEXT:    [[TMP100:%.*]] = icmp ne i32 [[TMP99]], 0
-// CHECK16-NEXT:    br i1 [[TMP100]], label [[OMP_OFFLOAD_FAILED32:%.*]], label [[OMP_OFFLOAD_CONT33:%.*]]
+// CHECK16-NEXT:    [[TMP93:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_27]], align 4
+// CHECK16-NEXT:    [[ADD31:%.*]] = add nsw i32 [[TMP93]], 1
+// CHECK16-NEXT:    [[TMP94:%.*]] = zext i32 [[ADD31]] to i64
+// CHECK16-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP94]])
+// CHECK16-NEXT:    [[TMP95:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l164.region_id, i32 4, i8** [[TMP88]], i8** [[TMP89]], i64* [[TMP90]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK16-NEXT:    [[TMP96:%.*]] = icmp ne i32 [[TMP95]], 0
+// CHECK16-NEXT:    br i1 [[TMP96]], label [[OMP_OFFLOAD_FAILED32:%.*]], label [[OMP_OFFLOAD_CONT33:%.*]]
 // CHECK16:       omp_offload.failed32:
-// CHECK16-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l164(i64 [[TMP64]], i64 [[TMP66]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK16-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l164(i64 [[TMP62]], i64 [[TMP64]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK16-NEXT:    br label [[OMP_OFFLOAD_CONT33]]
 // CHECK16:       omp_offload.cont33:
-// CHECK16-NEXT:    [[TMP101:%.*]] = load i32, i32* [[N]], align 4
+// CHECK16-NEXT:    [[TMP97:%.*]] = load i32, i32* [[N]], align 4
 // CHECK16-NEXT:    [[CONV35:%.*]] = bitcast i64* [[N_CASTED34]] to i32*
-// CHECK16-NEXT:    store i32 [[TMP101]], i32* [[CONV35]], align 4
-// CHECK16-NEXT:    [[TMP102:%.*]] = load i64, i64* [[N_CASTED34]], align 8
-// CHECK16-NEXT:    [[TMP103:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK16-NEXT:    [[TMP104:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS36]], i32 0, i32 0
-// CHECK16-NEXT:    [[TMP105:%.*]] = bitcast i8** [[TMP104]] to i64*
-// CHECK16-NEXT:    store i64 [[TMP102]], i64* [[TMP105]], align 8
-// CHECK16-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS37]], i32 0, i32 0
+// CHECK16-NEXT:    store i32 [[TMP97]], i32* [[CONV35]], align 4
+// CHECK16-NEXT:    [[TMP98:%.*]] = load i64, i64* [[N_CASTED34]], align 8
+// CHECK16-NEXT:    [[TMP99:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK16-NEXT:    [[TMP100:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES39]] to i8*
+// CHECK16-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP100]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes.12 to i8*), i64 24, i1 false)
+// CHECK16-NEXT:    [[TMP101:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS36]], i32 0, i32 0
+// CHECK16-NEXT:    [[TMP102:%.*]] = bitcast i8** [[TMP101]] to i64*
+// CHECK16-NEXT:    store i64 [[TMP98]], i64* [[TMP102]], align 8
+// CHECK16-NEXT:    [[TMP103:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS37]], i32 0, i32 0
+// CHECK16-NEXT:    [[TMP104:%.*]] = bitcast i8** [[TMP103]] to i64*
+// CHECK16-NEXT:    store i64 [[TMP98]], i64* [[TMP104]], align 8
+// CHECK16-NEXT:    [[TMP105:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS38]], i64 0, i64 0
+// CHECK16-NEXT:    store i8* null, i8** [[TMP105]], align 8
+// CHECK16-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS36]], i32 0, i32 1
 // CHECK16-NEXT:    [[TMP107:%.*]] = bitcast i8** [[TMP106]] to i64*
-// CHECK16-NEXT:    store i64 [[TMP102]], i64* [[TMP107]], align 8
-// CHECK16-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES39]], i32 0, i32 0
-// CHECK16-NEXT:    store i64 4, i64* [[TMP108]], align 8
-// CHECK16-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS38]], i64 0, i64 0
-// CHECK16-NEXT:    store i8* null, i8** [[TMP109]], align 8
-// CHECK16-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS36]], i32 0, i32 1
-// CHECK16-NEXT:    [[TMP111:%.*]] = bitcast i8** [[TMP110]] to i64*
-// CHECK16-NEXT:    store i64 [[TMP1]], i64* [[TMP111]], align 8
-// CHECK16-NEXT:    [[TMP112:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS37]], i32 0, i32 1
-// CHECK16-NEXT:    [[TMP113:%.*]] = bitcast i8** [[TMP112]] to i64*
-// CHECK16-NEXT:    store i64 [[TMP1]], i64* [[TMP113]], align 8
-// CHECK16-NEXT:    [[TMP114:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES39]], i32 0, i32 1
-// CHECK16-NEXT:    store i64 8, i64* [[TMP114]], align 8
-// CHECK16-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS38]], i64 0, i64 1
-// CHECK16-NEXT:    store i8* null, i8** [[TMP115]], align 8
-// CHECK16-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS36]], i32 0, i32 2
-// CHECK16-NEXT:    [[TMP117:%.*]] = bitcast i8** [[TMP116]] to i32**
-// CHECK16-NEXT:    store i32* [[VLA]], i32** [[TMP117]], align 8
-// CHECK16-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS37]], i32 0, i32 2
-// CHECK16-NEXT:    [[TMP119:%.*]] = bitcast i8** [[TMP118]] to i32**
-// CHECK16-NEXT:    store i32* [[VLA]], i32** [[TMP119]], align 8
-// CHECK16-NEXT:    [[TMP120:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES39]], i32 0, i32 2
-// CHECK16-NEXT:    store i64 [[TMP103]], i64* [[TMP120]], align 8
-// CHECK16-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS38]], i64 0, i64 2
-// CHECK16-NEXT:    store i8* null, i8** [[TMP121]], align 8
-// CHECK16-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS36]], i32 0, i32 0
-// CHECK16-NEXT:    [[TMP123:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS37]], i32 0, i32 0
-// CHECK16-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES39]], i32 0, i32 0
-// CHECK16-NEXT:    [[TMP125:%.*]] = load i32, i32* [[N]], align 4
-// CHECK16-NEXT:    store i32 [[TMP125]], i32* [[DOTCAPTURE_EXPR_41]], align 4
-// CHECK16-NEXT:    [[TMP126:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_41]], align 4
-// CHECK16-NEXT:    [[SUB43:%.*]] = sub nsw i32 [[TMP126]], 0
+// CHECK16-NEXT:    store i64 [[TMP1]], i64* [[TMP107]], align 8
+// CHECK16-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS37]], i32 0, i32 1
+// CHECK16-NEXT:    [[TMP109:%.*]] = bitcast i8** [[TMP108]] to i64*
+// CHECK16-NEXT:    store i64 [[TMP1]], i64* [[TMP109]], align 8
+// CHECK16-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS38]], i64 0, i64 1
+// CHECK16-NEXT:    store i8* null, i8** [[TMP110]], align 8
+// CHECK16-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS36]], i32 0, i32 2
+// CHECK16-NEXT:    [[TMP112:%.*]] = bitcast i8** [[TMP111]] to i32**
+// CHECK16-NEXT:    store i32* [[VLA]], i32** [[TMP112]], align 8
+// CHECK16-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS37]], i32 0, i32 2
+// CHECK16-NEXT:    [[TMP114:%.*]] = bitcast i8** [[TMP113]] to i32**
+// CHECK16-NEXT:    store i32* [[VLA]], i32** [[TMP114]], align 8
+// CHECK16-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES39]], i32 0, i32 2
+// CHECK16-NEXT:    store i64 [[TMP99]], i64* [[TMP115]], align 8
+// CHECK16-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS38]], i64 0, i64 2
+// CHECK16-NEXT:    store i8* null, i8** [[TMP116]], align 8
+// CHECK16-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS36]], i32 0, i32 0
+// CHECK16-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS37]], i32 0, i32 0
+// CHECK16-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES39]], i32 0, i32 0
+// CHECK16-NEXT:    [[TMP120:%.*]] = load i32, i32* [[N]], align 4
+// CHECK16-NEXT:    store i32 [[TMP120]], i32* [[DOTCAPTURE_EXPR_41]], align 4
+// CHECK16-NEXT:    [[TMP121:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_41]], align 4
+// CHECK16-NEXT:    [[SUB43:%.*]] = sub nsw i32 [[TMP121]], 0
 // CHECK16-NEXT:    [[DIV44:%.*]] = sdiv i32 [[SUB43]], 1
 // CHECK16-NEXT:    [[SUB45:%.*]] = sub nsw i32 [[DIV44]], 1
 // CHECK16-NEXT:    store i32 [[SUB45]], i32* [[DOTCAPTURE_EXPR_42]], align 4
-// CHECK16-NEXT:    [[TMP127:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_42]], align 4
-// CHECK16-NEXT:    [[ADD46:%.*]] = add nsw i32 [[TMP127]], 1
-// CHECK16-NEXT:    [[TMP128:%.*]] = zext i32 [[ADD46]] to i64
-// CHECK16-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP128]])
-// CHECK16-NEXT:    [[TMP129:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l169.region_id, i32 3, i8** [[TMP122]], i8** [[TMP123]], i64* [[TMP124]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.10, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK16-NEXT:    [[TMP130:%.*]] = icmp ne i32 [[TMP129]], 0
-// CHECK16-NEXT:    br i1 [[TMP130]], label [[OMP_OFFLOAD_FAILED47:%.*]], label [[OMP_OFFLOAD_CONT48:%.*]]
+// CHECK16-NEXT:    [[TMP122:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_42]], align 4
+// CHECK16-NEXT:    [[ADD46:%.*]] = add nsw i32 [[TMP122]], 1
+// CHECK16-NEXT:    [[TMP123:%.*]] = zext i32 [[ADD46]] to i64
+// CHECK16-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP123]])
+// CHECK16-NEXT:    [[TMP124:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l169.region_id, i32 3, i8** [[TMP117]], i8** [[TMP118]], i64* [[TMP119]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK16-NEXT:    [[TMP125:%.*]] = icmp ne i32 [[TMP124]], 0
+// CHECK16-NEXT:    br i1 [[TMP125]], label [[OMP_OFFLOAD_FAILED47:%.*]], label [[OMP_OFFLOAD_CONT48:%.*]]
 // CHECK16:       omp_offload.failed47:
-// CHECK16-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l169(i64 [[TMP102]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK16-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l169(i64 [[TMP98]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK16-NEXT:    br label [[OMP_OFFLOAD_CONT48]]
 // CHECK16:       omp_offload.cont48:
-// CHECK16-NEXT:    [[TMP131:%.*]] = load i32, i32* [[M]], align 4
+// CHECK16-NEXT:    [[TMP126:%.*]] = load i32, i32* [[M]], align 4
 // CHECK16-NEXT:    [[CONV50:%.*]] = bitcast i64* [[M_CASTED49]] to i32*
-// CHECK16-NEXT:    store i32 [[TMP131]], i32* [[CONV50]], align 4
-// CHECK16-NEXT:    [[TMP132:%.*]] = load i64, i64* [[M_CASTED49]], align 8
-// CHECK16-NEXT:    [[TMP133:%.*]] = load i32, i32* [[N]], align 4
+// CHECK16-NEXT:    store i32 [[TMP126]], i32* [[CONV50]], align 4
+// CHECK16-NEXT:    [[TMP127:%.*]] = load i64, i64* [[M_CASTED49]], align 8
+// CHECK16-NEXT:    [[TMP128:%.*]] = load i32, i32* [[N]], align 4
 // CHECK16-NEXT:    [[CONV52:%.*]] = bitcast i64* [[N_CASTED51]] to i32*
-// CHECK16-NEXT:    store i32 [[TMP133]], i32* [[CONV52]], align 4
-// CHECK16-NEXT:    [[TMP134:%.*]] = load i64, i64* [[N_CASTED51]], align 8
-// CHECK16-NEXT:    [[TMP135:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK16-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 0
-// CHECK16-NEXT:    [[TMP137:%.*]] = bitcast i8** [[TMP136]] to i64*
-// CHECK16-NEXT:    store i64 [[TMP132]], i64* [[TMP137]], align 8
-// CHECK16-NEXT:    [[TMP138:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 0
-// CHECK16-NEXT:    [[TMP139:%.*]] = bitcast i8** [[TMP138]] to i64*
-// CHECK16-NEXT:    store i64 [[TMP132]], i64* [[TMP139]], align 8
-// CHECK16-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES56]], i32 0, i32 0
-// CHECK16-NEXT:    store i64 4, i64* [[TMP140]], align 8
-// CHECK16-NEXT:    [[TMP141:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS55]], i64 0, i64 0
+// CHECK16-NEXT:    store i32 [[TMP128]], i32* [[CONV52]], align 4
+// CHECK16-NEXT:    [[TMP129:%.*]] = load i64, i64* [[N_CASTED51]], align 8
+// CHECK16-NEXT:    [[TMP130:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK16-NEXT:    [[TMP131:%.*]] = bitcast [4 x i64]* [[DOTOFFLOAD_SIZES56]] to i8*
+// CHECK16-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP131]], i8* align 8 bitcast ([4 x i64]* @.offload_sizes.16 to i8*), i64 32, i1 false)
+// CHECK16-NEXT:    [[TMP132:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 0
+// CHECK16-NEXT:    [[TMP133:%.*]] = bitcast i8** [[TMP132]] to i64*
+// CHECK16-NEXT:    store i64 [[TMP127]], i64* [[TMP133]], align 8
+// CHECK16-NEXT:    [[TMP134:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 0
+// CHECK16-NEXT:    [[TMP135:%.*]] = bitcast i8** [[TMP134]] to i64*
+// CHECK16-NEXT:    store i64 [[TMP127]], i64* [[TMP135]], align 8
+// CHECK16-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS55]], i64 0, i64 0
+// CHECK16-NEXT:    store i8* null, i8** [[TMP136]], align 8
+// CHECK16-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 1
+// CHECK16-NEXT:    [[TMP138:%.*]] = bitcast i8** [[TMP137]] to i64*
+// CHECK16-NEXT:    store i64 [[TMP129]], i64* [[TMP138]], align 8
+// CHECK16-NEXT:    [[TMP139:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 1
+// CHECK16-NEXT:    [[TMP140:%.*]] = bitcast i8** [[TMP139]] to i64*
+// CHECK16-NEXT:    store i64 [[TMP129]], i64* [[TMP140]], align 8
+// CHECK16-NEXT:    [[TMP141:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS55]], i64 0, i64 1
 // CHECK16-NEXT:    store i8* null, i8** [[TMP141]], align 8
-// CHECK16-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 1
+// CHECK16-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 2
 // CHECK16-NEXT:    [[TMP143:%.*]] = bitcast i8** [[TMP142]] to i64*
-// CHECK16-NEXT:    store i64 [[TMP134]], i64* [[TMP143]], align 8
-// CHECK16-NEXT:    [[TMP144:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 1
+// CHECK16-NEXT:    store i64 [[TMP1]], i64* [[TMP143]], align 8
+// CHECK16-NEXT:    [[TMP144:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 2
 // CHECK16-NEXT:    [[TMP145:%.*]] = bitcast i8** [[TMP144]] to i64*
-// CHECK16-NEXT:    store i64 [[TMP134]], i64* [[TMP145]], align 8
-// CHECK16-NEXT:    [[TMP146:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES56]], i32 0, i32 1
-// CHECK16-NEXT:    store i64 4, i64* [[TMP146]], align 8
-// CHECK16-NEXT:    [[TMP147:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS55]], i64 0, i64 1
-// CHECK16-NEXT:    store i8* null, i8** [[TMP147]], align 8
-// CHECK16-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 2
-// CHECK16-NEXT:    [[TMP149:%.*]] = bitcast i8** [[TMP148]] to i64*
-// CHECK16-NEXT:    store i64 [[TMP1]], i64* [[TMP149]], align 8
-// CHECK16-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 2
-// CHECK16-NEXT:    [[TMP151:%.*]] = bitcast i8** [[TMP150]] to i64*
-// CHECK16-NEXT:    store i64 [[TMP1]], i64* [[TMP151]], align 8
-// CHECK16-NEXT:    [[TMP152:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES56]], i32 0, i32 2
-// CHECK16-NEXT:    store i64 8, i64* [[TMP152]], align 8
-// CHECK16-NEXT:    [[TMP153:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS55]], i64 0, i64 2
-// CHECK16-NEXT:    store i8* null, i8** [[TMP153]], align 8
-// CHECK16-NEXT:    [[TMP154:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 3
-// CHECK16-NEXT:    [[TMP155:%.*]] = bitcast i8** [[TMP154]] to i32**
-// CHECK16-NEXT:    store i32* [[VLA]], i32** [[TMP155]], align 8
-// CHECK16-NEXT:    [[TMP156:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 3
-// CHECK16-NEXT:    [[TMP157:%.*]] = bitcast i8** [[TMP156]] to i32**
-// CHECK16-NEXT:    store i32* [[VLA]], i32** [[TMP157]], align 8
-// CHECK16-NEXT:    [[TMP158:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES56]], i32 0, i32 3
-// CHECK16-NEXT:    store i64 [[TMP135]], i64* [[TMP158]], align 8
-// CHECK16-NEXT:    [[TMP159:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS55]], i64 0, i64 3
-// CHECK16-NEXT:    store i8* null, i8** [[TMP159]], align 8
-// CHECK16-NEXT:    [[TMP160:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 0
-// CHECK16-NEXT:    [[TMP161:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 0
-// CHECK16-NEXT:    [[TMP162:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES56]], i32 0, i32 0
-// CHECK16-NEXT:    [[TMP163:%.*]] = load i32, i32* [[N]], align 4
-// CHECK16-NEXT:    store i32 [[TMP163]], i32* [[DOTCAPTURE_EXPR_58]], align 4
-// CHECK16-NEXT:    [[TMP164:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_58]], align 4
-// CHECK16-NEXT:    [[SUB60:%.*]] = sub nsw i32 [[TMP164]], 0
+// CHECK16-NEXT:    store i64 [[TMP1]], i64* [[TMP145]], align 8
+// CHECK16-NEXT:    [[TMP146:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS55]], i64 0, i64 2
+// CHECK16-NEXT:    store i8* null, i8** [[TMP146]], align 8
+// CHECK16-NEXT:    [[TMP147:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 3
+// CHECK16-NEXT:    [[TMP148:%.*]] = bitcast i8** [[TMP147]] to i32**
+// CHECK16-NEXT:    store i32* [[VLA]], i32** [[TMP148]], align 8
+// CHECK16-NEXT:    [[TMP149:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 3
+// CHECK16-NEXT:    [[TMP150:%.*]] = bitcast i8** [[TMP149]] to i32**
+// CHECK16-NEXT:    store i32* [[VLA]], i32** [[TMP150]], align 8
+// CHECK16-NEXT:    [[TMP151:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES56]], i32 0, i32 3
+// CHECK16-NEXT:    store i64 [[TMP130]], i64* [[TMP151]], align 8
+// CHECK16-NEXT:    [[TMP152:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS55]], i64 0, i64 3
+// CHECK16-NEXT:    store i8* null, i8** [[TMP152]], align 8
+// CHECK16-NEXT:    [[TMP153:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS53]], i32 0, i32 0
+// CHECK16-NEXT:    [[TMP154:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS54]], i32 0, i32 0
+// CHECK16-NEXT:    [[TMP155:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES56]], i32 0, i32 0
+// CHECK16-NEXT:    [[TMP156:%.*]] = load i32, i32* [[N]], align 4
+// CHECK16-NEXT:    store i32 [[TMP156]], i32* [[DOTCAPTURE_EXPR_58]], align 4
+// CHECK16-NEXT:    [[TMP157:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_58]], align 4
+// CHECK16-NEXT:    [[SUB60:%.*]] = sub nsw i32 [[TMP157]], 0
 // CHECK16-NEXT:    [[DIV61:%.*]] = sdiv i32 [[SUB60]], 1
 // CHECK16-NEXT:    [[SUB62:%.*]] = sub nsw i32 [[DIV61]], 1
 // CHECK16-NEXT:    store i32 [[SUB62]], i32* [[DOTCAPTURE_EXPR_59]], align 4
-// CHECK16-NEXT:    [[TMP165:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_59]], align 4
-// CHECK16-NEXT:    [[ADD63:%.*]] = add nsw i32 [[TMP165]], 1
-// CHECK16-NEXT:    [[TMP166:%.*]] = zext i32 [[ADD63]] to i64
-// CHECK16-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP166]])
-// CHECK16-NEXT:    [[TMP167:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l174.region_id, i32 4, i8** [[TMP160]], i8** [[TMP161]], i64* [[TMP162]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK16-NEXT:    [[TMP168:%.*]] = icmp ne i32 [[TMP167]], 0
-// CHECK16-NEXT:    br i1 [[TMP168]], label [[OMP_OFFLOAD_FAILED64:%.*]], label [[OMP_OFFLOAD_CONT65:%.*]]
+// CHECK16-NEXT:    [[TMP158:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_59]], align 4
+// CHECK16-NEXT:    [[ADD63:%.*]] = add nsw i32 [[TMP158]], 1
+// CHECK16-NEXT:    [[TMP159:%.*]] = zext i32 [[ADD63]] to i64
+// CHECK16-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP159]])
+// CHECK16-NEXT:    [[TMP160:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l174.region_id, i32 4, i8** [[TMP153]], i8** [[TMP154]], i64* [[TMP155]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.17, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK16-NEXT:    [[TMP161:%.*]] = icmp ne i32 [[TMP160]], 0
+// CHECK16-NEXT:    br i1 [[TMP161]], label [[OMP_OFFLOAD_FAILED64:%.*]], label [[OMP_OFFLOAD_CONT65:%.*]]
 // CHECK16:       omp_offload.failed64:
-// CHECK16-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l174(i64 [[TMP132]], i64 [[TMP134]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK16-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l174(i64 [[TMP127]], i64 [[TMP129]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK16-NEXT:    br label [[OMP_OFFLOAD_CONT65]]
 // CHECK16:       omp_offload.cont65:
-// CHECK16-NEXT:    [[TMP169:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK16-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10EEiT_(i32 noundef signext [[TMP169]])
+// CHECK16-NEXT:    [[TMP162:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK16-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10EEiT_(i32 noundef signext [[TMP162]])
 // CHECK16-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK16-NEXT:    [[TMP170:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK16-NEXT:    call void @llvm.stackrestore(i8* [[TMP170]])
-// CHECK16-NEXT:    [[TMP171:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK16-NEXT:    ret i32 [[TMP171]]
+// CHECK16-NEXT:    [[TMP163:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK16-NEXT:    call void @llvm.stackrestore(i8* [[TMP163]])
+// CHECK16-NEXT:    [[TMP164:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK16-NEXT:    ret i32 [[TMP164]]
 //
 //
 // CHECK16-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l154
@@ -17604,11 +17548,11 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK16-NEXT:    store i32 [[TMP3]], i32* [[CONV2]], align 4
 // CHECK16-NEXT:    [[TMP4:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32* [[CONV1]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP4]])
+// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* [[CONV1]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP4]])
 // CHECK16-NEXT:    ret void
 //
 //
-// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..5
+// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..6
 // CHECK16-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK16-NEXT:  entry:
 // CHECK16-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -17690,7 +17634,7 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK16-NEXT:    store i32 [[TMP21]], i32* [[CONV7]], align 4, !llvm.access.group !28
 // CHECK16-NEXT:    [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !28
-// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]], i64 [[TMP22]]), !llvm.access.group !28
+// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*, i64)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]], i64 [[TMP22]]), !llvm.access.group !28
 // CHECK16-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK16:       omp.inner.for.inc:
 // CHECK16-NEXT:    [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28
@@ -17744,7 +17688,7 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    ret void
 //
 //
-// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..6
+// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..7
 // CHECK16-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK16-NEXT:  entry:
 // CHECK16-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -17876,11 +17820,11 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32*
 // CHECK16-NEXT:    [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8
 // CHECK16-NEXT:    [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8
-// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), i32* [[CONV]], i64 [[TMP0]], i32* [[TMP1]])
+// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), i32* [[CONV]], i64 [[TMP0]], i32* [[TMP1]])
 // CHECK16-NEXT:    ret void
 //
 //
-// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..8
+// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..10
 // CHECK16-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] {
 // CHECK16-NEXT:  entry:
 // CHECK16-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -17952,7 +17896,7 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    [[TMP17:%.*]] = zext i32 [[TMP16]] to i64
 // CHECK16-NEXT:    [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !34
 // CHECK16-NEXT:    [[TMP19:%.*]] = zext i32 [[TMP18]] to i64
-// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]]), !llvm.access.group !34
+// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]]), !llvm.access.group !34
 // CHECK16-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK16:       omp.inner.for.inc:
 // CHECK16-NEXT:    [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34
@@ -17983,7 +17927,7 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    ret void
 //
 //
-// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..9
+// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..11
 // CHECK16-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] {
 // CHECK16-NEXT:  entry:
 // CHECK16-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -18119,11 +18063,11 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK16-NEXT:    store i32 [[TMP3]], i32* [[CONV2]], align 4
 // CHECK16-NEXT:    [[TMP4:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32* [[CONV1]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP4]])
+// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*, i64)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i32* [[CONV1]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP4]])
 // CHECK16-NEXT:    ret void
 //
 //
-// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..11
+// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..14
 // CHECK16-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK16-NEXT:  entry:
 // CHECK16-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -18203,7 +18147,7 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK16-NEXT:    store i32 [[TMP20]], i32* [[CONV7]], align 4, !llvm.access.group !40
 // CHECK16-NEXT:    [[TMP21:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !40
-// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*, i64)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]], i64 [[TMP21]]), !llvm.access.group !40
+// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]], i64 [[TMP21]]), !llvm.access.group !40
 // CHECK16-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK16:       omp.inner.for.inc:
 // CHECK16-NEXT:    [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40
@@ -18234,7 +18178,7 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    ret void
 //
 //
-// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..12
+// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..15
 // CHECK16-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK16-NEXT:  entry:
 // CHECK16-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -18352,7 +18296,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK16-LABEL: define {{[^@]+}}@_Z5tmainIiLi10EEiT_
-// CHECK16-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK16-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK16-NEXT:  entry:
 // CHECK16-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK16-NEXT:    [[A:%.*]] = alloca [10 x i32], align 4
@@ -18392,7 +18336,7 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK16-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK16-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK16-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l122.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.16, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK16-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l122.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.20, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.21, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK16-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK16-NEXT:    br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK16:       omp_offload.failed:
@@ -18410,7 +18354,7 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK16-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
 // CHECK16-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK16-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l127.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.19, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.20, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK16-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l127.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.24, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.25, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK16-NEXT:    [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
 // CHECK16-NEXT:    br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]]
 // CHECK16:       omp_offload.failed5:
@@ -18440,7 +18384,7 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0
 // CHECK16-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS8]], i32 0, i32 0
 // CHECK16-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK16-NEXT:    [[TMP32:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l132.region_id, i32 2, i8** [[TMP30]], i8** [[TMP31]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.23, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.24, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK16-NEXT:    [[TMP32:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l132.region_id, i32 2, i8** [[TMP30]], i8** [[TMP31]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.28, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.29, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK16-NEXT:    [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0
 // CHECK16-NEXT:    br i1 [[TMP33]], label [[OMP_OFFLOAD_FAILED11:%.*]], label [[OMP_OFFLOAD_CONT12:%.*]]
 // CHECK16:       omp_offload.failed11:
@@ -18458,7 +18402,7 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0
 // CHECK16-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 0
 // CHECK16-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK16-NEXT:    [[TMP41:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l137.region_id, i32 1, i8** [[TMP39]], i8** [[TMP40]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.27, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.28, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK16-NEXT:    [[TMP41:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l137.region_id, i32 1, i8** [[TMP39]], i8** [[TMP40]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.32, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.33, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK16-NEXT:    [[TMP42:%.*]] = icmp ne i32 [[TMP41]], 0
 // CHECK16-NEXT:    br i1 [[TMP42]], label [[OMP_OFFLOAD_FAILED17:%.*]], label [[OMP_OFFLOAD_CONT18:%.*]]
 // CHECK16:       omp_offload.failed17:
@@ -18488,7 +18432,7 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS21]], i32 0, i32 0
 // CHECK16-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS22]], i32 0, i32 0
 // CHECK16-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK16-NEXT:    [[TMP57:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l142.region_id, i32 2, i8** [[TMP55]], i8** [[TMP56]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.31, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.32, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK16-NEXT:    [[TMP57:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l142.region_id, i32 2, i8** [[TMP55]], i8** [[TMP56]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.36, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.37, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK16-NEXT:    [[TMP58:%.*]] = icmp ne i32 [[TMP57]], 0
 // CHECK16-NEXT:    br i1 [[TMP58]], label [[OMP_OFFLOAD_FAILED25:%.*]], label [[OMP_OFFLOAD_CONT26:%.*]]
 // CHECK16:       omp_offload.failed25:
@@ -18504,11 +18448,11 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8
 // CHECK16-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8
 // CHECK16-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8
-// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK16-NEXT:    ret void
 //
 //
-// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..14
+// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..18
 // CHECK16-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK16-NEXT:  entry:
 // CHECK16-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -18556,7 +18500,7 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
 // CHECK16-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !46
 // CHECK16-NEXT:    [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
-// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !46
+// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !46
 // CHECK16-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK16:       omp.inner.for.inc:
 // CHECK16-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46
@@ -18578,7 +18522,7 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    ret void
 //
 //
-// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..15
+// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..19
 // CHECK16-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK16-NEXT:  entry:
 // CHECK16-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -18668,11 +18612,11 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8
 // CHECK16-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8
 // CHECK16-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8
-// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..17 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..22 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK16-NEXT:    ret void
 //
 //
-// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..17
+// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..22
 // CHECK16-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK16-NEXT:  entry:
 // CHECK16-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -18720,7 +18664,7 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
 // CHECK16-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !52
 // CHECK16-NEXT:    [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
-// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !52
+// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..23 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !52
 // CHECK16-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK16:       omp.inner.for.inc:
 // CHECK16-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52
@@ -18742,7 +18686,7 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    ret void
 //
 //
-// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..18
+// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..23
 // CHECK16-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK16-NEXT:  entry:
 // CHECK16-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -18843,11 +18787,11 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK16-NEXT:    store i32 [[TMP2]], i32* [[CONV1]], align 4
 // CHECK16-NEXT:    [[TMP3:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i64)* @.omp_outlined..21 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i64 [[TMP3]])
+// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i64)* @.omp_outlined..26 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i64 [[TMP3]])
 // CHECK16-NEXT:    ret void
 //
 //
-// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..21
+// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..26
 // CHECK16-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK16-NEXT:  entry:
 // CHECK16-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -18903,7 +18847,7 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK16-NEXT:    store i32 [[TMP12]], i32* [[CONV2]], align 4, !llvm.access.group !58
 // CHECK16-NEXT:    [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !58
-// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..22 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]), !llvm.access.group !58
+// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..27 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]), !llvm.access.group !58
 // CHECK16-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK16:       omp.inner.for.inc:
 // CHECK16-NEXT:    [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58
@@ -18925,7 +18869,7 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    ret void
 //
 //
-// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..22
+// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..27
 // CHECK16-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK16-NEXT:  entry:
 // CHECK16-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -19040,11 +18984,11 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8
 // CHECK16-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8
 // CHECK16-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8
-// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..25 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK16-NEXT:    ret void
 //
 //
-// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..25
+// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..30
 // CHECK16-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK16-NEXT:  entry:
 // CHECK16-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -19092,7 +19036,7 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
 // CHECK16-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !64
 // CHECK16-NEXT:    [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
-// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !64
+// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !64
 // CHECK16-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK16:       omp.inner.for.inc:
 // CHECK16-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !64
@@ -19114,7 +19058,7 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    ret void
 //
 //
-// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..26
+// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..31
 // CHECK16-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK16-NEXT:  entry:
 // CHECK16-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -19213,11 +19157,11 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK16-NEXT:    store i32 [[TMP2]], i32* [[CONV1]], align 4
 // CHECK16-NEXT:    [[TMP3:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i64)* @.omp_outlined..29 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i64 [[TMP3]])
+// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i64)* @.omp_outlined..34 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i64 [[TMP3]])
 // CHECK16-NEXT:    ret void
 //
 //
-// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..29
+// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..34
 // CHECK16-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK16-NEXT:  entry:
 // CHECK16-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -19273,7 +19217,7 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK16-NEXT:    store i32 [[TMP12]], i32* [[CONV2]], align 4, !llvm.access.group !70
 // CHECK16-NEXT:    [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !70
-// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]), !llvm.access.group !70
+// CHECK16-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]), !llvm.access.group !70
 // CHECK16-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK16:       omp.inner.for.inc:
 // CHECK16-NEXT:    [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !70
@@ -19295,7 +19239,7 @@ int main (int argc, char **argv) {
 // CHECK16-NEXT:    ret void
 //
 //
-// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..30
+// CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..35
 // CHECK16-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK16-NEXT:  entry:
 // CHECK16-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -19382,7 +19326,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK16-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK16-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK16-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK16-NEXT:  entry:
 // CHECK16-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK16-NEXT:    ret void
@@ -19455,14 +19399,14 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4
 // CHECK17-NEXT:    [[TMP4:%.*]] = mul nuw i32 [[TMP0]], 4
 // CHECK17-NEXT:    [[TMP5:%.*]] = sext i32 [[TMP4]] to i64
-// CHECK17-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32*
-// CHECK17-NEXT:    store i32 [[TMP3]], i32* [[TMP7]], align 4
-// CHECK17-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i32*
-// CHECK17-NEXT:    store i32 [[TMP3]], i32* [[TMP9]], align 4
-// CHECK17-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK17-NEXT:    store i64 4, i64* [[TMP10]], align 4
+// CHECK17-NEXT:    [[TMP6:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK17-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP6]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes to i8*), i32 24, i1 false)
+// CHECK17-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i32*
+// CHECK17-NEXT:    store i32 [[TMP3]], i32* [[TMP8]], align 4
+// CHECK17-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i32*
+// CHECK17-NEXT:    store i32 [[TMP3]], i32* [[TMP10]], align 4
 // CHECK17-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
 // CHECK17-NEXT:    store i8* null, i8** [[TMP11]], align 4
 // CHECK17-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -19471,298 +19415,284 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK17-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i32*
 // CHECK17-NEXT:    store i32 [[TMP0]], i32* [[TMP15]], align 4
-// CHECK17-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK17-NEXT:    store i64 4, i64* [[TMP16]], align 4
-// CHECK17-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK17-NEXT:    store i8* null, i8** [[TMP17]], align 4
-// CHECK17-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK17-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
-// CHECK17-NEXT:    store i32* [[VLA]], i32** [[TMP19]], align 4
-// CHECK17-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK17-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32**
-// CHECK17-NEXT:    store i32* [[VLA]], i32** [[TMP21]], align 4
-// CHECK17-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK17-NEXT:    store i64 [[TMP5]], i64* [[TMP22]], align 4
-// CHECK17-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK17-NEXT:    store i8* null, i8** [[TMP23]], align 4
-// CHECK17-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP27:%.*]] = load i32, i32* [[N]], align 4
-// CHECK17-NEXT:    store i32 [[TMP27]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK17-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK17-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP28]], 0
+// CHECK17-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK17-NEXT:    store i8* null, i8** [[TMP16]], align 4
+// CHECK17-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK17-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32**
+// CHECK17-NEXT:    store i32* [[VLA]], i32** [[TMP18]], align 4
+// CHECK17-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK17-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32**
+// CHECK17-NEXT:    store i32* [[VLA]], i32** [[TMP20]], align 4
+// CHECK17-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
+// CHECK17-NEXT:    store i64 [[TMP5]], i64* [[TMP21]], align 4
+// CHECK17-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK17-NEXT:    store i8* null, i8** [[TMP22]], align 4
+// CHECK17-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP26:%.*]] = load i32, i32* [[N]], align 4
+// CHECK17-NEXT:    store i32 [[TMP26]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK17-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK17-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP27]], 0
 // CHECK17-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK17-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK17-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK17-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK17-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP29]], 1
-// CHECK17-NEXT:    [[TMP30:%.*]] = zext i32 [[ADD]] to i64
-// CHECK17-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP30]])
-// CHECK17-NEXT:    [[TMP31:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l154.region_id, i32 3, i8** [[TMP24]], i8** [[TMP25]], i64* [[TMP26]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK17-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
-// CHECK17-NEXT:    br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK17-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK17-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP28]], 1
+// CHECK17-NEXT:    [[TMP29:%.*]] = zext i32 [[ADD]] to i64
+// CHECK17-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP29]])
+// CHECK17-NEXT:    [[TMP30:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l154.region_id, i32 3, i8** [[TMP23]], i8** [[TMP24]], i64* [[TMP25]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK17-NEXT:    [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
+// CHECK17-NEXT:    br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK17:       omp_offload.failed:
 // CHECK17-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l154(i32 [[TMP3]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK17-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK17:       omp_offload.cont:
-// CHECK17-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N]], align 4
-// CHECK17-NEXT:    store i32 [[TMP33]], i32* [[N_CASTED3]], align 4
-// CHECK17-NEXT:    [[TMP34:%.*]] = load i32, i32* [[N_CASTED3]], align 4
-// CHECK17-NEXT:    [[TMP35:%.*]] = mul nuw i32 [[TMP0]], 4
-// CHECK17-NEXT:    [[TMP36:%.*]] = sext i32 [[TMP35]] to i64
+// CHECK17-NEXT:    [[TMP32:%.*]] = load i32, i32* [[N]], align 4
+// CHECK17-NEXT:    store i32 [[TMP32]], i32* [[N_CASTED3]], align 4
+// CHECK17-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N_CASTED3]], align 4
+// CHECK17-NEXT:    [[TMP34:%.*]] = mul nuw i32 [[TMP0]], 4
+// CHECK17-NEXT:    [[TMP35:%.*]] = sext i32 [[TMP34]] to i64
+// CHECK17-NEXT:    [[TMP36:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES7]] to i8*
+// CHECK17-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP36]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes.4 to i8*), i32 24, i1 false)
 // CHECK17-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
 // CHECK17-NEXT:    [[TMP38:%.*]] = bitcast i8** [[TMP37]] to i32*
-// CHECK17-NEXT:    store i32 [[TMP34]], i32* [[TMP38]], align 4
+// CHECK17-NEXT:    store i32 [[TMP33]], i32* [[TMP38]], align 4
 // CHECK17-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
 // CHECK17-NEXT:    [[TMP40:%.*]] = bitcast i8** [[TMP39]] to i32*
-// CHECK17-NEXT:    store i32 [[TMP34]], i32* [[TMP40]], align 4
-// CHECK17-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 0
-// CHECK17-NEXT:    store i64 4, i64* [[TMP41]], align 4
-// CHECK17-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 0
-// CHECK17-NEXT:    store i8* null, i8** [[TMP42]], align 4
-// CHECK17-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 1
-// CHECK17-NEXT:    [[TMP44:%.*]] = bitcast i8** [[TMP43]] to i32*
-// CHECK17-NEXT:    store i32 [[TMP0]], i32* [[TMP44]], align 4
-// CHECK17-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 1
-// CHECK17-NEXT:    [[TMP46:%.*]] = bitcast i8** [[TMP45]] to i32*
-// CHECK17-NEXT:    store i32 [[TMP0]], i32* [[TMP46]], align 4
-// CHECK17-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 1
-// CHECK17-NEXT:    store i64 4, i64* [[TMP47]], align 4
-// CHECK17-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 1
-// CHECK17-NEXT:    store i8* null, i8** [[TMP48]], align 4
-// CHECK17-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 2
+// CHECK17-NEXT:    store i32 [[TMP33]], i32* [[TMP40]], align 4
+// CHECK17-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 0
+// CHECK17-NEXT:    store i8* null, i8** [[TMP41]], align 4
+// CHECK17-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 1
+// CHECK17-NEXT:    [[TMP43:%.*]] = bitcast i8** [[TMP42]] to i32*
+// CHECK17-NEXT:    store i32 [[TMP0]], i32* [[TMP43]], align 4
+// CHECK17-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 1
+// CHECK17-NEXT:    [[TMP45:%.*]] = bitcast i8** [[TMP44]] to i32*
+// CHECK17-NEXT:    store i32 [[TMP0]], i32* [[TMP45]], align 4
+// CHECK17-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 1
+// CHECK17-NEXT:    store i8* null, i8** [[TMP46]], align 4
+// CHECK17-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 2
+// CHECK17-NEXT:    [[TMP48:%.*]] = bitcast i8** [[TMP47]] to i32**
+// CHECK17-NEXT:    store i32* [[VLA]], i32** [[TMP48]], align 4
+// CHECK17-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 2
 // CHECK17-NEXT:    [[TMP50:%.*]] = bitcast i8** [[TMP49]] to i32**
 // CHECK17-NEXT:    store i32* [[VLA]], i32** [[TMP50]], align 4
-// CHECK17-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 2
-// CHECK17-NEXT:    [[TMP52:%.*]] = bitcast i8** [[TMP51]] to i32**
-// CHECK17-NEXT:    store i32* [[VLA]], i32** [[TMP52]], align 4
-// CHECK17-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 2
-// CHECK17-NEXT:    store i64 [[TMP36]], i64* [[TMP53]], align 4
-// CHECK17-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 2
-// CHECK17-NEXT:    store i8* null, i8** [[TMP54]], align 4
-// CHECK17-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP58:%.*]] = load i32, i32* [[N]], align 4
-// CHECK17-NEXT:    store i32 [[TMP58]], i32* [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK17-NEXT:    [[TMP59:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK17-NEXT:    [[SUB11:%.*]] = sub nsw i32 [[TMP59]], 0
+// CHECK17-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 2
+// CHECK17-NEXT:    store i64 [[TMP35]], i64* [[TMP51]], align 4
+// CHECK17-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 2
+// CHECK17-NEXT:    store i8* null, i8** [[TMP52]], align 4
+// CHECK17-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP56:%.*]] = load i32, i32* [[N]], align 4
+// CHECK17-NEXT:    store i32 [[TMP56]], i32* [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK17-NEXT:    [[TMP57:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK17-NEXT:    [[SUB11:%.*]] = sub nsw i32 [[TMP57]], 0
 // CHECK17-NEXT:    [[DIV12:%.*]] = sdiv i32 [[SUB11]], 1
 // CHECK17-NEXT:    [[SUB13:%.*]] = sub nsw i32 [[DIV12]], 1
 // CHECK17-NEXT:    store i32 [[SUB13]], i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK17-NEXT:    [[TMP60:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK17-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP60]], 1
-// CHECK17-NEXT:    [[TMP61:%.*]] = zext i32 [[ADD14]] to i64
-// CHECK17-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP61]])
-// CHECK17-NEXT:    [[TMP62:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l159.region_id, i32 3, i8** [[TMP55]], i8** [[TMP56]], i64* [[TMP57]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK17-NEXT:    [[TMP63:%.*]] = icmp ne i32 [[TMP62]], 0
-// CHECK17-NEXT:    br i1 [[TMP63]], label [[OMP_OFFLOAD_FAILED15:%.*]], label [[OMP_OFFLOAD_CONT16:%.*]]
+// CHECK17-NEXT:    [[TMP58:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
+// CHECK17-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP58]], 1
+// CHECK17-NEXT:    [[TMP59:%.*]] = zext i32 [[ADD14]] to i64
+// CHECK17-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP59]])
+// CHECK17-NEXT:    [[TMP60:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l159.region_id, i32 3, i8** [[TMP53]], i8** [[TMP54]], i64* [[TMP55]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.5, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK17-NEXT:    [[TMP61:%.*]] = icmp ne i32 [[TMP60]], 0
+// CHECK17-NEXT:    br i1 [[TMP61]], label [[OMP_OFFLOAD_FAILED15:%.*]], label [[OMP_OFFLOAD_CONT16:%.*]]
 // CHECK17:       omp_offload.failed15:
-// CHECK17-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l159(i32 [[TMP34]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK17-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l159(i32 [[TMP33]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK17-NEXT:    br label [[OMP_OFFLOAD_CONT16]]
 // CHECK17:       omp_offload.cont16:
-// CHECK17-NEXT:    [[TMP64:%.*]] = load i32, i32* [[M]], align 4
-// CHECK17-NEXT:    store i32 [[TMP64]], i32* [[M_CASTED]], align 4
-// CHECK17-NEXT:    [[TMP65:%.*]] = load i32, i32* [[M_CASTED]], align 4
-// CHECK17-NEXT:    [[TMP66:%.*]] = load i32, i32* [[N]], align 4
-// CHECK17-NEXT:    store i32 [[TMP66]], i32* [[N_CASTED17]], align 4
-// CHECK17-NEXT:    [[TMP67:%.*]] = load i32, i32* [[N_CASTED17]], align 4
-// CHECK17-NEXT:    [[TMP68:%.*]] = mul nuw i32 [[TMP0]], 4
-// CHECK17-NEXT:    [[TMP69:%.*]] = sext i32 [[TMP68]] to i64
-// CHECK17-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP71:%.*]] = bitcast i8** [[TMP70]] to i32*
-// CHECK17-NEXT:    store i32 [[TMP65]], i32* [[TMP71]], align 4
-// CHECK17-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP73:%.*]] = bitcast i8** [[TMP72]] to i32*
-// CHECK17-NEXT:    store i32 [[TMP65]], i32* [[TMP73]], align 4
-// CHECK17-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 0
-// CHECK17-NEXT:    store i64 4, i64* [[TMP74]], align 4
-// CHECK17-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 0
-// CHECK17-NEXT:    store i8* null, i8** [[TMP75]], align 4
-// CHECK17-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 1
+// CHECK17-NEXT:    [[TMP62:%.*]] = load i32, i32* [[M]], align 4
+// CHECK17-NEXT:    store i32 [[TMP62]], i32* [[M_CASTED]], align 4
+// CHECK17-NEXT:    [[TMP63:%.*]] = load i32, i32* [[M_CASTED]], align 4
+// CHECK17-NEXT:    [[TMP64:%.*]] = load i32, i32* [[N]], align 4
+// CHECK17-NEXT:    store i32 [[TMP64]], i32* [[N_CASTED17]], align 4
+// CHECK17-NEXT:    [[TMP65:%.*]] = load i32, i32* [[N_CASTED17]], align 4
+// CHECK17-NEXT:    [[TMP66:%.*]] = mul nuw i32 [[TMP0]], 4
+// CHECK17-NEXT:    [[TMP67:%.*]] = sext i32 [[TMP66]] to i64
+// CHECK17-NEXT:    [[TMP68:%.*]] = bitcast [4 x i64]* [[DOTOFFLOAD_SIZES21]] to i8*
+// CHECK17-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP68]], i8* align 4 bitcast ([4 x i64]* @.offload_sizes.8 to i8*), i32 32, i1 false)
+// CHECK17-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP70:%.*]] = bitcast i8** [[TMP69]] to i32*
+// CHECK17-NEXT:    store i32 [[TMP63]], i32* [[TMP70]], align 4
+// CHECK17-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP72:%.*]] = bitcast i8** [[TMP71]] to i32*
+// CHECK17-NEXT:    store i32 [[TMP63]], i32* [[TMP72]], align 4
+// CHECK17-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 0
+// CHECK17-NEXT:    store i8* null, i8** [[TMP73]], align 4
+// CHECK17-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 1
+// CHECK17-NEXT:    [[TMP75:%.*]] = bitcast i8** [[TMP74]] to i32*
+// CHECK17-NEXT:    store i32 [[TMP65]], i32* [[TMP75]], align 4
+// CHECK17-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 1
 // CHECK17-NEXT:    [[TMP77:%.*]] = bitcast i8** [[TMP76]] to i32*
-// CHECK17-NEXT:    store i32 [[TMP67]], i32* [[TMP77]], align 4
-// CHECK17-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 1
-// CHECK17-NEXT:    [[TMP79:%.*]] = bitcast i8** [[TMP78]] to i32*
-// CHECK17-NEXT:    store i32 [[TMP67]], i32* [[TMP79]], align 4
-// CHECK17-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 1
-// CHECK17-NEXT:    store i64 4, i64* [[TMP80]], align 4
-// CHECK17-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 1
-// CHECK17-NEXT:    store i8* null, i8** [[TMP81]], align 4
-// CHECK17-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 2
-// CHECK17-NEXT:    [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i32*
-// CHECK17-NEXT:    store i32 [[TMP0]], i32* [[TMP83]], align 4
-// CHECK17-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 2
-// CHECK17-NEXT:    [[TMP85:%.*]] = bitcast i8** [[TMP84]] to i32*
-// CHECK17-NEXT:    store i32 [[TMP0]], i32* [[TMP85]], align 4
-// CHECK17-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 2
-// CHECK17-NEXT:    store i64 4, i64* [[TMP86]], align 4
-// CHECK17-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 2
-// CHECK17-NEXT:    store i8* null, i8** [[TMP87]], align 4
-// CHECK17-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 3
-// CHECK17-NEXT:    [[TMP89:%.*]] = bitcast i8** [[TMP88]] to i32**
-// CHECK17-NEXT:    store i32* [[VLA]], i32** [[TMP89]], align 4
-// CHECK17-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 3
-// CHECK17-NEXT:    [[TMP91:%.*]] = bitcast i8** [[TMP90]] to i32**
-// CHECK17-NEXT:    store i32* [[VLA]], i32** [[TMP91]], align 4
-// CHECK17-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 3
-// CHECK17-NEXT:    store i64 [[TMP69]], i64* [[TMP92]], align 4
-// CHECK17-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 3
-// CHECK17-NEXT:    store i8* null, i8** [[TMP93]], align 4
-// CHECK17-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP97:%.*]] = load i32, i32* [[N]], align 4
-// CHECK17-NEXT:    store i32 [[TMP97]], i32* [[DOTCAPTURE_EXPR_23]], align 4
-// CHECK17-NEXT:    [[TMP98:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_23]], align 4
-// CHECK17-NEXT:    [[SUB25:%.*]] = sub nsw i32 [[TMP98]], 0
+// CHECK17-NEXT:    store i32 [[TMP65]], i32* [[TMP77]], align 4
+// CHECK17-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 1
+// CHECK17-NEXT:    store i8* null, i8** [[TMP78]], align 4
+// CHECK17-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 2
+// CHECK17-NEXT:    [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i32*
+// CHECK17-NEXT:    store i32 [[TMP0]], i32* [[TMP80]], align 4
+// CHECK17-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 2
+// CHECK17-NEXT:    [[TMP82:%.*]] = bitcast i8** [[TMP81]] to i32*
+// CHECK17-NEXT:    store i32 [[TMP0]], i32* [[TMP82]], align 4
+// CHECK17-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 2
+// CHECK17-NEXT:    store i8* null, i8** [[TMP83]], align 4
+// CHECK17-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 3
+// CHECK17-NEXT:    [[TMP85:%.*]] = bitcast i8** [[TMP84]] to i32**
+// CHECK17-NEXT:    store i32* [[VLA]], i32** [[TMP85]], align 4
+// CHECK17-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 3
+// CHECK17-NEXT:    [[TMP87:%.*]] = bitcast i8** [[TMP86]] to i32**
+// CHECK17-NEXT:    store i32* [[VLA]], i32** [[TMP87]], align 4
+// CHECK17-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 3
+// CHECK17-NEXT:    store i64 [[TMP67]], i64* [[TMP88]], align 4
+// CHECK17-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 3
+// CHECK17-NEXT:    store i8* null, i8** [[TMP89]], align 4
+// CHECK17-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP93:%.*]] = load i32, i32* [[N]], align 4
+// CHECK17-NEXT:    store i32 [[TMP93]], i32* [[DOTCAPTURE_EXPR_23]], align 4
+// CHECK17-NEXT:    [[TMP94:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_23]], align 4
+// CHECK17-NEXT:    [[SUB25:%.*]] = sub nsw i32 [[TMP94]], 0
 // CHECK17-NEXT:    [[DIV26:%.*]] = sdiv i32 [[SUB25]], 1
 // CHECK17-NEXT:    [[SUB27:%.*]] = sub nsw i32 [[DIV26]], 1
 // CHECK17-NEXT:    store i32 [[SUB27]], i32* [[DOTCAPTURE_EXPR_24]], align 4
-// CHECK17-NEXT:    [[TMP99:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_24]], align 4
-// CHECK17-NEXT:    [[ADD28:%.*]] = add nsw i32 [[TMP99]], 1
-// CHECK17-NEXT:    [[TMP100:%.*]] = zext i32 [[ADD28]] to i64
-// CHECK17-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP100]])
-// CHECK17-NEXT:    [[TMP101:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l164.region_id, i32 4, i8** [[TMP94]], i8** [[TMP95]], i64* [[TMP96]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.7, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK17-NEXT:    [[TMP102:%.*]] = icmp ne i32 [[TMP101]], 0
-// CHECK17-NEXT:    br i1 [[TMP102]], label [[OMP_OFFLOAD_FAILED29:%.*]], label [[OMP_OFFLOAD_CONT30:%.*]]
+// CHECK17-NEXT:    [[TMP95:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_24]], align 4
+// CHECK17-NEXT:    [[ADD28:%.*]] = add nsw i32 [[TMP95]], 1
+// CHECK17-NEXT:    [[TMP96:%.*]] = zext i32 [[ADD28]] to i64
+// CHECK17-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP96]])
+// CHECK17-NEXT:    [[TMP97:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l164.region_id, i32 4, i8** [[TMP90]], i8** [[TMP91]], i64* [[TMP92]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK17-NEXT:    [[TMP98:%.*]] = icmp ne i32 [[TMP97]], 0
+// CHECK17-NEXT:    br i1 [[TMP98]], label [[OMP_OFFLOAD_FAILED29:%.*]], label [[OMP_OFFLOAD_CONT30:%.*]]
 // CHECK17:       omp_offload.failed29:
-// CHECK17-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l164(i32 [[TMP65]], i32 [[TMP67]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK17-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l164(i32 [[TMP63]], i32 [[TMP65]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK17-NEXT:    br label [[OMP_OFFLOAD_CONT30]]
 // CHECK17:       omp_offload.cont30:
-// CHECK17-NEXT:    [[TMP103:%.*]] = load i32, i32* [[N]], align 4
-// CHECK17-NEXT:    store i32 [[TMP103]], i32* [[N_CASTED31]], align 4
-// CHECK17-NEXT:    [[TMP104:%.*]] = load i32, i32* [[N_CASTED31]], align 4
-// CHECK17-NEXT:    [[TMP105:%.*]] = mul nuw i32 [[TMP0]], 4
-// CHECK17-NEXT:    [[TMP106:%.*]] = sext i32 [[TMP105]] to i64
-// CHECK17-NEXT:    [[TMP107:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS32]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP108:%.*]] = bitcast i8** [[TMP107]] to i32*
-// CHECK17-NEXT:    store i32 [[TMP104]], i32* [[TMP108]], align 4
-// CHECK17-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS33]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP99:%.*]] = load i32, i32* [[N]], align 4
+// CHECK17-NEXT:    store i32 [[TMP99]], i32* [[N_CASTED31]], align 4
+// CHECK17-NEXT:    [[TMP100:%.*]] = load i32, i32* [[N_CASTED31]], align 4
+// CHECK17-NEXT:    [[TMP101:%.*]] = mul nuw i32 [[TMP0]], 4
+// CHECK17-NEXT:    [[TMP102:%.*]] = sext i32 [[TMP101]] to i64
+// CHECK17-NEXT:    [[TMP103:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES35]] to i8*
+// CHECK17-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP103]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes.12 to i8*), i32 24, i1 false)
+// CHECK17-NEXT:    [[TMP104:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS32]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP105:%.*]] = bitcast i8** [[TMP104]] to i32*
+// CHECK17-NEXT:    store i32 [[TMP100]], i32* [[TMP105]], align 4
+// CHECK17-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS33]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP107:%.*]] = bitcast i8** [[TMP106]] to i32*
+// CHECK17-NEXT:    store i32 [[TMP100]], i32* [[TMP107]], align 4
+// CHECK17-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS34]], i32 0, i32 0
+// CHECK17-NEXT:    store i8* null, i8** [[TMP108]], align 4
+// CHECK17-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS32]], i32 0, i32 1
 // CHECK17-NEXT:    [[TMP110:%.*]] = bitcast i8** [[TMP109]] to i32*
-// CHECK17-NEXT:    store i32 [[TMP104]], i32* [[TMP110]], align 4
-// CHECK17-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES35]], i32 0, i32 0
-// CHECK17-NEXT:    store i64 4, i64* [[TMP111]], align 4
-// CHECK17-NEXT:    [[TMP112:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS34]], i32 0, i32 0
-// CHECK17-NEXT:    store i8* null, i8** [[TMP112]], align 4
-// CHECK17-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS32]], i32 0, i32 1
-// CHECK17-NEXT:    [[TMP114:%.*]] = bitcast i8** [[TMP113]] to i32*
-// CHECK17-NEXT:    store i32 [[TMP0]], i32* [[TMP114]], align 4
-// CHECK17-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS33]], i32 0, i32 1
-// CHECK17-NEXT:    [[TMP116:%.*]] = bitcast i8** [[TMP115]] to i32*
-// CHECK17-NEXT:    store i32 [[TMP0]], i32* [[TMP116]], align 4
-// CHECK17-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES35]], i32 0, i32 1
-// CHECK17-NEXT:    store i64 4, i64* [[TMP117]], align 4
-// CHECK17-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS34]], i32 0, i32 1
-// CHECK17-NEXT:    store i8* null, i8** [[TMP118]], align 4
-// CHECK17-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS32]], i32 0, i32 2
-// CHECK17-NEXT:    [[TMP120:%.*]] = bitcast i8** [[TMP119]] to i32**
-// CHECK17-NEXT:    store i32* [[VLA]], i32** [[TMP120]], align 4
-// CHECK17-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS33]], i32 0, i32 2
-// CHECK17-NEXT:    [[TMP122:%.*]] = bitcast i8** [[TMP121]] to i32**
-// CHECK17-NEXT:    store i32* [[VLA]], i32** [[TMP122]], align 4
-// CHECK17-NEXT:    [[TMP123:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES35]], i32 0, i32 2
-// CHECK17-NEXT:    store i64 [[TMP106]], i64* [[TMP123]], align 4
-// CHECK17-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS34]], i32 0, i32 2
-// CHECK17-NEXT:    store i8* null, i8** [[TMP124]], align 4
-// CHECK17-NEXT:    [[TMP125:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS32]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP126:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS33]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES35]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP128:%.*]] = load i32, i32* [[N]], align 4
-// CHECK17-NEXT:    store i32 [[TMP128]], i32* [[DOTCAPTURE_EXPR_37]], align 4
-// CHECK17-NEXT:    [[TMP129:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_37]], align 4
-// CHECK17-NEXT:    [[SUB39:%.*]] = sub nsw i32 [[TMP129]], 0
+// CHECK17-NEXT:    store i32 [[TMP0]], i32* [[TMP110]], align 4
+// CHECK17-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS33]], i32 0, i32 1
+// CHECK17-NEXT:    [[TMP112:%.*]] = bitcast i8** [[TMP111]] to i32*
+// CHECK17-NEXT:    store i32 [[TMP0]], i32* [[TMP112]], align 4
+// CHECK17-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS34]], i32 0, i32 1
+// CHECK17-NEXT:    store i8* null, i8** [[TMP113]], align 4
+// CHECK17-NEXT:    [[TMP114:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS32]], i32 0, i32 2
+// CHECK17-NEXT:    [[TMP115:%.*]] = bitcast i8** [[TMP114]] to i32**
+// CHECK17-NEXT:    store i32* [[VLA]], i32** [[TMP115]], align 4
+// CHECK17-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS33]], i32 0, i32 2
+// CHECK17-NEXT:    [[TMP117:%.*]] = bitcast i8** [[TMP116]] to i32**
+// CHECK17-NEXT:    store i32* [[VLA]], i32** [[TMP117]], align 4
+// CHECK17-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES35]], i32 0, i32 2
+// CHECK17-NEXT:    store i64 [[TMP102]], i64* [[TMP118]], align 4
+// CHECK17-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS34]], i32 0, i32 2
+// CHECK17-NEXT:    store i8* null, i8** [[TMP119]], align 4
+// CHECK17-NEXT:    [[TMP120:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS32]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS33]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES35]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP123:%.*]] = load i32, i32* [[N]], align 4
+// CHECK17-NEXT:    store i32 [[TMP123]], i32* [[DOTCAPTURE_EXPR_37]], align 4
+// CHECK17-NEXT:    [[TMP124:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_37]], align 4
+// CHECK17-NEXT:    [[SUB39:%.*]] = sub nsw i32 [[TMP124]], 0
 // CHECK17-NEXT:    [[DIV40:%.*]] = sdiv i32 [[SUB39]], 1
 // CHECK17-NEXT:    [[SUB41:%.*]] = sub nsw i32 [[DIV40]], 1
 // CHECK17-NEXT:    store i32 [[SUB41]], i32* [[DOTCAPTURE_EXPR_38]], align 4
-// CHECK17-NEXT:    [[TMP130:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_38]], align 4
-// CHECK17-NEXT:    [[ADD42:%.*]] = add nsw i32 [[TMP130]], 1
-// CHECK17-NEXT:    [[TMP131:%.*]] = zext i32 [[ADD42]] to i64
-// CHECK17-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP131]])
-// CHECK17-NEXT:    [[TMP132:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l169.region_id, i32 3, i8** [[TMP125]], i8** [[TMP126]], i64* [[TMP127]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.10, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK17-NEXT:    [[TMP133:%.*]] = icmp ne i32 [[TMP132]], 0
-// CHECK17-NEXT:    br i1 [[TMP133]], label [[OMP_OFFLOAD_FAILED43:%.*]], label [[OMP_OFFLOAD_CONT44:%.*]]
+// CHECK17-NEXT:    [[TMP125:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_38]], align 4
+// CHECK17-NEXT:    [[ADD42:%.*]] = add nsw i32 [[TMP125]], 1
+// CHECK17-NEXT:    [[TMP126:%.*]] = zext i32 [[ADD42]] to i64
+// CHECK17-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP126]])
+// CHECK17-NEXT:    [[TMP127:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l169.region_id, i32 3, i8** [[TMP120]], i8** [[TMP121]], i64* [[TMP122]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK17-NEXT:    [[TMP128:%.*]] = icmp ne i32 [[TMP127]], 0
+// CHECK17-NEXT:    br i1 [[TMP128]], label [[OMP_OFFLOAD_FAILED43:%.*]], label [[OMP_OFFLOAD_CONT44:%.*]]
 // CHECK17:       omp_offload.failed43:
-// CHECK17-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l169(i32 [[TMP104]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK17-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l169(i32 [[TMP100]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK17-NEXT:    br label [[OMP_OFFLOAD_CONT44]]
 // CHECK17:       omp_offload.cont44:
-// CHECK17-NEXT:    [[TMP134:%.*]] = load i32, i32* [[M]], align 4
-// CHECK17-NEXT:    store i32 [[TMP134]], i32* [[M_CASTED45]], align 4
-// CHECK17-NEXT:    [[TMP135:%.*]] = load i32, i32* [[M_CASTED45]], align 4
-// CHECK17-NEXT:    [[TMP136:%.*]] = load i32, i32* [[N]], align 4
-// CHECK17-NEXT:    store i32 [[TMP136]], i32* [[N_CASTED46]], align 4
-// CHECK17-NEXT:    [[TMP137:%.*]] = load i32, i32* [[N_CASTED46]], align 4
-// CHECK17-NEXT:    [[TMP138:%.*]] = mul nuw i32 [[TMP0]], 4
-// CHECK17-NEXT:    [[TMP139:%.*]] = sext i32 [[TMP138]] to i64
-// CHECK17-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP141:%.*]] = bitcast i8** [[TMP140]] to i32*
-// CHECK17-NEXT:    store i32 [[TMP135]], i32* [[TMP141]], align 4
-// CHECK17-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP143:%.*]] = bitcast i8** [[TMP142]] to i32*
-// CHECK17-NEXT:    store i32 [[TMP135]], i32* [[TMP143]], align 4
-// CHECK17-NEXT:    [[TMP144:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES50]], i32 0, i32 0
-// CHECK17-NEXT:    store i64 4, i64* [[TMP144]], align 4
-// CHECK17-NEXT:    [[TMP145:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS49]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP129:%.*]] = load i32, i32* [[M]], align 4
+// CHECK17-NEXT:    store i32 [[TMP129]], i32* [[M_CASTED45]], align 4
+// CHECK17-NEXT:    [[TMP130:%.*]] = load i32, i32* [[M_CASTED45]], align 4
+// CHECK17-NEXT:    [[TMP131:%.*]] = load i32, i32* [[N]], align 4
+// CHECK17-NEXT:    store i32 [[TMP131]], i32* [[N_CASTED46]], align 4
+// CHECK17-NEXT:    [[TMP132:%.*]] = load i32, i32* [[N_CASTED46]], align 4
+// CHECK17-NEXT:    [[TMP133:%.*]] = mul nuw i32 [[TMP0]], 4
+// CHECK17-NEXT:    [[TMP134:%.*]] = sext i32 [[TMP133]] to i64
+// CHECK17-NEXT:    [[TMP135:%.*]] = bitcast [4 x i64]* [[DOTOFFLOAD_SIZES50]] to i8*
+// CHECK17-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP135]], i8* align 4 bitcast ([4 x i64]* @.offload_sizes.16 to i8*), i32 32, i1 false)
+// CHECK17-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP137:%.*]] = bitcast i8** [[TMP136]] to i32*
+// CHECK17-NEXT:    store i32 [[TMP130]], i32* [[TMP137]], align 4
+// CHECK17-NEXT:    [[TMP138:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP139:%.*]] = bitcast i8** [[TMP138]] to i32*
+// CHECK17-NEXT:    store i32 [[TMP130]], i32* [[TMP139]], align 4
+// CHECK17-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS49]], i32 0, i32 0
+// CHECK17-NEXT:    store i8* null, i8** [[TMP140]], align 4
+// CHECK17-NEXT:    [[TMP141:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 1
+// CHECK17-NEXT:    [[TMP142:%.*]] = bitcast i8** [[TMP141]] to i32*
+// CHECK17-NEXT:    store i32 [[TMP132]], i32* [[TMP142]], align 4
+// CHECK17-NEXT:    [[TMP143:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 1
+// CHECK17-NEXT:    [[TMP144:%.*]] = bitcast i8** [[TMP143]] to i32*
+// CHECK17-NEXT:    store i32 [[TMP132]], i32* [[TMP144]], align 4
+// CHECK17-NEXT:    [[TMP145:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS49]], i32 0, i32 1
 // CHECK17-NEXT:    store i8* null, i8** [[TMP145]], align 4
-// CHECK17-NEXT:    [[TMP146:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 1
+// CHECK17-NEXT:    [[TMP146:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 2
 // CHECK17-NEXT:    [[TMP147:%.*]] = bitcast i8** [[TMP146]] to i32*
-// CHECK17-NEXT:    store i32 [[TMP137]], i32* [[TMP147]], align 4
-// CHECK17-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 1
+// CHECK17-NEXT:    store i32 [[TMP0]], i32* [[TMP147]], align 4
+// CHECK17-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 2
 // CHECK17-NEXT:    [[TMP149:%.*]] = bitcast i8** [[TMP148]] to i32*
-// CHECK17-NEXT:    store i32 [[TMP137]], i32* [[TMP149]], align 4
-// CHECK17-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES50]], i32 0, i32 1
-// CHECK17-NEXT:    store i64 4, i64* [[TMP150]], align 4
-// CHECK17-NEXT:    [[TMP151:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS49]], i32 0, i32 1
-// CHECK17-NEXT:    store i8* null, i8** [[TMP151]], align 4
-// CHECK17-NEXT:    [[TMP152:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 2
-// CHECK17-NEXT:    [[TMP153:%.*]] = bitcast i8** [[TMP152]] to i32*
-// CHECK17-NEXT:    store i32 [[TMP0]], i32* [[TMP153]], align 4
-// CHECK17-NEXT:    [[TMP154:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 2
-// CHECK17-NEXT:    [[TMP155:%.*]] = bitcast i8** [[TMP154]] to i32*
-// CHECK17-NEXT:    store i32 [[TMP0]], i32* [[TMP155]], align 4
-// CHECK17-NEXT:    [[TMP156:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES50]], i32 0, i32 2
-// CHECK17-NEXT:    store i64 4, i64* [[TMP156]], align 4
-// CHECK17-NEXT:    [[TMP157:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS49]], i32 0, i32 2
-// CHECK17-NEXT:    store i8* null, i8** [[TMP157]], align 4
-// CHECK17-NEXT:    [[TMP158:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 3
-// CHECK17-NEXT:    [[TMP159:%.*]] = bitcast i8** [[TMP158]] to i32**
-// CHECK17-NEXT:    store i32* [[VLA]], i32** [[TMP159]], align 4
-// CHECK17-NEXT:    [[TMP160:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 3
-// CHECK17-NEXT:    [[TMP161:%.*]] = bitcast i8** [[TMP160]] to i32**
-// CHECK17-NEXT:    store i32* [[VLA]], i32** [[TMP161]], align 4
-// CHECK17-NEXT:    [[TMP162:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES50]], i32 0, i32 3
-// CHECK17-NEXT:    store i64 [[TMP139]], i64* [[TMP162]], align 4
-// CHECK17-NEXT:    [[TMP163:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS49]], i32 0, i32 3
-// CHECK17-NEXT:    store i8* null, i8** [[TMP163]], align 4
-// CHECK17-NEXT:    [[TMP164:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP165:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP166:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES50]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP167:%.*]] = load i32, i32* [[N]], align 4
-// CHECK17-NEXT:    store i32 [[TMP167]], i32* [[DOTCAPTURE_EXPR_52]], align 4
-// CHECK17-NEXT:    [[TMP168:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_52]], align 4
-// CHECK17-NEXT:    [[SUB54:%.*]] = sub nsw i32 [[TMP168]], 0
+// CHECK17-NEXT:    store i32 [[TMP0]], i32* [[TMP149]], align 4
+// CHECK17-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS49]], i32 0, i32 2
+// CHECK17-NEXT:    store i8* null, i8** [[TMP150]], align 4
+// CHECK17-NEXT:    [[TMP151:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 3
+// CHECK17-NEXT:    [[TMP152:%.*]] = bitcast i8** [[TMP151]] to i32**
+// CHECK17-NEXT:    store i32* [[VLA]], i32** [[TMP152]], align 4
+// CHECK17-NEXT:    [[TMP153:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 3
+// CHECK17-NEXT:    [[TMP154:%.*]] = bitcast i8** [[TMP153]] to i32**
+// CHECK17-NEXT:    store i32* [[VLA]], i32** [[TMP154]], align 4
+// CHECK17-NEXT:    [[TMP155:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES50]], i32 0, i32 3
+// CHECK17-NEXT:    store i64 [[TMP134]], i64* [[TMP155]], align 4
+// CHECK17-NEXT:    [[TMP156:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS49]], i32 0, i32 3
+// CHECK17-NEXT:    store i8* null, i8** [[TMP156]], align 4
+// CHECK17-NEXT:    [[TMP157:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP158:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP159:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES50]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP160:%.*]] = load i32, i32* [[N]], align 4
+// CHECK17-NEXT:    store i32 [[TMP160]], i32* [[DOTCAPTURE_EXPR_52]], align 4
+// CHECK17-NEXT:    [[TMP161:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_52]], align 4
+// CHECK17-NEXT:    [[SUB54:%.*]] = sub nsw i32 [[TMP161]], 0
 // CHECK17-NEXT:    [[DIV55:%.*]] = sdiv i32 [[SUB54]], 1
 // CHECK17-NEXT:    [[SUB56:%.*]] = sub nsw i32 [[DIV55]], 1
 // CHECK17-NEXT:    store i32 [[SUB56]], i32* [[DOTCAPTURE_EXPR_53]], align 4
-// CHECK17-NEXT:    [[TMP169:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_53]], align 4
-// CHECK17-NEXT:    [[ADD57:%.*]] = add nsw i32 [[TMP169]], 1
-// CHECK17-NEXT:    [[TMP170:%.*]] = zext i32 [[ADD57]] to i64
-// CHECK17-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP170]])
-// CHECK17-NEXT:    [[TMP171:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l174.region_id, i32 4, i8** [[TMP164]], i8** [[TMP165]], i64* [[TMP166]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK17-NEXT:    [[TMP172:%.*]] = icmp ne i32 [[TMP171]], 0
-// CHECK17-NEXT:    br i1 [[TMP172]], label [[OMP_OFFLOAD_FAILED58:%.*]], label [[OMP_OFFLOAD_CONT59:%.*]]
+// CHECK17-NEXT:    [[TMP162:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_53]], align 4
+// CHECK17-NEXT:    [[ADD57:%.*]] = add nsw i32 [[TMP162]], 1
+// CHECK17-NEXT:    [[TMP163:%.*]] = zext i32 [[ADD57]] to i64
+// CHECK17-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP163]])
+// CHECK17-NEXT:    [[TMP164:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l174.region_id, i32 4, i8** [[TMP157]], i8** [[TMP158]], i64* [[TMP159]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.17, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK17-NEXT:    [[TMP165:%.*]] = icmp ne i32 [[TMP164]], 0
+// CHECK17-NEXT:    br i1 [[TMP165]], label [[OMP_OFFLOAD_FAILED58:%.*]], label [[OMP_OFFLOAD_CONT59:%.*]]
 // CHECK17:       omp_offload.failed58:
-// CHECK17-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l174(i32 [[TMP135]], i32 [[TMP137]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK17-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l174(i32 [[TMP130]], i32 [[TMP132]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK17-NEXT:    br label [[OMP_OFFLOAD_CONT59]]
 // CHECK17:       omp_offload.cont59:
-// CHECK17-NEXT:    [[TMP173:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK17-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10EEiT_(i32 noundef [[TMP173]])
+// CHECK17-NEXT:    [[TMP166:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK17-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10EEiT_(i32 noundef [[TMP166]])
 // CHECK17-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK17-NEXT:    [[TMP174:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK17-NEXT:    call void @llvm.stackrestore(i8* [[TMP174]])
-// CHECK17-NEXT:    [[TMP175:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK17-NEXT:    ret i32 [[TMP175]]
+// CHECK17-NEXT:    [[TMP167:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK17-NEXT:    call void @llvm.stackrestore(i8* [[TMP167]])
+// CHECK17-NEXT:    [[TMP168:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK17-NEXT:    ret i32 [[TMP168]]
 //
 //
 // CHECK17-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l154
@@ -20245,11 +20175,11 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
 // CHECK17-NEXT:    store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK17-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*, i32)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP4]])
+// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*, i32)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP4]])
 // CHECK17-NEXT:    ret void
 //
 //
-// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..5
+// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..6
 // CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK17-NEXT:  entry:
 // CHECK17-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -20327,7 +20257,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !29
 // CHECK17-NEXT:    store i32 [[TMP19]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !29
 // CHECK17-NEXT:    [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !29
-// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*, i32)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]], i32 [[TMP20]]), !llvm.access.group !29
+// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*, i32)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]], i32 [[TMP20]]), !llvm.access.group !29
 // CHECK17-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK17:       omp.inner.for.inc:
 // CHECK17-NEXT:    [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29
@@ -20381,7 +20311,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    ret void
 //
 //
-// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..6
+// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..7
 // CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK17-NEXT:  entry:
 // CHECK17-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -20508,11 +20438,11 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    store i32* [[A]], i32** [[A_ADDR]], align 4
 // CHECK17-NEXT:    [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4
 // CHECK17-NEXT:    [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4
-// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]])
+// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]])
 // CHECK17-NEXT:    ret void
 //
 //
-// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..8
+// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..10
 // CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] {
 // CHECK17-NEXT:  entry:
 // CHECK17-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -20582,7 +20512,7 @@ int main (int argc, char **argv) {
 // CHECK17:       omp.inner.for.body:
 // CHECK17-NEXT:    [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !35
 // CHECK17-NEXT:    [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !35
-// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]]), !llvm.access.group !35
+// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]]), !llvm.access.group !35
 // CHECK17-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK17:       omp.inner.for.inc:
 // CHECK17-NEXT:    [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35
@@ -20613,7 +20543,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    ret void
 //
 //
-// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..9
+// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..11
 // CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] {
 // CHECK17-NEXT:  entry:
 // CHECK17-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -20743,11 +20673,11 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
 // CHECK17-NEXT:    store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK17-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*, i32)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP4]])
+// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*, i32)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP4]])
 // CHECK17-NEXT:    ret void
 //
 //
-// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..11
+// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..14
 // CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK17-NEXT:  entry:
 // CHECK17-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -20823,7 +20753,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !41
 // CHECK17-NEXT:    store i32 [[TMP18]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !41
 // CHECK17-NEXT:    [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !41
-// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*, i32)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]], i32 [[TMP19]]), !llvm.access.group !41
+// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*, i32)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]], i32 [[TMP19]]), !llvm.access.group !41
 // CHECK17-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK17:       omp.inner.for.inc:
 // CHECK17-NEXT:    [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41
@@ -20854,7 +20784,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    ret void
 //
 //
-// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..12
+// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..15
 // CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK17-NEXT:  entry:
 // CHECK17-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -20968,7 +20898,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK17-LABEL: define {{[^@]+}}@_Z5tmainIiLi10EEiT_
-// CHECK17-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK17-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK17-NEXT:  entry:
 // CHECK17-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK17-NEXT:    [[A:%.*]] = alloca [10 x i32], align 4
@@ -21008,7 +20938,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK17-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK17-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK17-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l122.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.16, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK17-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l122.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.20, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.21, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK17-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK17-NEXT:    br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK17:       omp_offload.failed:
@@ -21026,7 +20956,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK17-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
 // CHECK17-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK17-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l127.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.19, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.20, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK17-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l127.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.24, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.25, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK17-NEXT:    [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
 // CHECK17-NEXT:    br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]]
 // CHECK17:       omp_offload.failed5:
@@ -21055,7 +20985,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0
 // CHECK17-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS8]], i32 0, i32 0
 // CHECK17-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK17-NEXT:    [[TMP32:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l132.region_id, i32 2, i8** [[TMP30]], i8** [[TMP31]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.23, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.24, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK17-NEXT:    [[TMP32:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l132.region_id, i32 2, i8** [[TMP30]], i8** [[TMP31]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.28, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.29, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK17-NEXT:    [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0
 // CHECK17-NEXT:    br i1 [[TMP33]], label [[OMP_OFFLOAD_FAILED11:%.*]], label [[OMP_OFFLOAD_CONT12:%.*]]
 // CHECK17:       omp_offload.failed11:
@@ -21073,7 +21003,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0
 // CHECK17-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 0
 // CHECK17-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK17-NEXT:    [[TMP41:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l137.region_id, i32 1, i8** [[TMP39]], i8** [[TMP40]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.27, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.28, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK17-NEXT:    [[TMP41:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l137.region_id, i32 1, i8** [[TMP39]], i8** [[TMP40]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.32, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.33, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK17-NEXT:    [[TMP42:%.*]] = icmp ne i32 [[TMP41]], 0
 // CHECK17-NEXT:    br i1 [[TMP42]], label [[OMP_OFFLOAD_FAILED17:%.*]], label [[OMP_OFFLOAD_CONT18:%.*]]
 // CHECK17:       omp_offload.failed17:
@@ -21102,7 +21032,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0
 // CHECK17-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 0
 // CHECK17-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK17-NEXT:    [[TMP57:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l142.region_id, i32 2, i8** [[TMP55]], i8** [[TMP56]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.31, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.32, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK17-NEXT:    [[TMP57:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l142.region_id, i32 2, i8** [[TMP55]], i8** [[TMP56]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.36, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.37, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK17-NEXT:    [[TMP58:%.*]] = icmp ne i32 [[TMP57]], 0
 // CHECK17-NEXT:    br i1 [[TMP58]], label [[OMP_OFFLOAD_FAILED24:%.*]], label [[OMP_OFFLOAD_CONT25:%.*]]
 // CHECK17:       omp_offload.failed24:
@@ -21118,11 +21048,11 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4
 // CHECK17-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4
 // CHECK17-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4
-// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK17-NEXT:    ret void
 //
 //
-// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..14
+// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..18
 // CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK17-NEXT:  entry:
 // CHECK17-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -21168,7 +21098,7 @@ int main (int argc, char **argv) {
 // CHECK17:       omp.inner.for.body:
 // CHECK17-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !47
 // CHECK17-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !47
-// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !47
+// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !47
 // CHECK17-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK17:       omp.inner.for.inc:
 // CHECK17-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47
@@ -21190,7 +21120,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    ret void
 //
 //
-// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..15
+// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..19
 // CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK17-NEXT:  entry:
 // CHECK17-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -21277,11 +21207,11 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4
 // CHECK17-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4
 // CHECK17-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4
-// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..17 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..22 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK17-NEXT:    ret void
 //
 //
-// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..17
+// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..22
 // CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK17-NEXT:  entry:
 // CHECK17-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -21327,7 +21257,7 @@ int main (int argc, char **argv) {
 // CHECK17:       omp.inner.for.body:
 // CHECK17-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !53
 // CHECK17-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !53
-// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !53
+// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..23 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !53
 // CHECK17-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK17:       omp.inner.for.inc:
 // CHECK17-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53
@@ -21349,7 +21279,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    ret void
 //
 //
-// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..18
+// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..23
 // CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK17-NEXT:  entry:
 // CHECK17-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -21445,11 +21375,11 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
 // CHECK17-NEXT:    store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK17-NEXT:    [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i32)* @.omp_outlined..21 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i32 [[TMP3]])
+// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i32)* @.omp_outlined..26 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i32 [[TMP3]])
 // CHECK17-NEXT:    ret void
 //
 //
-// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..21
+// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..26
 // CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK17-NEXT:  entry:
 // CHECK17-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -21501,7 +21431,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !59
 // CHECK17-NEXT:    store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !59
 // CHECK17-NEXT:    [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !59
-// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..22 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]), !llvm.access.group !59
+// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..27 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]), !llvm.access.group !59
 // CHECK17-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK17:       omp.inner.for.inc:
 // CHECK17-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59
@@ -21523,7 +21453,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    ret void
 //
 //
-// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..22
+// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..27
 // CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK17-NEXT:  entry:
 // CHECK17-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -21632,11 +21562,11 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4
 // CHECK17-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4
 // CHECK17-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4
-// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..25 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK17-NEXT:    ret void
 //
 //
-// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..25
+// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..30
 // CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK17-NEXT:  entry:
 // CHECK17-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -21682,7 +21612,7 @@ int main (int argc, char **argv) {
 // CHECK17:       omp.inner.for.body:
 // CHECK17-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !65
 // CHECK17-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !65
-// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !65
+// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !65
 // CHECK17-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK17:       omp.inner.for.inc:
 // CHECK17-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !65
@@ -21704,7 +21634,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    ret void
 //
 //
-// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..26
+// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..31
 // CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK17-NEXT:  entry:
 // CHECK17-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -21798,11 +21728,11 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
 // CHECK17-NEXT:    store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK17-NEXT:    [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i32)* @.omp_outlined..29 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i32 [[TMP3]])
+// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i32)* @.omp_outlined..34 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i32 [[TMP3]])
 // CHECK17-NEXT:    ret void
 //
 //
-// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..29
+// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..34
 // CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK17-NEXT:  entry:
 // CHECK17-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -21854,7 +21784,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !71
 // CHECK17-NEXT:    store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !71
 // CHECK17-NEXT:    [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !71
-// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]), !llvm.access.group !71
+// CHECK17-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]), !llvm.access.group !71
 // CHECK17-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK17:       omp.inner.for.inc:
 // CHECK17-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !71
@@ -21876,7 +21806,7 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    ret void
 //
 //
-// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..30
+// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..35
 // CHECK17-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK17-NEXT:  entry:
 // CHECK17-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -21959,7 +21889,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK17-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK17-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK17-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK17-NEXT:  entry:
 // CHECK17-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK17-NEXT:    ret void
@@ -22032,14 +21962,14 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4
 // CHECK18-NEXT:    [[TMP4:%.*]] = mul nuw i32 [[TMP0]], 4
 // CHECK18-NEXT:    [[TMP5:%.*]] = sext i32 [[TMP4]] to i64
-// CHECK18-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32*
-// CHECK18-NEXT:    store i32 [[TMP3]], i32* [[TMP7]], align 4
-// CHECK18-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i32*
-// CHECK18-NEXT:    store i32 [[TMP3]], i32* [[TMP9]], align 4
-// CHECK18-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK18-NEXT:    store i64 4, i64* [[TMP10]], align 4
+// CHECK18-NEXT:    [[TMP6:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK18-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP6]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes to i8*), i32 24, i1 false)
+// CHECK18-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i32*
+// CHECK18-NEXT:    store i32 [[TMP3]], i32* [[TMP8]], align 4
+// CHECK18-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i32*
+// CHECK18-NEXT:    store i32 [[TMP3]], i32* [[TMP10]], align 4
 // CHECK18-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
 // CHECK18-NEXT:    store i8* null, i8** [[TMP11]], align 4
 // CHECK18-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -22048,298 +21978,284 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK18-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i32*
 // CHECK18-NEXT:    store i32 [[TMP0]], i32* [[TMP15]], align 4
-// CHECK18-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK18-NEXT:    store i64 4, i64* [[TMP16]], align 4
-// CHECK18-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK18-NEXT:    store i8* null, i8** [[TMP17]], align 4
-// CHECK18-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK18-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
-// CHECK18-NEXT:    store i32* [[VLA]], i32** [[TMP19]], align 4
-// CHECK18-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK18-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32**
-// CHECK18-NEXT:    store i32* [[VLA]], i32** [[TMP21]], align 4
-// CHECK18-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK18-NEXT:    store i64 [[TMP5]], i64* [[TMP22]], align 4
-// CHECK18-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK18-NEXT:    store i8* null, i8** [[TMP23]], align 4
-// CHECK18-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP27:%.*]] = load i32, i32* [[N]], align 4
-// CHECK18-NEXT:    store i32 [[TMP27]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK18-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK18-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP28]], 0
+// CHECK18-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK18-NEXT:    store i8* null, i8** [[TMP16]], align 4
+// CHECK18-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK18-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32**
+// CHECK18-NEXT:    store i32* [[VLA]], i32** [[TMP18]], align 4
+// CHECK18-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK18-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32**
+// CHECK18-NEXT:    store i32* [[VLA]], i32** [[TMP20]], align 4
+// CHECK18-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
+// CHECK18-NEXT:    store i64 [[TMP5]], i64* [[TMP21]], align 4
+// CHECK18-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK18-NEXT:    store i8* null, i8** [[TMP22]], align 4
+// CHECK18-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP26:%.*]] = load i32, i32* [[N]], align 4
+// CHECK18-NEXT:    store i32 [[TMP26]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK18-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK18-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP27]], 0
 // CHECK18-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK18-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK18-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK18-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK18-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP29]], 1
-// CHECK18-NEXT:    [[TMP30:%.*]] = zext i32 [[ADD]] to i64
-// CHECK18-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP30]])
-// CHECK18-NEXT:    [[TMP31:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l154.region_id, i32 3, i8** [[TMP24]], i8** [[TMP25]], i64* [[TMP26]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK18-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
-// CHECK18-NEXT:    br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK18-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK18-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP28]], 1
+// CHECK18-NEXT:    [[TMP29:%.*]] = zext i32 [[ADD]] to i64
+// CHECK18-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP29]])
+// CHECK18-NEXT:    [[TMP30:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l154.region_id, i32 3, i8** [[TMP23]], i8** [[TMP24]], i64* [[TMP25]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK18-NEXT:    [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
+// CHECK18-NEXT:    br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK18:       omp_offload.failed:
 // CHECK18-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l154(i32 [[TMP3]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK18-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK18:       omp_offload.cont:
-// CHECK18-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N]], align 4
-// CHECK18-NEXT:    store i32 [[TMP33]], i32* [[N_CASTED3]], align 4
-// CHECK18-NEXT:    [[TMP34:%.*]] = load i32, i32* [[N_CASTED3]], align 4
-// CHECK18-NEXT:    [[TMP35:%.*]] = mul nuw i32 [[TMP0]], 4
-// CHECK18-NEXT:    [[TMP36:%.*]] = sext i32 [[TMP35]] to i64
+// CHECK18-NEXT:    [[TMP32:%.*]] = load i32, i32* [[N]], align 4
+// CHECK18-NEXT:    store i32 [[TMP32]], i32* [[N_CASTED3]], align 4
+// CHECK18-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N_CASTED3]], align 4
+// CHECK18-NEXT:    [[TMP34:%.*]] = mul nuw i32 [[TMP0]], 4
+// CHECK18-NEXT:    [[TMP35:%.*]] = sext i32 [[TMP34]] to i64
+// CHECK18-NEXT:    [[TMP36:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES7]] to i8*
+// CHECK18-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP36]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes.4 to i8*), i32 24, i1 false)
 // CHECK18-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
 // CHECK18-NEXT:    [[TMP38:%.*]] = bitcast i8** [[TMP37]] to i32*
-// CHECK18-NEXT:    store i32 [[TMP34]], i32* [[TMP38]], align 4
+// CHECK18-NEXT:    store i32 [[TMP33]], i32* [[TMP38]], align 4
 // CHECK18-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
 // CHECK18-NEXT:    [[TMP40:%.*]] = bitcast i8** [[TMP39]] to i32*
-// CHECK18-NEXT:    store i32 [[TMP34]], i32* [[TMP40]], align 4
-// CHECK18-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 0
-// CHECK18-NEXT:    store i64 4, i64* [[TMP41]], align 4
-// CHECK18-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 0
-// CHECK18-NEXT:    store i8* null, i8** [[TMP42]], align 4
-// CHECK18-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 1
-// CHECK18-NEXT:    [[TMP44:%.*]] = bitcast i8** [[TMP43]] to i32*
-// CHECK18-NEXT:    store i32 [[TMP0]], i32* [[TMP44]], align 4
-// CHECK18-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 1
-// CHECK18-NEXT:    [[TMP46:%.*]] = bitcast i8** [[TMP45]] to i32*
-// CHECK18-NEXT:    store i32 [[TMP0]], i32* [[TMP46]], align 4
-// CHECK18-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 1
-// CHECK18-NEXT:    store i64 4, i64* [[TMP47]], align 4
-// CHECK18-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 1
-// CHECK18-NEXT:    store i8* null, i8** [[TMP48]], align 4
-// CHECK18-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 2
+// CHECK18-NEXT:    store i32 [[TMP33]], i32* [[TMP40]], align 4
+// CHECK18-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 0
+// CHECK18-NEXT:    store i8* null, i8** [[TMP41]], align 4
+// CHECK18-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 1
+// CHECK18-NEXT:    [[TMP43:%.*]] = bitcast i8** [[TMP42]] to i32*
+// CHECK18-NEXT:    store i32 [[TMP0]], i32* [[TMP43]], align 4
+// CHECK18-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 1
+// CHECK18-NEXT:    [[TMP45:%.*]] = bitcast i8** [[TMP44]] to i32*
+// CHECK18-NEXT:    store i32 [[TMP0]], i32* [[TMP45]], align 4
+// CHECK18-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 1
+// CHECK18-NEXT:    store i8* null, i8** [[TMP46]], align 4
+// CHECK18-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 2
+// CHECK18-NEXT:    [[TMP48:%.*]] = bitcast i8** [[TMP47]] to i32**
+// CHECK18-NEXT:    store i32* [[VLA]], i32** [[TMP48]], align 4
+// CHECK18-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 2
 // CHECK18-NEXT:    [[TMP50:%.*]] = bitcast i8** [[TMP49]] to i32**
 // CHECK18-NEXT:    store i32* [[VLA]], i32** [[TMP50]], align 4
-// CHECK18-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 2
-// CHECK18-NEXT:    [[TMP52:%.*]] = bitcast i8** [[TMP51]] to i32**
-// CHECK18-NEXT:    store i32* [[VLA]], i32** [[TMP52]], align 4
-// CHECK18-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 2
-// CHECK18-NEXT:    store i64 [[TMP36]], i64* [[TMP53]], align 4
-// CHECK18-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 2
-// CHECK18-NEXT:    store i8* null, i8** [[TMP54]], align 4
-// CHECK18-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP58:%.*]] = load i32, i32* [[N]], align 4
-// CHECK18-NEXT:    store i32 [[TMP58]], i32* [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK18-NEXT:    [[TMP59:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK18-NEXT:    [[SUB11:%.*]] = sub nsw i32 [[TMP59]], 0
+// CHECK18-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 2
+// CHECK18-NEXT:    store i64 [[TMP35]], i64* [[TMP51]], align 4
+// CHECK18-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 2
+// CHECK18-NEXT:    store i8* null, i8** [[TMP52]], align 4
+// CHECK18-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP56:%.*]] = load i32, i32* [[N]], align 4
+// CHECK18-NEXT:    store i32 [[TMP56]], i32* [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK18-NEXT:    [[TMP57:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK18-NEXT:    [[SUB11:%.*]] = sub nsw i32 [[TMP57]], 0
 // CHECK18-NEXT:    [[DIV12:%.*]] = sdiv i32 [[SUB11]], 1
 // CHECK18-NEXT:    [[SUB13:%.*]] = sub nsw i32 [[DIV12]], 1
 // CHECK18-NEXT:    store i32 [[SUB13]], i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK18-NEXT:    [[TMP60:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK18-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP60]], 1
-// CHECK18-NEXT:    [[TMP61:%.*]] = zext i32 [[ADD14]] to i64
-// CHECK18-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP61]])
-// CHECK18-NEXT:    [[TMP62:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l159.region_id, i32 3, i8** [[TMP55]], i8** [[TMP56]], i64* [[TMP57]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK18-NEXT:    [[TMP63:%.*]] = icmp ne i32 [[TMP62]], 0
-// CHECK18-NEXT:    br i1 [[TMP63]], label [[OMP_OFFLOAD_FAILED15:%.*]], label [[OMP_OFFLOAD_CONT16:%.*]]
+// CHECK18-NEXT:    [[TMP58:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
+// CHECK18-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP58]], 1
+// CHECK18-NEXT:    [[TMP59:%.*]] = zext i32 [[ADD14]] to i64
+// CHECK18-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP59]])
+// CHECK18-NEXT:    [[TMP60:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l159.region_id, i32 3, i8** [[TMP53]], i8** [[TMP54]], i64* [[TMP55]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.5, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK18-NEXT:    [[TMP61:%.*]] = icmp ne i32 [[TMP60]], 0
+// CHECK18-NEXT:    br i1 [[TMP61]], label [[OMP_OFFLOAD_FAILED15:%.*]], label [[OMP_OFFLOAD_CONT16:%.*]]
 // CHECK18:       omp_offload.failed15:
-// CHECK18-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l159(i32 [[TMP34]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK18-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l159(i32 [[TMP33]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK18-NEXT:    br label [[OMP_OFFLOAD_CONT16]]
 // CHECK18:       omp_offload.cont16:
-// CHECK18-NEXT:    [[TMP64:%.*]] = load i32, i32* [[M]], align 4
-// CHECK18-NEXT:    store i32 [[TMP64]], i32* [[M_CASTED]], align 4
-// CHECK18-NEXT:    [[TMP65:%.*]] = load i32, i32* [[M_CASTED]], align 4
-// CHECK18-NEXT:    [[TMP66:%.*]] = load i32, i32* [[N]], align 4
-// CHECK18-NEXT:    store i32 [[TMP66]], i32* [[N_CASTED17]], align 4
-// CHECK18-NEXT:    [[TMP67:%.*]] = load i32, i32* [[N_CASTED17]], align 4
-// CHECK18-NEXT:    [[TMP68:%.*]] = mul nuw i32 [[TMP0]], 4
-// CHECK18-NEXT:    [[TMP69:%.*]] = sext i32 [[TMP68]] to i64
-// CHECK18-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP71:%.*]] = bitcast i8** [[TMP70]] to i32*
-// CHECK18-NEXT:    store i32 [[TMP65]], i32* [[TMP71]], align 4
-// CHECK18-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP73:%.*]] = bitcast i8** [[TMP72]] to i32*
-// CHECK18-NEXT:    store i32 [[TMP65]], i32* [[TMP73]], align 4
-// CHECK18-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 0
-// CHECK18-NEXT:    store i64 4, i64* [[TMP74]], align 4
-// CHECK18-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 0
-// CHECK18-NEXT:    store i8* null, i8** [[TMP75]], align 4
-// CHECK18-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 1
+// CHECK18-NEXT:    [[TMP62:%.*]] = load i32, i32* [[M]], align 4
+// CHECK18-NEXT:    store i32 [[TMP62]], i32* [[M_CASTED]], align 4
+// CHECK18-NEXT:    [[TMP63:%.*]] = load i32, i32* [[M_CASTED]], align 4
+// CHECK18-NEXT:    [[TMP64:%.*]] = load i32, i32* [[N]], align 4
+// CHECK18-NEXT:    store i32 [[TMP64]], i32* [[N_CASTED17]], align 4
+// CHECK18-NEXT:    [[TMP65:%.*]] = load i32, i32* [[N_CASTED17]], align 4
+// CHECK18-NEXT:    [[TMP66:%.*]] = mul nuw i32 [[TMP0]], 4
+// CHECK18-NEXT:    [[TMP67:%.*]] = sext i32 [[TMP66]] to i64
+// CHECK18-NEXT:    [[TMP68:%.*]] = bitcast [4 x i64]* [[DOTOFFLOAD_SIZES21]] to i8*
+// CHECK18-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP68]], i8* align 4 bitcast ([4 x i64]* @.offload_sizes.8 to i8*), i32 32, i1 false)
+// CHECK18-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP70:%.*]] = bitcast i8** [[TMP69]] to i32*
+// CHECK18-NEXT:    store i32 [[TMP63]], i32* [[TMP70]], align 4
+// CHECK18-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP72:%.*]] = bitcast i8** [[TMP71]] to i32*
+// CHECK18-NEXT:    store i32 [[TMP63]], i32* [[TMP72]], align 4
+// CHECK18-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 0
+// CHECK18-NEXT:    store i8* null, i8** [[TMP73]], align 4
+// CHECK18-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 1
+// CHECK18-NEXT:    [[TMP75:%.*]] = bitcast i8** [[TMP74]] to i32*
+// CHECK18-NEXT:    store i32 [[TMP65]], i32* [[TMP75]], align 4
+// CHECK18-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 1
 // CHECK18-NEXT:    [[TMP77:%.*]] = bitcast i8** [[TMP76]] to i32*
-// CHECK18-NEXT:    store i32 [[TMP67]], i32* [[TMP77]], align 4
-// CHECK18-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 1
-// CHECK18-NEXT:    [[TMP79:%.*]] = bitcast i8** [[TMP78]] to i32*
-// CHECK18-NEXT:    store i32 [[TMP67]], i32* [[TMP79]], align 4
-// CHECK18-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 1
-// CHECK18-NEXT:    store i64 4, i64* [[TMP80]], align 4
-// CHECK18-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 1
-// CHECK18-NEXT:    store i8* null, i8** [[TMP81]], align 4
-// CHECK18-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 2
-// CHECK18-NEXT:    [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i32*
-// CHECK18-NEXT:    store i32 [[TMP0]], i32* [[TMP83]], align 4
-// CHECK18-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 2
-// CHECK18-NEXT:    [[TMP85:%.*]] = bitcast i8** [[TMP84]] to i32*
-// CHECK18-NEXT:    store i32 [[TMP0]], i32* [[TMP85]], align 4
-// CHECK18-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 2
-// CHECK18-NEXT:    store i64 4, i64* [[TMP86]], align 4
-// CHECK18-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 2
-// CHECK18-NEXT:    store i8* null, i8** [[TMP87]], align 4
-// CHECK18-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 3
-// CHECK18-NEXT:    [[TMP89:%.*]] = bitcast i8** [[TMP88]] to i32**
-// CHECK18-NEXT:    store i32* [[VLA]], i32** [[TMP89]], align 4
-// CHECK18-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 3
-// CHECK18-NEXT:    [[TMP91:%.*]] = bitcast i8** [[TMP90]] to i32**
-// CHECK18-NEXT:    store i32* [[VLA]], i32** [[TMP91]], align 4
-// CHECK18-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 3
-// CHECK18-NEXT:    store i64 [[TMP69]], i64* [[TMP92]], align 4
-// CHECK18-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 3
-// CHECK18-NEXT:    store i8* null, i8** [[TMP93]], align 4
-// CHECK18-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP97:%.*]] = load i32, i32* [[N]], align 4
-// CHECK18-NEXT:    store i32 [[TMP97]], i32* [[DOTCAPTURE_EXPR_23]], align 4
-// CHECK18-NEXT:    [[TMP98:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_23]], align 4
-// CHECK18-NEXT:    [[SUB25:%.*]] = sub nsw i32 [[TMP98]], 0
+// CHECK18-NEXT:    store i32 [[TMP65]], i32* [[TMP77]], align 4
+// CHECK18-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 1
+// CHECK18-NEXT:    store i8* null, i8** [[TMP78]], align 4
+// CHECK18-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 2
+// CHECK18-NEXT:    [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i32*
+// CHECK18-NEXT:    store i32 [[TMP0]], i32* [[TMP80]], align 4
+// CHECK18-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 2
+// CHECK18-NEXT:    [[TMP82:%.*]] = bitcast i8** [[TMP81]] to i32*
+// CHECK18-NEXT:    store i32 [[TMP0]], i32* [[TMP82]], align 4
+// CHECK18-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 2
+// CHECK18-NEXT:    store i8* null, i8** [[TMP83]], align 4
+// CHECK18-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 3
+// CHECK18-NEXT:    [[TMP85:%.*]] = bitcast i8** [[TMP84]] to i32**
+// CHECK18-NEXT:    store i32* [[VLA]], i32** [[TMP85]], align 4
+// CHECK18-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 3
+// CHECK18-NEXT:    [[TMP87:%.*]] = bitcast i8** [[TMP86]] to i32**
+// CHECK18-NEXT:    store i32* [[VLA]], i32** [[TMP87]], align 4
+// CHECK18-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 3
+// CHECK18-NEXT:    store i64 [[TMP67]], i64* [[TMP88]], align 4
+// CHECK18-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 3
+// CHECK18-NEXT:    store i8* null, i8** [[TMP89]], align 4
+// CHECK18-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP93:%.*]] = load i32, i32* [[N]], align 4
+// CHECK18-NEXT:    store i32 [[TMP93]], i32* [[DOTCAPTURE_EXPR_23]], align 4
+// CHECK18-NEXT:    [[TMP94:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_23]], align 4
+// CHECK18-NEXT:    [[SUB25:%.*]] = sub nsw i32 [[TMP94]], 0
 // CHECK18-NEXT:    [[DIV26:%.*]] = sdiv i32 [[SUB25]], 1
 // CHECK18-NEXT:    [[SUB27:%.*]] = sub nsw i32 [[DIV26]], 1
 // CHECK18-NEXT:    store i32 [[SUB27]], i32* [[DOTCAPTURE_EXPR_24]], align 4
-// CHECK18-NEXT:    [[TMP99:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_24]], align 4
-// CHECK18-NEXT:    [[ADD28:%.*]] = add nsw i32 [[TMP99]], 1
-// CHECK18-NEXT:    [[TMP100:%.*]] = zext i32 [[ADD28]] to i64
-// CHECK18-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP100]])
-// CHECK18-NEXT:    [[TMP101:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l164.region_id, i32 4, i8** [[TMP94]], i8** [[TMP95]], i64* [[TMP96]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.7, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK18-NEXT:    [[TMP102:%.*]] = icmp ne i32 [[TMP101]], 0
-// CHECK18-NEXT:    br i1 [[TMP102]], label [[OMP_OFFLOAD_FAILED29:%.*]], label [[OMP_OFFLOAD_CONT30:%.*]]
+// CHECK18-NEXT:    [[TMP95:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_24]], align 4
+// CHECK18-NEXT:    [[ADD28:%.*]] = add nsw i32 [[TMP95]], 1
+// CHECK18-NEXT:    [[TMP96:%.*]] = zext i32 [[ADD28]] to i64
+// CHECK18-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP96]])
+// CHECK18-NEXT:    [[TMP97:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l164.region_id, i32 4, i8** [[TMP90]], i8** [[TMP91]], i64* [[TMP92]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK18-NEXT:    [[TMP98:%.*]] = icmp ne i32 [[TMP97]], 0
+// CHECK18-NEXT:    br i1 [[TMP98]], label [[OMP_OFFLOAD_FAILED29:%.*]], label [[OMP_OFFLOAD_CONT30:%.*]]
 // CHECK18:       omp_offload.failed29:
-// CHECK18-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l164(i32 [[TMP65]], i32 [[TMP67]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK18-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l164(i32 [[TMP63]], i32 [[TMP65]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK18-NEXT:    br label [[OMP_OFFLOAD_CONT30]]
 // CHECK18:       omp_offload.cont30:
-// CHECK18-NEXT:    [[TMP103:%.*]] = load i32, i32* [[N]], align 4
-// CHECK18-NEXT:    store i32 [[TMP103]], i32* [[N_CASTED31]], align 4
-// CHECK18-NEXT:    [[TMP104:%.*]] = load i32, i32* [[N_CASTED31]], align 4
-// CHECK18-NEXT:    [[TMP105:%.*]] = mul nuw i32 [[TMP0]], 4
-// CHECK18-NEXT:    [[TMP106:%.*]] = sext i32 [[TMP105]] to i64
-// CHECK18-NEXT:    [[TMP107:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS32]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP108:%.*]] = bitcast i8** [[TMP107]] to i32*
-// CHECK18-NEXT:    store i32 [[TMP104]], i32* [[TMP108]], align 4
-// CHECK18-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS33]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP99:%.*]] = load i32, i32* [[N]], align 4
+// CHECK18-NEXT:    store i32 [[TMP99]], i32* [[N_CASTED31]], align 4
+// CHECK18-NEXT:    [[TMP100:%.*]] = load i32, i32* [[N_CASTED31]], align 4
+// CHECK18-NEXT:    [[TMP101:%.*]] = mul nuw i32 [[TMP0]], 4
+// CHECK18-NEXT:    [[TMP102:%.*]] = sext i32 [[TMP101]] to i64
+// CHECK18-NEXT:    [[TMP103:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES35]] to i8*
+// CHECK18-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP103]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes.12 to i8*), i32 24, i1 false)
+// CHECK18-NEXT:    [[TMP104:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS32]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP105:%.*]] = bitcast i8** [[TMP104]] to i32*
+// CHECK18-NEXT:    store i32 [[TMP100]], i32* [[TMP105]], align 4
+// CHECK18-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS33]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP107:%.*]] = bitcast i8** [[TMP106]] to i32*
+// CHECK18-NEXT:    store i32 [[TMP100]], i32* [[TMP107]], align 4
+// CHECK18-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS34]], i32 0, i32 0
+// CHECK18-NEXT:    store i8* null, i8** [[TMP108]], align 4
+// CHECK18-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS32]], i32 0, i32 1
 // CHECK18-NEXT:    [[TMP110:%.*]] = bitcast i8** [[TMP109]] to i32*
-// CHECK18-NEXT:    store i32 [[TMP104]], i32* [[TMP110]], align 4
-// CHECK18-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES35]], i32 0, i32 0
-// CHECK18-NEXT:    store i64 4, i64* [[TMP111]], align 4
-// CHECK18-NEXT:    [[TMP112:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS34]], i32 0, i32 0
-// CHECK18-NEXT:    store i8* null, i8** [[TMP112]], align 4
-// CHECK18-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS32]], i32 0, i32 1
-// CHECK18-NEXT:    [[TMP114:%.*]] = bitcast i8** [[TMP113]] to i32*
-// CHECK18-NEXT:    store i32 [[TMP0]], i32* [[TMP114]], align 4
-// CHECK18-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS33]], i32 0, i32 1
-// CHECK18-NEXT:    [[TMP116:%.*]] = bitcast i8** [[TMP115]] to i32*
-// CHECK18-NEXT:    store i32 [[TMP0]], i32* [[TMP116]], align 4
-// CHECK18-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES35]], i32 0, i32 1
-// CHECK18-NEXT:    store i64 4, i64* [[TMP117]], align 4
-// CHECK18-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS34]], i32 0, i32 1
-// CHECK18-NEXT:    store i8* null, i8** [[TMP118]], align 4
-// CHECK18-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS32]], i32 0, i32 2
-// CHECK18-NEXT:    [[TMP120:%.*]] = bitcast i8** [[TMP119]] to i32**
-// CHECK18-NEXT:    store i32* [[VLA]], i32** [[TMP120]], align 4
-// CHECK18-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS33]], i32 0, i32 2
-// CHECK18-NEXT:    [[TMP122:%.*]] = bitcast i8** [[TMP121]] to i32**
-// CHECK18-NEXT:    store i32* [[VLA]], i32** [[TMP122]], align 4
-// CHECK18-NEXT:    [[TMP123:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES35]], i32 0, i32 2
-// CHECK18-NEXT:    store i64 [[TMP106]], i64* [[TMP123]], align 4
-// CHECK18-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS34]], i32 0, i32 2
-// CHECK18-NEXT:    store i8* null, i8** [[TMP124]], align 4
-// CHECK18-NEXT:    [[TMP125:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS32]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP126:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS33]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES35]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP128:%.*]] = load i32, i32* [[N]], align 4
-// CHECK18-NEXT:    store i32 [[TMP128]], i32* [[DOTCAPTURE_EXPR_37]], align 4
-// CHECK18-NEXT:    [[TMP129:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_37]], align 4
-// CHECK18-NEXT:    [[SUB39:%.*]] = sub nsw i32 [[TMP129]], 0
+// CHECK18-NEXT:    store i32 [[TMP0]], i32* [[TMP110]], align 4
+// CHECK18-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS33]], i32 0, i32 1
+// CHECK18-NEXT:    [[TMP112:%.*]] = bitcast i8** [[TMP111]] to i32*
+// CHECK18-NEXT:    store i32 [[TMP0]], i32* [[TMP112]], align 4
+// CHECK18-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS34]], i32 0, i32 1
+// CHECK18-NEXT:    store i8* null, i8** [[TMP113]], align 4
+// CHECK18-NEXT:    [[TMP114:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS32]], i32 0, i32 2
+// CHECK18-NEXT:    [[TMP115:%.*]] = bitcast i8** [[TMP114]] to i32**
+// CHECK18-NEXT:    store i32* [[VLA]], i32** [[TMP115]], align 4
+// CHECK18-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS33]], i32 0, i32 2
+// CHECK18-NEXT:    [[TMP117:%.*]] = bitcast i8** [[TMP116]] to i32**
+// CHECK18-NEXT:    store i32* [[VLA]], i32** [[TMP117]], align 4
+// CHECK18-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES35]], i32 0, i32 2
+// CHECK18-NEXT:    store i64 [[TMP102]], i64* [[TMP118]], align 4
+// CHECK18-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS34]], i32 0, i32 2
+// CHECK18-NEXT:    store i8* null, i8** [[TMP119]], align 4
+// CHECK18-NEXT:    [[TMP120:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS32]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS33]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES35]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP123:%.*]] = load i32, i32* [[N]], align 4
+// CHECK18-NEXT:    store i32 [[TMP123]], i32* [[DOTCAPTURE_EXPR_37]], align 4
+// CHECK18-NEXT:    [[TMP124:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_37]], align 4
+// CHECK18-NEXT:    [[SUB39:%.*]] = sub nsw i32 [[TMP124]], 0
 // CHECK18-NEXT:    [[DIV40:%.*]] = sdiv i32 [[SUB39]], 1
 // CHECK18-NEXT:    [[SUB41:%.*]] = sub nsw i32 [[DIV40]], 1
 // CHECK18-NEXT:    store i32 [[SUB41]], i32* [[DOTCAPTURE_EXPR_38]], align 4
-// CHECK18-NEXT:    [[TMP130:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_38]], align 4
-// CHECK18-NEXT:    [[ADD42:%.*]] = add nsw i32 [[TMP130]], 1
-// CHECK18-NEXT:    [[TMP131:%.*]] = zext i32 [[ADD42]] to i64
-// CHECK18-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP131]])
-// CHECK18-NEXT:    [[TMP132:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l169.region_id, i32 3, i8** [[TMP125]], i8** [[TMP126]], i64* [[TMP127]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.10, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK18-NEXT:    [[TMP133:%.*]] = icmp ne i32 [[TMP132]], 0
-// CHECK18-NEXT:    br i1 [[TMP133]], label [[OMP_OFFLOAD_FAILED43:%.*]], label [[OMP_OFFLOAD_CONT44:%.*]]
+// CHECK18-NEXT:    [[TMP125:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_38]], align 4
+// CHECK18-NEXT:    [[ADD42:%.*]] = add nsw i32 [[TMP125]], 1
+// CHECK18-NEXT:    [[TMP126:%.*]] = zext i32 [[ADD42]] to i64
+// CHECK18-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP126]])
+// CHECK18-NEXT:    [[TMP127:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l169.region_id, i32 3, i8** [[TMP120]], i8** [[TMP121]], i64* [[TMP122]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK18-NEXT:    [[TMP128:%.*]] = icmp ne i32 [[TMP127]], 0
+// CHECK18-NEXT:    br i1 [[TMP128]], label [[OMP_OFFLOAD_FAILED43:%.*]], label [[OMP_OFFLOAD_CONT44:%.*]]
 // CHECK18:       omp_offload.failed43:
-// CHECK18-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l169(i32 [[TMP104]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK18-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l169(i32 [[TMP100]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK18-NEXT:    br label [[OMP_OFFLOAD_CONT44]]
 // CHECK18:       omp_offload.cont44:
-// CHECK18-NEXT:    [[TMP134:%.*]] = load i32, i32* [[M]], align 4
-// CHECK18-NEXT:    store i32 [[TMP134]], i32* [[M_CASTED45]], align 4
-// CHECK18-NEXT:    [[TMP135:%.*]] = load i32, i32* [[M_CASTED45]], align 4
-// CHECK18-NEXT:    [[TMP136:%.*]] = load i32, i32* [[N]], align 4
-// CHECK18-NEXT:    store i32 [[TMP136]], i32* [[N_CASTED46]], align 4
-// CHECK18-NEXT:    [[TMP137:%.*]] = load i32, i32* [[N_CASTED46]], align 4
-// CHECK18-NEXT:    [[TMP138:%.*]] = mul nuw i32 [[TMP0]], 4
-// CHECK18-NEXT:    [[TMP139:%.*]] = sext i32 [[TMP138]] to i64
-// CHECK18-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP141:%.*]] = bitcast i8** [[TMP140]] to i32*
-// CHECK18-NEXT:    store i32 [[TMP135]], i32* [[TMP141]], align 4
-// CHECK18-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP143:%.*]] = bitcast i8** [[TMP142]] to i32*
-// CHECK18-NEXT:    store i32 [[TMP135]], i32* [[TMP143]], align 4
-// CHECK18-NEXT:    [[TMP144:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES50]], i32 0, i32 0
-// CHECK18-NEXT:    store i64 4, i64* [[TMP144]], align 4
-// CHECK18-NEXT:    [[TMP145:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS49]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP129:%.*]] = load i32, i32* [[M]], align 4
+// CHECK18-NEXT:    store i32 [[TMP129]], i32* [[M_CASTED45]], align 4
+// CHECK18-NEXT:    [[TMP130:%.*]] = load i32, i32* [[M_CASTED45]], align 4
+// CHECK18-NEXT:    [[TMP131:%.*]] = load i32, i32* [[N]], align 4
+// CHECK18-NEXT:    store i32 [[TMP131]], i32* [[N_CASTED46]], align 4
+// CHECK18-NEXT:    [[TMP132:%.*]] = load i32, i32* [[N_CASTED46]], align 4
+// CHECK18-NEXT:    [[TMP133:%.*]] = mul nuw i32 [[TMP0]], 4
+// CHECK18-NEXT:    [[TMP134:%.*]] = sext i32 [[TMP133]] to i64
+// CHECK18-NEXT:    [[TMP135:%.*]] = bitcast [4 x i64]* [[DOTOFFLOAD_SIZES50]] to i8*
+// CHECK18-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP135]], i8* align 4 bitcast ([4 x i64]* @.offload_sizes.16 to i8*), i32 32, i1 false)
+// CHECK18-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP137:%.*]] = bitcast i8** [[TMP136]] to i32*
+// CHECK18-NEXT:    store i32 [[TMP130]], i32* [[TMP137]], align 4
+// CHECK18-NEXT:    [[TMP138:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP139:%.*]] = bitcast i8** [[TMP138]] to i32*
+// CHECK18-NEXT:    store i32 [[TMP130]], i32* [[TMP139]], align 4
+// CHECK18-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS49]], i32 0, i32 0
+// CHECK18-NEXT:    store i8* null, i8** [[TMP140]], align 4
+// CHECK18-NEXT:    [[TMP141:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 1
+// CHECK18-NEXT:    [[TMP142:%.*]] = bitcast i8** [[TMP141]] to i32*
+// CHECK18-NEXT:    store i32 [[TMP132]], i32* [[TMP142]], align 4
+// CHECK18-NEXT:    [[TMP143:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 1
+// CHECK18-NEXT:    [[TMP144:%.*]] = bitcast i8** [[TMP143]] to i32*
+// CHECK18-NEXT:    store i32 [[TMP132]], i32* [[TMP144]], align 4
+// CHECK18-NEXT:    [[TMP145:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS49]], i32 0, i32 1
 // CHECK18-NEXT:    store i8* null, i8** [[TMP145]], align 4
-// CHECK18-NEXT:    [[TMP146:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 1
+// CHECK18-NEXT:    [[TMP146:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 2
 // CHECK18-NEXT:    [[TMP147:%.*]] = bitcast i8** [[TMP146]] to i32*
-// CHECK18-NEXT:    store i32 [[TMP137]], i32* [[TMP147]], align 4
-// CHECK18-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 1
+// CHECK18-NEXT:    store i32 [[TMP0]], i32* [[TMP147]], align 4
+// CHECK18-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 2
 // CHECK18-NEXT:    [[TMP149:%.*]] = bitcast i8** [[TMP148]] to i32*
-// CHECK18-NEXT:    store i32 [[TMP137]], i32* [[TMP149]], align 4
-// CHECK18-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES50]], i32 0, i32 1
-// CHECK18-NEXT:    store i64 4, i64* [[TMP150]], align 4
-// CHECK18-NEXT:    [[TMP151:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS49]], i32 0, i32 1
-// CHECK18-NEXT:    store i8* null, i8** [[TMP151]], align 4
-// CHECK18-NEXT:    [[TMP152:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 2
-// CHECK18-NEXT:    [[TMP153:%.*]] = bitcast i8** [[TMP152]] to i32*
-// CHECK18-NEXT:    store i32 [[TMP0]], i32* [[TMP153]], align 4
-// CHECK18-NEXT:    [[TMP154:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 2
-// CHECK18-NEXT:    [[TMP155:%.*]] = bitcast i8** [[TMP154]] to i32*
-// CHECK18-NEXT:    store i32 [[TMP0]], i32* [[TMP155]], align 4
-// CHECK18-NEXT:    [[TMP156:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES50]], i32 0, i32 2
-// CHECK18-NEXT:    store i64 4, i64* [[TMP156]], align 4
-// CHECK18-NEXT:    [[TMP157:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS49]], i32 0, i32 2
-// CHECK18-NEXT:    store i8* null, i8** [[TMP157]], align 4
-// CHECK18-NEXT:    [[TMP158:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 3
-// CHECK18-NEXT:    [[TMP159:%.*]] = bitcast i8** [[TMP158]] to i32**
-// CHECK18-NEXT:    store i32* [[VLA]], i32** [[TMP159]], align 4
-// CHECK18-NEXT:    [[TMP160:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 3
-// CHECK18-NEXT:    [[TMP161:%.*]] = bitcast i8** [[TMP160]] to i32**
-// CHECK18-NEXT:    store i32* [[VLA]], i32** [[TMP161]], align 4
-// CHECK18-NEXT:    [[TMP162:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES50]], i32 0, i32 3
-// CHECK18-NEXT:    store i64 [[TMP139]], i64* [[TMP162]], align 4
-// CHECK18-NEXT:    [[TMP163:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS49]], i32 0, i32 3
-// CHECK18-NEXT:    store i8* null, i8** [[TMP163]], align 4
-// CHECK18-NEXT:    [[TMP164:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP165:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP166:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES50]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP167:%.*]] = load i32, i32* [[N]], align 4
-// CHECK18-NEXT:    store i32 [[TMP167]], i32* [[DOTCAPTURE_EXPR_52]], align 4
-// CHECK18-NEXT:    [[TMP168:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_52]], align 4
-// CHECK18-NEXT:    [[SUB54:%.*]] = sub nsw i32 [[TMP168]], 0
+// CHECK18-NEXT:    store i32 [[TMP0]], i32* [[TMP149]], align 4
+// CHECK18-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS49]], i32 0, i32 2
+// CHECK18-NEXT:    store i8* null, i8** [[TMP150]], align 4
+// CHECK18-NEXT:    [[TMP151:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 3
+// CHECK18-NEXT:    [[TMP152:%.*]] = bitcast i8** [[TMP151]] to i32**
+// CHECK18-NEXT:    store i32* [[VLA]], i32** [[TMP152]], align 4
+// CHECK18-NEXT:    [[TMP153:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 3
+// CHECK18-NEXT:    [[TMP154:%.*]] = bitcast i8** [[TMP153]] to i32**
+// CHECK18-NEXT:    store i32* [[VLA]], i32** [[TMP154]], align 4
+// CHECK18-NEXT:    [[TMP155:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES50]], i32 0, i32 3
+// CHECK18-NEXT:    store i64 [[TMP134]], i64* [[TMP155]], align 4
+// CHECK18-NEXT:    [[TMP156:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS49]], i32 0, i32 3
+// CHECK18-NEXT:    store i8* null, i8** [[TMP156]], align 4
+// CHECK18-NEXT:    [[TMP157:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP158:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP159:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES50]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP160:%.*]] = load i32, i32* [[N]], align 4
+// CHECK18-NEXT:    store i32 [[TMP160]], i32* [[DOTCAPTURE_EXPR_52]], align 4
+// CHECK18-NEXT:    [[TMP161:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_52]], align 4
+// CHECK18-NEXT:    [[SUB54:%.*]] = sub nsw i32 [[TMP161]], 0
 // CHECK18-NEXT:    [[DIV55:%.*]] = sdiv i32 [[SUB54]], 1
 // CHECK18-NEXT:    [[SUB56:%.*]] = sub nsw i32 [[DIV55]], 1
 // CHECK18-NEXT:    store i32 [[SUB56]], i32* [[DOTCAPTURE_EXPR_53]], align 4
-// CHECK18-NEXT:    [[TMP169:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_53]], align 4
-// CHECK18-NEXT:    [[ADD57:%.*]] = add nsw i32 [[TMP169]], 1
-// CHECK18-NEXT:    [[TMP170:%.*]] = zext i32 [[ADD57]] to i64
-// CHECK18-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP170]])
-// CHECK18-NEXT:    [[TMP171:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l174.region_id, i32 4, i8** [[TMP164]], i8** [[TMP165]], i64* [[TMP166]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK18-NEXT:    [[TMP172:%.*]] = icmp ne i32 [[TMP171]], 0
-// CHECK18-NEXT:    br i1 [[TMP172]], label [[OMP_OFFLOAD_FAILED58:%.*]], label [[OMP_OFFLOAD_CONT59:%.*]]
+// CHECK18-NEXT:    [[TMP162:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_53]], align 4
+// CHECK18-NEXT:    [[ADD57:%.*]] = add nsw i32 [[TMP162]], 1
+// CHECK18-NEXT:    [[TMP163:%.*]] = zext i32 [[ADD57]] to i64
+// CHECK18-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP163]])
+// CHECK18-NEXT:    [[TMP164:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l174.region_id, i32 4, i8** [[TMP157]], i8** [[TMP158]], i64* [[TMP159]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.17, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK18-NEXT:    [[TMP165:%.*]] = icmp ne i32 [[TMP164]], 0
+// CHECK18-NEXT:    br i1 [[TMP165]], label [[OMP_OFFLOAD_FAILED58:%.*]], label [[OMP_OFFLOAD_CONT59:%.*]]
 // CHECK18:       omp_offload.failed58:
-// CHECK18-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l174(i32 [[TMP135]], i32 [[TMP137]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK18-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l174(i32 [[TMP130]], i32 [[TMP132]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK18-NEXT:    br label [[OMP_OFFLOAD_CONT59]]
 // CHECK18:       omp_offload.cont59:
-// CHECK18-NEXT:    [[TMP173:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK18-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10EEiT_(i32 noundef [[TMP173]])
+// CHECK18-NEXT:    [[TMP166:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK18-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10EEiT_(i32 noundef [[TMP166]])
 // CHECK18-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK18-NEXT:    [[TMP174:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK18-NEXT:    call void @llvm.stackrestore(i8* [[TMP174]])
-// CHECK18-NEXT:    [[TMP175:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK18-NEXT:    ret i32 [[TMP175]]
+// CHECK18-NEXT:    [[TMP167:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK18-NEXT:    call void @llvm.stackrestore(i8* [[TMP167]])
+// CHECK18-NEXT:    [[TMP168:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK18-NEXT:    ret i32 [[TMP168]]
 //
 //
 // CHECK18-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l154
@@ -22822,11 +22738,11 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
 // CHECK18-NEXT:    store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK18-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*, i32)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP4]])
+// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*, i32)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP4]])
 // CHECK18-NEXT:    ret void
 //
 //
-// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..5
+// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..6
 // CHECK18-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK18-NEXT:  entry:
 // CHECK18-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -22904,7 +22820,7 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !29
 // CHECK18-NEXT:    store i32 [[TMP19]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !29
 // CHECK18-NEXT:    [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !29
-// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*, i32)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]], i32 [[TMP20]]), !llvm.access.group !29
+// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*, i32)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]], i32 [[TMP20]]), !llvm.access.group !29
 // CHECK18-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK18:       omp.inner.for.inc:
 // CHECK18-NEXT:    [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29
@@ -22958,7 +22874,7 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    ret void
 //
 //
-// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..6
+// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..7
 // CHECK18-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK18-NEXT:  entry:
 // CHECK18-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -23085,11 +23001,11 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    store i32* [[A]], i32** [[A_ADDR]], align 4
 // CHECK18-NEXT:    [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4
 // CHECK18-NEXT:    [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4
-// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]])
+// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]])
 // CHECK18-NEXT:    ret void
 //
 //
-// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..8
+// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..10
 // CHECK18-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] {
 // CHECK18-NEXT:  entry:
 // CHECK18-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -23159,7 +23075,7 @@ int main (int argc, char **argv) {
 // CHECK18:       omp.inner.for.body:
 // CHECK18-NEXT:    [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !35
 // CHECK18-NEXT:    [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !35
-// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]]), !llvm.access.group !35
+// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]]), !llvm.access.group !35
 // CHECK18-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK18:       omp.inner.for.inc:
 // CHECK18-NEXT:    [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35
@@ -23190,7 +23106,7 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    ret void
 //
 //
-// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..9
+// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..11
 // CHECK18-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] {
 // CHECK18-NEXT:  entry:
 // CHECK18-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -23320,11 +23236,11 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
 // CHECK18-NEXT:    store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK18-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*, i32)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP4]])
+// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*, i32)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP4]])
 // CHECK18-NEXT:    ret void
 //
 //
-// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..11
+// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..14
 // CHECK18-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK18-NEXT:  entry:
 // CHECK18-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -23400,7 +23316,7 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !41
 // CHECK18-NEXT:    store i32 [[TMP18]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !41
 // CHECK18-NEXT:    [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !41
-// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*, i32)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]], i32 [[TMP19]]), !llvm.access.group !41
+// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*, i32)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]], i32 [[TMP19]]), !llvm.access.group !41
 // CHECK18-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK18:       omp.inner.for.inc:
 // CHECK18-NEXT:    [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41
@@ -23431,7 +23347,7 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    ret void
 //
 //
-// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..12
+// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..15
 // CHECK18-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK18-NEXT:  entry:
 // CHECK18-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -23545,7 +23461,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK18-LABEL: define {{[^@]+}}@_Z5tmainIiLi10EEiT_
-// CHECK18-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK18-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK18-NEXT:  entry:
 // CHECK18-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK18-NEXT:    [[A:%.*]] = alloca [10 x i32], align 4
@@ -23585,7 +23501,7 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK18-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK18-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK18-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l122.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.16, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK18-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l122.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.20, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.21, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK18-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK18-NEXT:    br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK18:       omp_offload.failed:
@@ -23603,7 +23519,7 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK18-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
 // CHECK18-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK18-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l127.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.19, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.20, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK18-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l127.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.24, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.25, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK18-NEXT:    [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
 // CHECK18-NEXT:    br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]]
 // CHECK18:       omp_offload.failed5:
@@ -23632,7 +23548,7 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0
 // CHECK18-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS8]], i32 0, i32 0
 // CHECK18-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK18-NEXT:    [[TMP32:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l132.region_id, i32 2, i8** [[TMP30]], i8** [[TMP31]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.23, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.24, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK18-NEXT:    [[TMP32:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l132.region_id, i32 2, i8** [[TMP30]], i8** [[TMP31]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.28, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.29, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK18-NEXT:    [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0
 // CHECK18-NEXT:    br i1 [[TMP33]], label [[OMP_OFFLOAD_FAILED11:%.*]], label [[OMP_OFFLOAD_CONT12:%.*]]
 // CHECK18:       omp_offload.failed11:
@@ -23650,7 +23566,7 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0
 // CHECK18-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 0
 // CHECK18-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK18-NEXT:    [[TMP41:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l137.region_id, i32 1, i8** [[TMP39]], i8** [[TMP40]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.27, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.28, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK18-NEXT:    [[TMP41:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l137.region_id, i32 1, i8** [[TMP39]], i8** [[TMP40]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.32, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.33, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK18-NEXT:    [[TMP42:%.*]] = icmp ne i32 [[TMP41]], 0
 // CHECK18-NEXT:    br i1 [[TMP42]], label [[OMP_OFFLOAD_FAILED17:%.*]], label [[OMP_OFFLOAD_CONT18:%.*]]
 // CHECK18:       omp_offload.failed17:
@@ -23679,7 +23595,7 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0
 // CHECK18-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 0
 // CHECK18-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK18-NEXT:    [[TMP57:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l142.region_id, i32 2, i8** [[TMP55]], i8** [[TMP56]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.31, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.32, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK18-NEXT:    [[TMP57:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l142.region_id, i32 2, i8** [[TMP55]], i8** [[TMP56]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.36, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.37, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK18-NEXT:    [[TMP58:%.*]] = icmp ne i32 [[TMP57]], 0
 // CHECK18-NEXT:    br i1 [[TMP58]], label [[OMP_OFFLOAD_FAILED24:%.*]], label [[OMP_OFFLOAD_CONT25:%.*]]
 // CHECK18:       omp_offload.failed24:
@@ -23695,11 +23611,11 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4
 // CHECK18-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4
 // CHECK18-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4
-// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK18-NEXT:    ret void
 //
 //
-// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..14
+// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..18
 // CHECK18-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK18-NEXT:  entry:
 // CHECK18-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -23745,7 +23661,7 @@ int main (int argc, char **argv) {
 // CHECK18:       omp.inner.for.body:
 // CHECK18-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !47
 // CHECK18-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !47
-// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !47
+// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !47
 // CHECK18-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK18:       omp.inner.for.inc:
 // CHECK18-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47
@@ -23767,7 +23683,7 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    ret void
 //
 //
-// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..15
+// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..19
 // CHECK18-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK18-NEXT:  entry:
 // CHECK18-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -23854,11 +23770,11 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4
 // CHECK18-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4
 // CHECK18-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4
-// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..17 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..22 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK18-NEXT:    ret void
 //
 //
-// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..17
+// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..22
 // CHECK18-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK18-NEXT:  entry:
 // CHECK18-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -23904,7 +23820,7 @@ int main (int argc, char **argv) {
 // CHECK18:       omp.inner.for.body:
 // CHECK18-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !53
 // CHECK18-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !53
-// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !53
+// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..23 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !53
 // CHECK18-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK18:       omp.inner.for.inc:
 // CHECK18-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53
@@ -23926,7 +23842,7 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    ret void
 //
 //
-// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..18
+// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..23
 // CHECK18-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK18-NEXT:  entry:
 // CHECK18-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -24022,11 +23938,11 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
 // CHECK18-NEXT:    store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK18-NEXT:    [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i32)* @.omp_outlined..21 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i32 [[TMP3]])
+// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i32)* @.omp_outlined..26 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i32 [[TMP3]])
 // CHECK18-NEXT:    ret void
 //
 //
-// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..21
+// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..26
 // CHECK18-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK18-NEXT:  entry:
 // CHECK18-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -24078,7 +23994,7 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !59
 // CHECK18-NEXT:    store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !59
 // CHECK18-NEXT:    [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !59
-// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..22 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]), !llvm.access.group !59
+// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..27 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]), !llvm.access.group !59
 // CHECK18-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK18:       omp.inner.for.inc:
 // CHECK18-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59
@@ -24100,7 +24016,7 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    ret void
 //
 //
-// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..22
+// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..27
 // CHECK18-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK18-NEXT:  entry:
 // CHECK18-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -24209,11 +24125,11 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4
 // CHECK18-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4
 // CHECK18-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4
-// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..25 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK18-NEXT:    ret void
 //
 //
-// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..25
+// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..30
 // CHECK18-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK18-NEXT:  entry:
 // CHECK18-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -24259,7 +24175,7 @@ int main (int argc, char **argv) {
 // CHECK18:       omp.inner.for.body:
 // CHECK18-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !65
 // CHECK18-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !65
-// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !65
+// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !65
 // CHECK18-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK18:       omp.inner.for.inc:
 // CHECK18-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !65
@@ -24281,7 +24197,7 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    ret void
 //
 //
-// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..26
+// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..31
 // CHECK18-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK18-NEXT:  entry:
 // CHECK18-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -24375,11 +24291,11 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
 // CHECK18-NEXT:    store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK18-NEXT:    [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i32)* @.omp_outlined..29 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i32 [[TMP3]])
+// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i32)* @.omp_outlined..34 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i32 [[TMP3]])
 // CHECK18-NEXT:    ret void
 //
 //
-// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..29
+// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..34
 // CHECK18-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK18-NEXT:  entry:
 // CHECK18-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -24431,7 +24347,7 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !71
 // CHECK18-NEXT:    store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !71
 // CHECK18-NEXT:    [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !71
-// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]), !llvm.access.group !71
+// CHECK18-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]), !llvm.access.group !71
 // CHECK18-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK18:       omp.inner.for.inc:
 // CHECK18-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !71
@@ -24453,7 +24369,7 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    ret void
 //
 //
-// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..30
+// CHECK18-LABEL: define {{[^@]+}}@.omp_outlined..35
 // CHECK18-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK18-NEXT:  entry:
 // CHECK18-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -24536,7 +24452,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK18-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK18-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK18-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK18-NEXT:  entry:
 // CHECK18-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK18-NEXT:    ret void
@@ -24609,14 +24525,14 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4
 // CHECK19-NEXT:    [[TMP4:%.*]] = mul nuw i32 [[TMP0]], 4
 // CHECK19-NEXT:    [[TMP5:%.*]] = sext i32 [[TMP4]] to i64
-// CHECK19-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP3]], i32* [[TMP7]], align 4
-// CHECK19-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP3]], i32* [[TMP9]], align 4
-// CHECK19-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK19-NEXT:    store i64 4, i64* [[TMP10]], align 4
+// CHECK19-NEXT:    [[TMP6:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK19-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP6]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes to i8*), i32 24, i1 false)
+// CHECK19-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP3]], i32* [[TMP8]], align 4
+// CHECK19-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP3]], i32* [[TMP10]], align 4
 // CHECK19-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
 // CHECK19-NEXT:    store i8* null, i8** [[TMP11]], align 4
 // CHECK19-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -24625,298 +24541,284 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK19-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i32*
 // CHECK19-NEXT:    store i32 [[TMP0]], i32* [[TMP15]], align 4
-// CHECK19-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK19-NEXT:    store i64 4, i64* [[TMP16]], align 4
-// CHECK19-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK19-NEXT:    store i8* null, i8** [[TMP17]], align 4
-// CHECK19-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK19-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
-// CHECK19-NEXT:    store i32* [[VLA]], i32** [[TMP19]], align 4
-// CHECK19-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK19-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32**
-// CHECK19-NEXT:    store i32* [[VLA]], i32** [[TMP21]], align 4
-// CHECK19-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK19-NEXT:    store i64 [[TMP5]], i64* [[TMP22]], align 4
-// CHECK19-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK19-NEXT:    store i8* null, i8** [[TMP23]], align 4
-// CHECK19-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP27:%.*]] = load i32, i32* [[N]], align 4
-// CHECK19-NEXT:    store i32 [[TMP27]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK19-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK19-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP28]], 0
+// CHECK19-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK19-NEXT:    store i8* null, i8** [[TMP16]], align 4
+// CHECK19-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK19-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32**
+// CHECK19-NEXT:    store i32* [[VLA]], i32** [[TMP18]], align 4
+// CHECK19-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK19-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32**
+// CHECK19-NEXT:    store i32* [[VLA]], i32** [[TMP20]], align 4
+// CHECK19-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
+// CHECK19-NEXT:    store i64 [[TMP5]], i64* [[TMP21]], align 4
+// CHECK19-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK19-NEXT:    store i8* null, i8** [[TMP22]], align 4
+// CHECK19-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP26:%.*]] = load i32, i32* [[N]], align 4
+// CHECK19-NEXT:    store i32 [[TMP26]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK19-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK19-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP27]], 0
 // CHECK19-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK19-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK19-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK19-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK19-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP29]], 1
-// CHECK19-NEXT:    [[TMP30:%.*]] = zext i32 [[ADD]] to i64
-// CHECK19-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP30]])
-// CHECK19-NEXT:    [[TMP31:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l154.region_id, i32 3, i8** [[TMP24]], i8** [[TMP25]], i64* [[TMP26]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK19-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
-// CHECK19-NEXT:    br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK19-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK19-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP28]], 1
+// CHECK19-NEXT:    [[TMP29:%.*]] = zext i32 [[ADD]] to i64
+// CHECK19-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP29]])
+// CHECK19-NEXT:    [[TMP30:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l154.region_id, i32 3, i8** [[TMP23]], i8** [[TMP24]], i64* [[TMP25]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK19-NEXT:    [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
+// CHECK19-NEXT:    br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK19:       omp_offload.failed:
 // CHECK19-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l154(i32 [[TMP3]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK19-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK19:       omp_offload.cont:
-// CHECK19-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N]], align 4
-// CHECK19-NEXT:    store i32 [[TMP33]], i32* [[N_CASTED3]], align 4
-// CHECK19-NEXT:    [[TMP34:%.*]] = load i32, i32* [[N_CASTED3]], align 4
-// CHECK19-NEXT:    [[TMP35:%.*]] = mul nuw i32 [[TMP0]], 4
-// CHECK19-NEXT:    [[TMP36:%.*]] = sext i32 [[TMP35]] to i64
+// CHECK19-NEXT:    [[TMP32:%.*]] = load i32, i32* [[N]], align 4
+// CHECK19-NEXT:    store i32 [[TMP32]], i32* [[N_CASTED3]], align 4
+// CHECK19-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N_CASTED3]], align 4
+// CHECK19-NEXT:    [[TMP34:%.*]] = mul nuw i32 [[TMP0]], 4
+// CHECK19-NEXT:    [[TMP35:%.*]] = sext i32 [[TMP34]] to i64
+// CHECK19-NEXT:    [[TMP36:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES7]] to i8*
+// CHECK19-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP36]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes.4 to i8*), i32 24, i1 false)
 // CHECK19-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
 // CHECK19-NEXT:    [[TMP38:%.*]] = bitcast i8** [[TMP37]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP34]], i32* [[TMP38]], align 4
+// CHECK19-NEXT:    store i32 [[TMP33]], i32* [[TMP38]], align 4
 // CHECK19-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
 // CHECK19-NEXT:    [[TMP40:%.*]] = bitcast i8** [[TMP39]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP34]], i32* [[TMP40]], align 4
-// CHECK19-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 0
-// CHECK19-NEXT:    store i64 4, i64* [[TMP41]], align 4
-// CHECK19-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 0
-// CHECK19-NEXT:    store i8* null, i8** [[TMP42]], align 4
-// CHECK19-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 1
-// CHECK19-NEXT:    [[TMP44:%.*]] = bitcast i8** [[TMP43]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP0]], i32* [[TMP44]], align 4
-// CHECK19-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 1
-// CHECK19-NEXT:    [[TMP46:%.*]] = bitcast i8** [[TMP45]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP0]], i32* [[TMP46]], align 4
-// CHECK19-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 1
-// CHECK19-NEXT:    store i64 4, i64* [[TMP47]], align 4
-// CHECK19-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 1
-// CHECK19-NEXT:    store i8* null, i8** [[TMP48]], align 4
-// CHECK19-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 2
+// CHECK19-NEXT:    store i32 [[TMP33]], i32* [[TMP40]], align 4
+// CHECK19-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 0
+// CHECK19-NEXT:    store i8* null, i8** [[TMP41]], align 4
+// CHECK19-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 1
+// CHECK19-NEXT:    [[TMP43:%.*]] = bitcast i8** [[TMP42]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP0]], i32* [[TMP43]], align 4
+// CHECK19-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 1
+// CHECK19-NEXT:    [[TMP45:%.*]] = bitcast i8** [[TMP44]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP0]], i32* [[TMP45]], align 4
+// CHECK19-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 1
+// CHECK19-NEXT:    store i8* null, i8** [[TMP46]], align 4
+// CHECK19-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 2
+// CHECK19-NEXT:    [[TMP48:%.*]] = bitcast i8** [[TMP47]] to i32**
+// CHECK19-NEXT:    store i32* [[VLA]], i32** [[TMP48]], align 4
+// CHECK19-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 2
 // CHECK19-NEXT:    [[TMP50:%.*]] = bitcast i8** [[TMP49]] to i32**
 // CHECK19-NEXT:    store i32* [[VLA]], i32** [[TMP50]], align 4
-// CHECK19-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 2
-// CHECK19-NEXT:    [[TMP52:%.*]] = bitcast i8** [[TMP51]] to i32**
-// CHECK19-NEXT:    store i32* [[VLA]], i32** [[TMP52]], align 4
-// CHECK19-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 2
-// CHECK19-NEXT:    store i64 [[TMP36]], i64* [[TMP53]], align 4
-// CHECK19-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 2
-// CHECK19-NEXT:    store i8* null, i8** [[TMP54]], align 4
-// CHECK19-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP58:%.*]] = load i32, i32* [[N]], align 4
-// CHECK19-NEXT:    store i32 [[TMP58]], i32* [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK19-NEXT:    [[TMP59:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK19-NEXT:    [[SUB11:%.*]] = sub nsw i32 [[TMP59]], 0
+// CHECK19-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 2
+// CHECK19-NEXT:    store i64 [[TMP35]], i64* [[TMP51]], align 4
+// CHECK19-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 2
+// CHECK19-NEXT:    store i8* null, i8** [[TMP52]], align 4
+// CHECK19-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP56:%.*]] = load i32, i32* [[N]], align 4
+// CHECK19-NEXT:    store i32 [[TMP56]], i32* [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK19-NEXT:    [[TMP57:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK19-NEXT:    [[SUB11:%.*]] = sub nsw i32 [[TMP57]], 0
 // CHECK19-NEXT:    [[DIV12:%.*]] = sdiv i32 [[SUB11]], 1
 // CHECK19-NEXT:    [[SUB13:%.*]] = sub nsw i32 [[DIV12]], 1
 // CHECK19-NEXT:    store i32 [[SUB13]], i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK19-NEXT:    [[TMP60:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK19-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP60]], 1
-// CHECK19-NEXT:    [[TMP61:%.*]] = zext i32 [[ADD14]] to i64
-// CHECK19-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP61]])
-// CHECK19-NEXT:    [[TMP62:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l159.region_id, i32 3, i8** [[TMP55]], i8** [[TMP56]], i64* [[TMP57]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK19-NEXT:    [[TMP63:%.*]] = icmp ne i32 [[TMP62]], 0
-// CHECK19-NEXT:    br i1 [[TMP63]], label [[OMP_OFFLOAD_FAILED15:%.*]], label [[OMP_OFFLOAD_CONT16:%.*]]
+// CHECK19-NEXT:    [[TMP58:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
+// CHECK19-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP58]], 1
+// CHECK19-NEXT:    [[TMP59:%.*]] = zext i32 [[ADD14]] to i64
+// CHECK19-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP59]])
+// CHECK19-NEXT:    [[TMP60:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l159.region_id, i32 3, i8** [[TMP53]], i8** [[TMP54]], i64* [[TMP55]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.5, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK19-NEXT:    [[TMP61:%.*]] = icmp ne i32 [[TMP60]], 0
+// CHECK19-NEXT:    br i1 [[TMP61]], label [[OMP_OFFLOAD_FAILED15:%.*]], label [[OMP_OFFLOAD_CONT16:%.*]]
 // CHECK19:       omp_offload.failed15:
-// CHECK19-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l159(i32 [[TMP34]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK19-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l159(i32 [[TMP33]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK19-NEXT:    br label [[OMP_OFFLOAD_CONT16]]
 // CHECK19:       omp_offload.cont16:
-// CHECK19-NEXT:    [[TMP64:%.*]] = load i32, i32* [[M]], align 4
-// CHECK19-NEXT:    store i32 [[TMP64]], i32* [[M_CASTED]], align 4
-// CHECK19-NEXT:    [[TMP65:%.*]] = load i32, i32* [[M_CASTED]], align 4
-// CHECK19-NEXT:    [[TMP66:%.*]] = load i32, i32* [[N]], align 4
-// CHECK19-NEXT:    store i32 [[TMP66]], i32* [[N_CASTED17]], align 4
-// CHECK19-NEXT:    [[TMP67:%.*]] = load i32, i32* [[N_CASTED17]], align 4
-// CHECK19-NEXT:    [[TMP68:%.*]] = mul nuw i32 [[TMP0]], 4
-// CHECK19-NEXT:    [[TMP69:%.*]] = sext i32 [[TMP68]] to i64
-// CHECK19-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP71:%.*]] = bitcast i8** [[TMP70]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP65]], i32* [[TMP71]], align 4
-// CHECK19-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP73:%.*]] = bitcast i8** [[TMP72]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP65]], i32* [[TMP73]], align 4
-// CHECK19-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 0
-// CHECK19-NEXT:    store i64 4, i64* [[TMP74]], align 4
-// CHECK19-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 0
-// CHECK19-NEXT:    store i8* null, i8** [[TMP75]], align 4
-// CHECK19-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 1
+// CHECK19-NEXT:    [[TMP62:%.*]] = load i32, i32* [[M]], align 4
+// CHECK19-NEXT:    store i32 [[TMP62]], i32* [[M_CASTED]], align 4
+// CHECK19-NEXT:    [[TMP63:%.*]] = load i32, i32* [[M_CASTED]], align 4
+// CHECK19-NEXT:    [[TMP64:%.*]] = load i32, i32* [[N]], align 4
+// CHECK19-NEXT:    store i32 [[TMP64]], i32* [[N_CASTED17]], align 4
+// CHECK19-NEXT:    [[TMP65:%.*]] = load i32, i32* [[N_CASTED17]], align 4
+// CHECK19-NEXT:    [[TMP66:%.*]] = mul nuw i32 [[TMP0]], 4
+// CHECK19-NEXT:    [[TMP67:%.*]] = sext i32 [[TMP66]] to i64
+// CHECK19-NEXT:    [[TMP68:%.*]] = bitcast [4 x i64]* [[DOTOFFLOAD_SIZES21]] to i8*
+// CHECK19-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP68]], i8* align 4 bitcast ([4 x i64]* @.offload_sizes.8 to i8*), i32 32, i1 false)
+// CHECK19-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP70:%.*]] = bitcast i8** [[TMP69]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP63]], i32* [[TMP70]], align 4
+// CHECK19-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP72:%.*]] = bitcast i8** [[TMP71]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP63]], i32* [[TMP72]], align 4
+// CHECK19-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 0
+// CHECK19-NEXT:    store i8* null, i8** [[TMP73]], align 4
+// CHECK19-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 1
+// CHECK19-NEXT:    [[TMP75:%.*]] = bitcast i8** [[TMP74]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP65]], i32* [[TMP75]], align 4
+// CHECK19-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 1
 // CHECK19-NEXT:    [[TMP77:%.*]] = bitcast i8** [[TMP76]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP67]], i32* [[TMP77]], align 4
-// CHECK19-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 1
-// CHECK19-NEXT:    [[TMP79:%.*]] = bitcast i8** [[TMP78]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP67]], i32* [[TMP79]], align 4
-// CHECK19-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 1
-// CHECK19-NEXT:    store i64 4, i64* [[TMP80]], align 4
-// CHECK19-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 1
-// CHECK19-NEXT:    store i8* null, i8** [[TMP81]], align 4
-// CHECK19-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 2
-// CHECK19-NEXT:    [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP0]], i32* [[TMP83]], align 4
-// CHECK19-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 2
-// CHECK19-NEXT:    [[TMP85:%.*]] = bitcast i8** [[TMP84]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP0]], i32* [[TMP85]], align 4
-// CHECK19-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 2
-// CHECK19-NEXT:    store i64 4, i64* [[TMP86]], align 4
-// CHECK19-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 2
-// CHECK19-NEXT:    store i8* null, i8** [[TMP87]], align 4
-// CHECK19-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 3
-// CHECK19-NEXT:    [[TMP89:%.*]] = bitcast i8** [[TMP88]] to i32**
-// CHECK19-NEXT:    store i32* [[VLA]], i32** [[TMP89]], align 4
-// CHECK19-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 3
-// CHECK19-NEXT:    [[TMP91:%.*]] = bitcast i8** [[TMP90]] to i32**
-// CHECK19-NEXT:    store i32* [[VLA]], i32** [[TMP91]], align 4
-// CHECK19-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 3
-// CHECK19-NEXT:    store i64 [[TMP69]], i64* [[TMP92]], align 4
-// CHECK19-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 3
-// CHECK19-NEXT:    store i8* null, i8** [[TMP93]], align 4
-// CHECK19-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP97:%.*]] = load i32, i32* [[N]], align 4
-// CHECK19-NEXT:    store i32 [[TMP97]], i32* [[DOTCAPTURE_EXPR_23]], align 4
-// CHECK19-NEXT:    [[TMP98:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_23]], align 4
-// CHECK19-NEXT:    [[SUB25:%.*]] = sub nsw i32 [[TMP98]], 0
+// CHECK19-NEXT:    store i32 [[TMP65]], i32* [[TMP77]], align 4
+// CHECK19-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 1
+// CHECK19-NEXT:    store i8* null, i8** [[TMP78]], align 4
+// CHECK19-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 2
+// CHECK19-NEXT:    [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP0]], i32* [[TMP80]], align 4
+// CHECK19-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 2
+// CHECK19-NEXT:    [[TMP82:%.*]] = bitcast i8** [[TMP81]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP0]], i32* [[TMP82]], align 4
+// CHECK19-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 2
+// CHECK19-NEXT:    store i8* null, i8** [[TMP83]], align 4
+// CHECK19-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 3
+// CHECK19-NEXT:    [[TMP85:%.*]] = bitcast i8** [[TMP84]] to i32**
+// CHECK19-NEXT:    store i32* [[VLA]], i32** [[TMP85]], align 4
+// CHECK19-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 3
+// CHECK19-NEXT:    [[TMP87:%.*]] = bitcast i8** [[TMP86]] to i32**
+// CHECK19-NEXT:    store i32* [[VLA]], i32** [[TMP87]], align 4
+// CHECK19-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 3
+// CHECK19-NEXT:    store i64 [[TMP67]], i64* [[TMP88]], align 4
+// CHECK19-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 3
+// CHECK19-NEXT:    store i8* null, i8** [[TMP89]], align 4
+// CHECK19-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP93:%.*]] = load i32, i32* [[N]], align 4
+// CHECK19-NEXT:    store i32 [[TMP93]], i32* [[DOTCAPTURE_EXPR_23]], align 4
+// CHECK19-NEXT:    [[TMP94:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_23]], align 4
+// CHECK19-NEXT:    [[SUB25:%.*]] = sub nsw i32 [[TMP94]], 0
 // CHECK19-NEXT:    [[DIV26:%.*]] = sdiv i32 [[SUB25]], 1
 // CHECK19-NEXT:    [[SUB27:%.*]] = sub nsw i32 [[DIV26]], 1
 // CHECK19-NEXT:    store i32 [[SUB27]], i32* [[DOTCAPTURE_EXPR_24]], align 4
-// CHECK19-NEXT:    [[TMP99:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_24]], align 4
-// CHECK19-NEXT:    [[ADD28:%.*]] = add nsw i32 [[TMP99]], 1
-// CHECK19-NEXT:    [[TMP100:%.*]] = zext i32 [[ADD28]] to i64
-// CHECK19-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP100]])
-// CHECK19-NEXT:    [[TMP101:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l164.region_id, i32 4, i8** [[TMP94]], i8** [[TMP95]], i64* [[TMP96]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.7, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK19-NEXT:    [[TMP102:%.*]] = icmp ne i32 [[TMP101]], 0
-// CHECK19-NEXT:    br i1 [[TMP102]], label [[OMP_OFFLOAD_FAILED29:%.*]], label [[OMP_OFFLOAD_CONT30:%.*]]
+// CHECK19-NEXT:    [[TMP95:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_24]], align 4
+// CHECK19-NEXT:    [[ADD28:%.*]] = add nsw i32 [[TMP95]], 1
+// CHECK19-NEXT:    [[TMP96:%.*]] = zext i32 [[ADD28]] to i64
+// CHECK19-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP96]])
+// CHECK19-NEXT:    [[TMP97:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l164.region_id, i32 4, i8** [[TMP90]], i8** [[TMP91]], i64* [[TMP92]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK19-NEXT:    [[TMP98:%.*]] = icmp ne i32 [[TMP97]], 0
+// CHECK19-NEXT:    br i1 [[TMP98]], label [[OMP_OFFLOAD_FAILED29:%.*]], label [[OMP_OFFLOAD_CONT30:%.*]]
 // CHECK19:       omp_offload.failed29:
-// CHECK19-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l164(i32 [[TMP65]], i32 [[TMP67]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK19-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l164(i32 [[TMP63]], i32 [[TMP65]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK19-NEXT:    br label [[OMP_OFFLOAD_CONT30]]
 // CHECK19:       omp_offload.cont30:
-// CHECK19-NEXT:    [[TMP103:%.*]] = load i32, i32* [[N]], align 4
-// CHECK19-NEXT:    store i32 [[TMP103]], i32* [[N_CASTED31]], align 4
-// CHECK19-NEXT:    [[TMP104:%.*]] = load i32, i32* [[N_CASTED31]], align 4
-// CHECK19-NEXT:    [[TMP105:%.*]] = mul nuw i32 [[TMP0]], 4
-// CHECK19-NEXT:    [[TMP106:%.*]] = sext i32 [[TMP105]] to i64
-// CHECK19-NEXT:    [[TMP107:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS32]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP108:%.*]] = bitcast i8** [[TMP107]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP104]], i32* [[TMP108]], align 4
-// CHECK19-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS33]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP99:%.*]] = load i32, i32* [[N]], align 4
+// CHECK19-NEXT:    store i32 [[TMP99]], i32* [[N_CASTED31]], align 4
+// CHECK19-NEXT:    [[TMP100:%.*]] = load i32, i32* [[N_CASTED31]], align 4
+// CHECK19-NEXT:    [[TMP101:%.*]] = mul nuw i32 [[TMP0]], 4
+// CHECK19-NEXT:    [[TMP102:%.*]] = sext i32 [[TMP101]] to i64
+// CHECK19-NEXT:    [[TMP103:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES35]] to i8*
+// CHECK19-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP103]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes.12 to i8*), i32 24, i1 false)
+// CHECK19-NEXT:    [[TMP104:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS32]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP105:%.*]] = bitcast i8** [[TMP104]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP100]], i32* [[TMP105]], align 4
+// CHECK19-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS33]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP107:%.*]] = bitcast i8** [[TMP106]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP100]], i32* [[TMP107]], align 4
+// CHECK19-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS34]], i32 0, i32 0
+// CHECK19-NEXT:    store i8* null, i8** [[TMP108]], align 4
+// CHECK19-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS32]], i32 0, i32 1
 // CHECK19-NEXT:    [[TMP110:%.*]] = bitcast i8** [[TMP109]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP104]], i32* [[TMP110]], align 4
-// CHECK19-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES35]], i32 0, i32 0
-// CHECK19-NEXT:    store i64 4, i64* [[TMP111]], align 4
-// CHECK19-NEXT:    [[TMP112:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS34]], i32 0, i32 0
-// CHECK19-NEXT:    store i8* null, i8** [[TMP112]], align 4
-// CHECK19-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS32]], i32 0, i32 1
-// CHECK19-NEXT:    [[TMP114:%.*]] = bitcast i8** [[TMP113]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP0]], i32* [[TMP114]], align 4
-// CHECK19-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS33]], i32 0, i32 1
-// CHECK19-NEXT:    [[TMP116:%.*]] = bitcast i8** [[TMP115]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP0]], i32* [[TMP116]], align 4
-// CHECK19-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES35]], i32 0, i32 1
-// CHECK19-NEXT:    store i64 4, i64* [[TMP117]], align 4
-// CHECK19-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS34]], i32 0, i32 1
-// CHECK19-NEXT:    store i8* null, i8** [[TMP118]], align 4
-// CHECK19-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS32]], i32 0, i32 2
-// CHECK19-NEXT:    [[TMP120:%.*]] = bitcast i8** [[TMP119]] to i32**
-// CHECK19-NEXT:    store i32* [[VLA]], i32** [[TMP120]], align 4
-// CHECK19-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS33]], i32 0, i32 2
-// CHECK19-NEXT:    [[TMP122:%.*]] = bitcast i8** [[TMP121]] to i32**
-// CHECK19-NEXT:    store i32* [[VLA]], i32** [[TMP122]], align 4
-// CHECK19-NEXT:    [[TMP123:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES35]], i32 0, i32 2
-// CHECK19-NEXT:    store i64 [[TMP106]], i64* [[TMP123]], align 4
-// CHECK19-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS34]], i32 0, i32 2
-// CHECK19-NEXT:    store i8* null, i8** [[TMP124]], align 4
-// CHECK19-NEXT:    [[TMP125:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS32]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP126:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS33]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES35]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP128:%.*]] = load i32, i32* [[N]], align 4
-// CHECK19-NEXT:    store i32 [[TMP128]], i32* [[DOTCAPTURE_EXPR_37]], align 4
-// CHECK19-NEXT:    [[TMP129:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_37]], align 4
-// CHECK19-NEXT:    [[SUB39:%.*]] = sub nsw i32 [[TMP129]], 0
+// CHECK19-NEXT:    store i32 [[TMP0]], i32* [[TMP110]], align 4
+// CHECK19-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS33]], i32 0, i32 1
+// CHECK19-NEXT:    [[TMP112:%.*]] = bitcast i8** [[TMP111]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP0]], i32* [[TMP112]], align 4
+// CHECK19-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS34]], i32 0, i32 1
+// CHECK19-NEXT:    store i8* null, i8** [[TMP113]], align 4
+// CHECK19-NEXT:    [[TMP114:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS32]], i32 0, i32 2
+// CHECK19-NEXT:    [[TMP115:%.*]] = bitcast i8** [[TMP114]] to i32**
+// CHECK19-NEXT:    store i32* [[VLA]], i32** [[TMP115]], align 4
+// CHECK19-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS33]], i32 0, i32 2
+// CHECK19-NEXT:    [[TMP117:%.*]] = bitcast i8** [[TMP116]] to i32**
+// CHECK19-NEXT:    store i32* [[VLA]], i32** [[TMP117]], align 4
+// CHECK19-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES35]], i32 0, i32 2
+// CHECK19-NEXT:    store i64 [[TMP102]], i64* [[TMP118]], align 4
+// CHECK19-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS34]], i32 0, i32 2
+// CHECK19-NEXT:    store i8* null, i8** [[TMP119]], align 4
+// CHECK19-NEXT:    [[TMP120:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS32]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS33]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES35]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP123:%.*]] = load i32, i32* [[N]], align 4
+// CHECK19-NEXT:    store i32 [[TMP123]], i32* [[DOTCAPTURE_EXPR_37]], align 4
+// CHECK19-NEXT:    [[TMP124:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_37]], align 4
+// CHECK19-NEXT:    [[SUB39:%.*]] = sub nsw i32 [[TMP124]], 0
 // CHECK19-NEXT:    [[DIV40:%.*]] = sdiv i32 [[SUB39]], 1
 // CHECK19-NEXT:    [[SUB41:%.*]] = sub nsw i32 [[DIV40]], 1
 // CHECK19-NEXT:    store i32 [[SUB41]], i32* [[DOTCAPTURE_EXPR_38]], align 4
-// CHECK19-NEXT:    [[TMP130:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_38]], align 4
-// CHECK19-NEXT:    [[ADD42:%.*]] = add nsw i32 [[TMP130]], 1
-// CHECK19-NEXT:    [[TMP131:%.*]] = zext i32 [[ADD42]] to i64
-// CHECK19-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP131]])
-// CHECK19-NEXT:    [[TMP132:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l169.region_id, i32 3, i8** [[TMP125]], i8** [[TMP126]], i64* [[TMP127]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.10, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK19-NEXT:    [[TMP133:%.*]] = icmp ne i32 [[TMP132]], 0
-// CHECK19-NEXT:    br i1 [[TMP133]], label [[OMP_OFFLOAD_FAILED43:%.*]], label [[OMP_OFFLOAD_CONT44:%.*]]
+// CHECK19-NEXT:    [[TMP125:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_38]], align 4
+// CHECK19-NEXT:    [[ADD42:%.*]] = add nsw i32 [[TMP125]], 1
+// CHECK19-NEXT:    [[TMP126:%.*]] = zext i32 [[ADD42]] to i64
+// CHECK19-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP126]])
+// CHECK19-NEXT:    [[TMP127:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l169.region_id, i32 3, i8** [[TMP120]], i8** [[TMP121]], i64* [[TMP122]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK19-NEXT:    [[TMP128:%.*]] = icmp ne i32 [[TMP127]], 0
+// CHECK19-NEXT:    br i1 [[TMP128]], label [[OMP_OFFLOAD_FAILED43:%.*]], label [[OMP_OFFLOAD_CONT44:%.*]]
 // CHECK19:       omp_offload.failed43:
-// CHECK19-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l169(i32 [[TMP104]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK19-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l169(i32 [[TMP100]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK19-NEXT:    br label [[OMP_OFFLOAD_CONT44]]
 // CHECK19:       omp_offload.cont44:
-// CHECK19-NEXT:    [[TMP134:%.*]] = load i32, i32* [[M]], align 4
-// CHECK19-NEXT:    store i32 [[TMP134]], i32* [[M_CASTED45]], align 4
-// CHECK19-NEXT:    [[TMP135:%.*]] = load i32, i32* [[M_CASTED45]], align 4
-// CHECK19-NEXT:    [[TMP136:%.*]] = load i32, i32* [[N]], align 4
-// CHECK19-NEXT:    store i32 [[TMP136]], i32* [[N_CASTED46]], align 4
-// CHECK19-NEXT:    [[TMP137:%.*]] = load i32, i32* [[N_CASTED46]], align 4
-// CHECK19-NEXT:    [[TMP138:%.*]] = mul nuw i32 [[TMP0]], 4
-// CHECK19-NEXT:    [[TMP139:%.*]] = sext i32 [[TMP138]] to i64
-// CHECK19-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP141:%.*]] = bitcast i8** [[TMP140]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP135]], i32* [[TMP141]], align 4
-// CHECK19-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP143:%.*]] = bitcast i8** [[TMP142]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP135]], i32* [[TMP143]], align 4
-// CHECK19-NEXT:    [[TMP144:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES50]], i32 0, i32 0
-// CHECK19-NEXT:    store i64 4, i64* [[TMP144]], align 4
-// CHECK19-NEXT:    [[TMP145:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS49]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP129:%.*]] = load i32, i32* [[M]], align 4
+// CHECK19-NEXT:    store i32 [[TMP129]], i32* [[M_CASTED45]], align 4
+// CHECK19-NEXT:    [[TMP130:%.*]] = load i32, i32* [[M_CASTED45]], align 4
+// CHECK19-NEXT:    [[TMP131:%.*]] = load i32, i32* [[N]], align 4
+// CHECK19-NEXT:    store i32 [[TMP131]], i32* [[N_CASTED46]], align 4
+// CHECK19-NEXT:    [[TMP132:%.*]] = load i32, i32* [[N_CASTED46]], align 4
+// CHECK19-NEXT:    [[TMP133:%.*]] = mul nuw i32 [[TMP0]], 4
+// CHECK19-NEXT:    [[TMP134:%.*]] = sext i32 [[TMP133]] to i64
+// CHECK19-NEXT:    [[TMP135:%.*]] = bitcast [4 x i64]* [[DOTOFFLOAD_SIZES50]] to i8*
+// CHECK19-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP135]], i8* align 4 bitcast ([4 x i64]* @.offload_sizes.16 to i8*), i32 32, i1 false)
+// CHECK19-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP137:%.*]] = bitcast i8** [[TMP136]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP130]], i32* [[TMP137]], align 4
+// CHECK19-NEXT:    [[TMP138:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP139:%.*]] = bitcast i8** [[TMP138]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP130]], i32* [[TMP139]], align 4
+// CHECK19-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS49]], i32 0, i32 0
+// CHECK19-NEXT:    store i8* null, i8** [[TMP140]], align 4
+// CHECK19-NEXT:    [[TMP141:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 1
+// CHECK19-NEXT:    [[TMP142:%.*]] = bitcast i8** [[TMP141]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP132]], i32* [[TMP142]], align 4
+// CHECK19-NEXT:    [[TMP143:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 1
+// CHECK19-NEXT:    [[TMP144:%.*]] = bitcast i8** [[TMP143]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP132]], i32* [[TMP144]], align 4
+// CHECK19-NEXT:    [[TMP145:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS49]], i32 0, i32 1
 // CHECK19-NEXT:    store i8* null, i8** [[TMP145]], align 4
-// CHECK19-NEXT:    [[TMP146:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 1
+// CHECK19-NEXT:    [[TMP146:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 2
 // CHECK19-NEXT:    [[TMP147:%.*]] = bitcast i8** [[TMP146]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP137]], i32* [[TMP147]], align 4
-// CHECK19-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 1
+// CHECK19-NEXT:    store i32 [[TMP0]], i32* [[TMP147]], align 4
+// CHECK19-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 2
 // CHECK19-NEXT:    [[TMP149:%.*]] = bitcast i8** [[TMP148]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP137]], i32* [[TMP149]], align 4
-// CHECK19-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES50]], i32 0, i32 1
-// CHECK19-NEXT:    store i64 4, i64* [[TMP150]], align 4
-// CHECK19-NEXT:    [[TMP151:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS49]], i32 0, i32 1
-// CHECK19-NEXT:    store i8* null, i8** [[TMP151]], align 4
-// CHECK19-NEXT:    [[TMP152:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 2
-// CHECK19-NEXT:    [[TMP153:%.*]] = bitcast i8** [[TMP152]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP0]], i32* [[TMP153]], align 4
-// CHECK19-NEXT:    [[TMP154:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 2
-// CHECK19-NEXT:    [[TMP155:%.*]] = bitcast i8** [[TMP154]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP0]], i32* [[TMP155]], align 4
-// CHECK19-NEXT:    [[TMP156:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES50]], i32 0, i32 2
-// CHECK19-NEXT:    store i64 4, i64* [[TMP156]], align 4
-// CHECK19-NEXT:    [[TMP157:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS49]], i32 0, i32 2
-// CHECK19-NEXT:    store i8* null, i8** [[TMP157]], align 4
-// CHECK19-NEXT:    [[TMP158:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 3
-// CHECK19-NEXT:    [[TMP159:%.*]] = bitcast i8** [[TMP158]] to i32**
-// CHECK19-NEXT:    store i32* [[VLA]], i32** [[TMP159]], align 4
-// CHECK19-NEXT:    [[TMP160:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 3
-// CHECK19-NEXT:    [[TMP161:%.*]] = bitcast i8** [[TMP160]] to i32**
-// CHECK19-NEXT:    store i32* [[VLA]], i32** [[TMP161]], align 4
-// CHECK19-NEXT:    [[TMP162:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES50]], i32 0, i32 3
-// CHECK19-NEXT:    store i64 [[TMP139]], i64* [[TMP162]], align 4
-// CHECK19-NEXT:    [[TMP163:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS49]], i32 0, i32 3
-// CHECK19-NEXT:    store i8* null, i8** [[TMP163]], align 4
-// CHECK19-NEXT:    [[TMP164:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP165:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP166:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES50]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP167:%.*]] = load i32, i32* [[N]], align 4
-// CHECK19-NEXT:    store i32 [[TMP167]], i32* [[DOTCAPTURE_EXPR_52]], align 4
-// CHECK19-NEXT:    [[TMP168:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_52]], align 4
-// CHECK19-NEXT:    [[SUB54:%.*]] = sub nsw i32 [[TMP168]], 0
+// CHECK19-NEXT:    store i32 [[TMP0]], i32* [[TMP149]], align 4
+// CHECK19-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS49]], i32 0, i32 2
+// CHECK19-NEXT:    store i8* null, i8** [[TMP150]], align 4
+// CHECK19-NEXT:    [[TMP151:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 3
+// CHECK19-NEXT:    [[TMP152:%.*]] = bitcast i8** [[TMP151]] to i32**
+// CHECK19-NEXT:    store i32* [[VLA]], i32** [[TMP152]], align 4
+// CHECK19-NEXT:    [[TMP153:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 3
+// CHECK19-NEXT:    [[TMP154:%.*]] = bitcast i8** [[TMP153]] to i32**
+// CHECK19-NEXT:    store i32* [[VLA]], i32** [[TMP154]], align 4
+// CHECK19-NEXT:    [[TMP155:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES50]], i32 0, i32 3
+// CHECK19-NEXT:    store i64 [[TMP134]], i64* [[TMP155]], align 4
+// CHECK19-NEXT:    [[TMP156:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS49]], i32 0, i32 3
+// CHECK19-NEXT:    store i8* null, i8** [[TMP156]], align 4
+// CHECK19-NEXT:    [[TMP157:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP158:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP159:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES50]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP160:%.*]] = load i32, i32* [[N]], align 4
+// CHECK19-NEXT:    store i32 [[TMP160]], i32* [[DOTCAPTURE_EXPR_52]], align 4
+// CHECK19-NEXT:    [[TMP161:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_52]], align 4
+// CHECK19-NEXT:    [[SUB54:%.*]] = sub nsw i32 [[TMP161]], 0
 // CHECK19-NEXT:    [[DIV55:%.*]] = sdiv i32 [[SUB54]], 1
 // CHECK19-NEXT:    [[SUB56:%.*]] = sub nsw i32 [[DIV55]], 1
 // CHECK19-NEXT:    store i32 [[SUB56]], i32* [[DOTCAPTURE_EXPR_53]], align 4
-// CHECK19-NEXT:    [[TMP169:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_53]], align 4
-// CHECK19-NEXT:    [[ADD57:%.*]] = add nsw i32 [[TMP169]], 1
-// CHECK19-NEXT:    [[TMP170:%.*]] = zext i32 [[ADD57]] to i64
-// CHECK19-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP170]])
-// CHECK19-NEXT:    [[TMP171:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l174.region_id, i32 4, i8** [[TMP164]], i8** [[TMP165]], i64* [[TMP166]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK19-NEXT:    [[TMP172:%.*]] = icmp ne i32 [[TMP171]], 0
-// CHECK19-NEXT:    br i1 [[TMP172]], label [[OMP_OFFLOAD_FAILED58:%.*]], label [[OMP_OFFLOAD_CONT59:%.*]]
+// CHECK19-NEXT:    [[TMP162:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_53]], align 4
+// CHECK19-NEXT:    [[ADD57:%.*]] = add nsw i32 [[TMP162]], 1
+// CHECK19-NEXT:    [[TMP163:%.*]] = zext i32 [[ADD57]] to i64
+// CHECK19-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP163]])
+// CHECK19-NEXT:    [[TMP164:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l174.region_id, i32 4, i8** [[TMP157]], i8** [[TMP158]], i64* [[TMP159]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.17, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK19-NEXT:    [[TMP165:%.*]] = icmp ne i32 [[TMP164]], 0
+// CHECK19-NEXT:    br i1 [[TMP165]], label [[OMP_OFFLOAD_FAILED58:%.*]], label [[OMP_OFFLOAD_CONT59:%.*]]
 // CHECK19:       omp_offload.failed58:
-// CHECK19-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l174(i32 [[TMP135]], i32 [[TMP137]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK19-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l174(i32 [[TMP130]], i32 [[TMP132]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK19-NEXT:    br label [[OMP_OFFLOAD_CONT59]]
 // CHECK19:       omp_offload.cont59:
-// CHECK19-NEXT:    [[TMP173:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK19-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10EEiT_(i32 noundef [[TMP173]])
+// CHECK19-NEXT:    [[TMP166:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK19-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10EEiT_(i32 noundef [[TMP166]])
 // CHECK19-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK19-NEXT:    [[TMP174:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK19-NEXT:    call void @llvm.stackrestore(i8* [[TMP174]])
-// CHECK19-NEXT:    [[TMP175:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK19-NEXT:    ret i32 [[TMP175]]
+// CHECK19-NEXT:    [[TMP167:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK19-NEXT:    call void @llvm.stackrestore(i8* [[TMP167]])
+// CHECK19-NEXT:    [[TMP168:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK19-NEXT:    ret i32 [[TMP168]]
 //
 //
 // CHECK19-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l154
@@ -25399,11 +25301,11 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
 // CHECK19-NEXT:    store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK19-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*, i32)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP4]])
+// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*, i32)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP4]])
 // CHECK19-NEXT:    ret void
 //
 //
-// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..5
+// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..6
 // CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK19-NEXT:  entry:
 // CHECK19-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -25481,7 +25383,7 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !29
 // CHECK19-NEXT:    store i32 [[TMP19]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !29
 // CHECK19-NEXT:    [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !29
-// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*, i32)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]], i32 [[TMP20]]), !llvm.access.group !29
+// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*, i32)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]], i32 [[TMP20]]), !llvm.access.group !29
 // CHECK19-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK19:       omp.inner.for.inc:
 // CHECK19-NEXT:    [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29
@@ -25535,7 +25437,7 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    ret void
 //
 //
-// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..6
+// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..7
 // CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK19-NEXT:  entry:
 // CHECK19-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -25662,11 +25564,11 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    store i32* [[A]], i32** [[A_ADDR]], align 4
 // CHECK19-NEXT:    [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4
 // CHECK19-NEXT:    [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4
-// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]])
+// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]])
 // CHECK19-NEXT:    ret void
 //
 //
-// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..8
+// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..10
 // CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] {
 // CHECK19-NEXT:  entry:
 // CHECK19-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -25736,7 +25638,7 @@ int main (int argc, char **argv) {
 // CHECK19:       omp.inner.for.body:
 // CHECK19-NEXT:    [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !35
 // CHECK19-NEXT:    [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !35
-// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]]), !llvm.access.group !35
+// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]]), !llvm.access.group !35
 // CHECK19-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK19:       omp.inner.for.inc:
 // CHECK19-NEXT:    [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35
@@ -25767,7 +25669,7 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    ret void
 //
 //
-// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..9
+// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..11
 // CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] {
 // CHECK19-NEXT:  entry:
 // CHECK19-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -25897,11 +25799,11 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
 // CHECK19-NEXT:    store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK19-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*, i32)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP4]])
+// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*, i32)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP4]])
 // CHECK19-NEXT:    ret void
 //
 //
-// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..11
+// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..14
 // CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK19-NEXT:  entry:
 // CHECK19-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -25977,7 +25879,7 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !41
 // CHECK19-NEXT:    store i32 [[TMP18]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !41
 // CHECK19-NEXT:    [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !41
-// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*, i32)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]], i32 [[TMP19]]), !llvm.access.group !41
+// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*, i32)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]], i32 [[TMP19]]), !llvm.access.group !41
 // CHECK19-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK19:       omp.inner.for.inc:
 // CHECK19-NEXT:    [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41
@@ -26008,7 +25910,7 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    ret void
 //
 //
-// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..12
+// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..15
 // CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK19-NEXT:  entry:
 // CHECK19-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -26122,7 +26024,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK19-LABEL: define {{[^@]+}}@_Z5tmainIiLi10EEiT_
-// CHECK19-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK19-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK19-NEXT:  entry:
 // CHECK19-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK19-NEXT:    [[A:%.*]] = alloca [10 x i32], align 4
@@ -26162,7 +26064,7 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK19-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK19-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK19-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l122.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.16, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK19-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l122.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.20, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.21, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK19-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK19-NEXT:    br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK19:       omp_offload.failed:
@@ -26180,7 +26082,7 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK19-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
 // CHECK19-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK19-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l127.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.19, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.20, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK19-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l127.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.24, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.25, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK19-NEXT:    [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
 // CHECK19-NEXT:    br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]]
 // CHECK19:       omp_offload.failed5:
@@ -26209,7 +26111,7 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0
 // CHECK19-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS8]], i32 0, i32 0
 // CHECK19-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK19-NEXT:    [[TMP32:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l132.region_id, i32 2, i8** [[TMP30]], i8** [[TMP31]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.23, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.24, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK19-NEXT:    [[TMP32:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l132.region_id, i32 2, i8** [[TMP30]], i8** [[TMP31]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.28, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.29, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK19-NEXT:    [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0
 // CHECK19-NEXT:    br i1 [[TMP33]], label [[OMP_OFFLOAD_FAILED11:%.*]], label [[OMP_OFFLOAD_CONT12:%.*]]
 // CHECK19:       omp_offload.failed11:
@@ -26227,7 +26129,7 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0
 // CHECK19-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 0
 // CHECK19-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK19-NEXT:    [[TMP41:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l137.region_id, i32 1, i8** [[TMP39]], i8** [[TMP40]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.27, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.28, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK19-NEXT:    [[TMP41:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l137.region_id, i32 1, i8** [[TMP39]], i8** [[TMP40]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.32, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.33, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK19-NEXT:    [[TMP42:%.*]] = icmp ne i32 [[TMP41]], 0
 // CHECK19-NEXT:    br i1 [[TMP42]], label [[OMP_OFFLOAD_FAILED17:%.*]], label [[OMP_OFFLOAD_CONT18:%.*]]
 // CHECK19:       omp_offload.failed17:
@@ -26256,7 +26158,7 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0
 // CHECK19-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 0
 // CHECK19-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK19-NEXT:    [[TMP57:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l142.region_id, i32 2, i8** [[TMP55]], i8** [[TMP56]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.31, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.32, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK19-NEXT:    [[TMP57:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l142.region_id, i32 2, i8** [[TMP55]], i8** [[TMP56]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.36, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.37, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK19-NEXT:    [[TMP58:%.*]] = icmp ne i32 [[TMP57]], 0
 // CHECK19-NEXT:    br i1 [[TMP58]], label [[OMP_OFFLOAD_FAILED24:%.*]], label [[OMP_OFFLOAD_CONT25:%.*]]
 // CHECK19:       omp_offload.failed24:
@@ -26272,11 +26174,11 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4
 // CHECK19-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4
 // CHECK19-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4
-// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK19-NEXT:    ret void
 //
 //
-// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..14
+// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..18
 // CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK19-NEXT:  entry:
 // CHECK19-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -26322,7 +26224,7 @@ int main (int argc, char **argv) {
 // CHECK19:       omp.inner.for.body:
 // CHECK19-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !47
 // CHECK19-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !47
-// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !47
+// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !47
 // CHECK19-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK19:       omp.inner.for.inc:
 // CHECK19-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47
@@ -26344,7 +26246,7 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    ret void
 //
 //
-// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..15
+// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..19
 // CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK19-NEXT:  entry:
 // CHECK19-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -26431,11 +26333,11 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4
 // CHECK19-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4
 // CHECK19-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4
-// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..17 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..22 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK19-NEXT:    ret void
 //
 //
-// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..17
+// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..22
 // CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK19-NEXT:  entry:
 // CHECK19-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -26481,7 +26383,7 @@ int main (int argc, char **argv) {
 // CHECK19:       omp.inner.for.body:
 // CHECK19-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !53
 // CHECK19-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !53
-// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !53
+// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..23 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !53
 // CHECK19-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK19:       omp.inner.for.inc:
 // CHECK19-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53
@@ -26503,7 +26405,7 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    ret void
 //
 //
-// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..18
+// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..23
 // CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK19-NEXT:  entry:
 // CHECK19-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -26599,11 +26501,11 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
 // CHECK19-NEXT:    store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK19-NEXT:    [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i32)* @.omp_outlined..21 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i32 [[TMP3]])
+// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i32)* @.omp_outlined..26 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i32 [[TMP3]])
 // CHECK19-NEXT:    ret void
 //
 //
-// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..21
+// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..26
 // CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK19-NEXT:  entry:
 // CHECK19-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -26655,7 +26557,7 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !59
 // CHECK19-NEXT:    store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !59
 // CHECK19-NEXT:    [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !59
-// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..22 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]), !llvm.access.group !59
+// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..27 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]), !llvm.access.group !59
 // CHECK19-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK19:       omp.inner.for.inc:
 // CHECK19-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59
@@ -26677,7 +26579,7 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    ret void
 //
 //
-// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..22
+// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..27
 // CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK19-NEXT:  entry:
 // CHECK19-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -26786,11 +26688,11 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4
 // CHECK19-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4
 // CHECK19-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4
-// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..25 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK19-NEXT:    ret void
 //
 //
-// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..25
+// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..30
 // CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK19-NEXT:  entry:
 // CHECK19-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -26836,7 +26738,7 @@ int main (int argc, char **argv) {
 // CHECK19:       omp.inner.for.body:
 // CHECK19-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !65
 // CHECK19-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !65
-// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !65
+// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !65
 // CHECK19-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK19:       omp.inner.for.inc:
 // CHECK19-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !65
@@ -26858,7 +26760,7 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    ret void
 //
 //
-// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..26
+// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..31
 // CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK19-NEXT:  entry:
 // CHECK19-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -26952,11 +26854,11 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
 // CHECK19-NEXT:    store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK19-NEXT:    [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i32)* @.omp_outlined..29 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i32 [[TMP3]])
+// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i32)* @.omp_outlined..34 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i32 [[TMP3]])
 // CHECK19-NEXT:    ret void
 //
 //
-// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..29
+// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..34
 // CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK19-NEXT:  entry:
 // CHECK19-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -27008,7 +26910,7 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !71
 // CHECK19-NEXT:    store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !71
 // CHECK19-NEXT:    [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !71
-// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]), !llvm.access.group !71
+// CHECK19-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]), !llvm.access.group !71
 // CHECK19-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK19:       omp.inner.for.inc:
 // CHECK19-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !71
@@ -27030,7 +26932,7 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    ret void
 //
 //
-// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..30
+// CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..35
 // CHECK19-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK19-NEXT:  entry:
 // CHECK19-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -27113,7 +27015,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK19-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK19-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK19-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK19-NEXT:  entry:
 // CHECK19-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK19-NEXT:    ret void
@@ -27186,14 +27088,14 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4
 // CHECK20-NEXT:    [[TMP4:%.*]] = mul nuw i32 [[TMP0]], 4
 // CHECK20-NEXT:    [[TMP5:%.*]] = sext i32 [[TMP4]] to i64
-// CHECK20-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP3]], i32* [[TMP7]], align 4
-// CHECK20-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP3]], i32* [[TMP9]], align 4
-// CHECK20-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK20-NEXT:    store i64 4, i64* [[TMP10]], align 4
+// CHECK20-NEXT:    [[TMP6:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK20-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP6]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes to i8*), i32 24, i1 false)
+// CHECK20-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP3]], i32* [[TMP8]], align 4
+// CHECK20-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP3]], i32* [[TMP10]], align 4
 // CHECK20-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
 // CHECK20-NEXT:    store i8* null, i8** [[TMP11]], align 4
 // CHECK20-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -27202,298 +27104,284 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK20-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i32*
 // CHECK20-NEXT:    store i32 [[TMP0]], i32* [[TMP15]], align 4
-// CHECK20-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK20-NEXT:    store i64 4, i64* [[TMP16]], align 4
-// CHECK20-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK20-NEXT:    store i8* null, i8** [[TMP17]], align 4
-// CHECK20-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK20-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
-// CHECK20-NEXT:    store i32* [[VLA]], i32** [[TMP19]], align 4
-// CHECK20-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK20-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32**
-// CHECK20-NEXT:    store i32* [[VLA]], i32** [[TMP21]], align 4
-// CHECK20-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK20-NEXT:    store i64 [[TMP5]], i64* [[TMP22]], align 4
-// CHECK20-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK20-NEXT:    store i8* null, i8** [[TMP23]], align 4
-// CHECK20-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP27:%.*]] = load i32, i32* [[N]], align 4
-// CHECK20-NEXT:    store i32 [[TMP27]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK20-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK20-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP28]], 0
+// CHECK20-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK20-NEXT:    store i8* null, i8** [[TMP16]], align 4
+// CHECK20-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK20-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32**
+// CHECK20-NEXT:    store i32* [[VLA]], i32** [[TMP18]], align 4
+// CHECK20-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK20-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32**
+// CHECK20-NEXT:    store i32* [[VLA]], i32** [[TMP20]], align 4
+// CHECK20-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
+// CHECK20-NEXT:    store i64 [[TMP5]], i64* [[TMP21]], align 4
+// CHECK20-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK20-NEXT:    store i8* null, i8** [[TMP22]], align 4
+// CHECK20-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP26:%.*]] = load i32, i32* [[N]], align 4
+// CHECK20-NEXT:    store i32 [[TMP26]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK20-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK20-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP27]], 0
 // CHECK20-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK20-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK20-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK20-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK20-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP29]], 1
-// CHECK20-NEXT:    [[TMP30:%.*]] = zext i32 [[ADD]] to i64
-// CHECK20-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP30]])
-// CHECK20-NEXT:    [[TMP31:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l154.region_id, i32 3, i8** [[TMP24]], i8** [[TMP25]], i64* [[TMP26]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK20-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
-// CHECK20-NEXT:    br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK20-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK20-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP28]], 1
+// CHECK20-NEXT:    [[TMP29:%.*]] = zext i32 [[ADD]] to i64
+// CHECK20-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 [[TMP29]])
+// CHECK20-NEXT:    [[TMP30:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l154.region_id, i32 3, i8** [[TMP23]], i8** [[TMP24]], i64* [[TMP25]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK20-NEXT:    [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
+// CHECK20-NEXT:    br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK20:       omp_offload.failed:
 // CHECK20-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l154(i32 [[TMP3]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK20-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK20:       omp_offload.cont:
-// CHECK20-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N]], align 4
-// CHECK20-NEXT:    store i32 [[TMP33]], i32* [[N_CASTED3]], align 4
-// CHECK20-NEXT:    [[TMP34:%.*]] = load i32, i32* [[N_CASTED3]], align 4
-// CHECK20-NEXT:    [[TMP35:%.*]] = mul nuw i32 [[TMP0]], 4
-// CHECK20-NEXT:    [[TMP36:%.*]] = sext i32 [[TMP35]] to i64
+// CHECK20-NEXT:    [[TMP32:%.*]] = load i32, i32* [[N]], align 4
+// CHECK20-NEXT:    store i32 [[TMP32]], i32* [[N_CASTED3]], align 4
+// CHECK20-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N_CASTED3]], align 4
+// CHECK20-NEXT:    [[TMP34:%.*]] = mul nuw i32 [[TMP0]], 4
+// CHECK20-NEXT:    [[TMP35:%.*]] = sext i32 [[TMP34]] to i64
+// CHECK20-NEXT:    [[TMP36:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES7]] to i8*
+// CHECK20-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP36]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes.4 to i8*), i32 24, i1 false)
 // CHECK20-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
 // CHECK20-NEXT:    [[TMP38:%.*]] = bitcast i8** [[TMP37]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP34]], i32* [[TMP38]], align 4
+// CHECK20-NEXT:    store i32 [[TMP33]], i32* [[TMP38]], align 4
 // CHECK20-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
 // CHECK20-NEXT:    [[TMP40:%.*]] = bitcast i8** [[TMP39]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP34]], i32* [[TMP40]], align 4
-// CHECK20-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 0
-// CHECK20-NEXT:    store i64 4, i64* [[TMP41]], align 4
-// CHECK20-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 0
-// CHECK20-NEXT:    store i8* null, i8** [[TMP42]], align 4
-// CHECK20-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 1
-// CHECK20-NEXT:    [[TMP44:%.*]] = bitcast i8** [[TMP43]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP0]], i32* [[TMP44]], align 4
-// CHECK20-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 1
-// CHECK20-NEXT:    [[TMP46:%.*]] = bitcast i8** [[TMP45]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP0]], i32* [[TMP46]], align 4
-// CHECK20-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 1
-// CHECK20-NEXT:    store i64 4, i64* [[TMP47]], align 4
-// CHECK20-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 1
-// CHECK20-NEXT:    store i8* null, i8** [[TMP48]], align 4
-// CHECK20-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 2
+// CHECK20-NEXT:    store i32 [[TMP33]], i32* [[TMP40]], align 4
+// CHECK20-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 0
+// CHECK20-NEXT:    store i8* null, i8** [[TMP41]], align 4
+// CHECK20-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 1
+// CHECK20-NEXT:    [[TMP43:%.*]] = bitcast i8** [[TMP42]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP0]], i32* [[TMP43]], align 4
+// CHECK20-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 1
+// CHECK20-NEXT:    [[TMP45:%.*]] = bitcast i8** [[TMP44]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP0]], i32* [[TMP45]], align 4
+// CHECK20-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 1
+// CHECK20-NEXT:    store i8* null, i8** [[TMP46]], align 4
+// CHECK20-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 2
+// CHECK20-NEXT:    [[TMP48:%.*]] = bitcast i8** [[TMP47]] to i32**
+// CHECK20-NEXT:    store i32* [[VLA]], i32** [[TMP48]], align 4
+// CHECK20-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 2
 // CHECK20-NEXT:    [[TMP50:%.*]] = bitcast i8** [[TMP49]] to i32**
 // CHECK20-NEXT:    store i32* [[VLA]], i32** [[TMP50]], align 4
-// CHECK20-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 2
-// CHECK20-NEXT:    [[TMP52:%.*]] = bitcast i8** [[TMP51]] to i32**
-// CHECK20-NEXT:    store i32* [[VLA]], i32** [[TMP52]], align 4
-// CHECK20-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 2
-// CHECK20-NEXT:    store i64 [[TMP36]], i64* [[TMP53]], align 4
-// CHECK20-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 2
-// CHECK20-NEXT:    store i8* null, i8** [[TMP54]], align 4
-// CHECK20-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP58:%.*]] = load i32, i32* [[N]], align 4
-// CHECK20-NEXT:    store i32 [[TMP58]], i32* [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK20-NEXT:    [[TMP59:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK20-NEXT:    [[SUB11:%.*]] = sub nsw i32 [[TMP59]], 0
+// CHECK20-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 2
+// CHECK20-NEXT:    store i64 [[TMP35]], i64* [[TMP51]], align 4
+// CHECK20-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 2
+// CHECK20-NEXT:    store i8* null, i8** [[TMP52]], align 4
+// CHECK20-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP56:%.*]] = load i32, i32* [[N]], align 4
+// CHECK20-NEXT:    store i32 [[TMP56]], i32* [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK20-NEXT:    [[TMP57:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK20-NEXT:    [[SUB11:%.*]] = sub nsw i32 [[TMP57]], 0
 // CHECK20-NEXT:    [[DIV12:%.*]] = sdiv i32 [[SUB11]], 1
 // CHECK20-NEXT:    [[SUB13:%.*]] = sub nsw i32 [[DIV12]], 1
 // CHECK20-NEXT:    store i32 [[SUB13]], i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK20-NEXT:    [[TMP60:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK20-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP60]], 1
-// CHECK20-NEXT:    [[TMP61:%.*]] = zext i32 [[ADD14]] to i64
-// CHECK20-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP61]])
-// CHECK20-NEXT:    [[TMP62:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l159.region_id, i32 3, i8** [[TMP55]], i8** [[TMP56]], i64* [[TMP57]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK20-NEXT:    [[TMP63:%.*]] = icmp ne i32 [[TMP62]], 0
-// CHECK20-NEXT:    br i1 [[TMP63]], label [[OMP_OFFLOAD_FAILED15:%.*]], label [[OMP_OFFLOAD_CONT16:%.*]]
+// CHECK20-NEXT:    [[TMP58:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
+// CHECK20-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP58]], 1
+// CHECK20-NEXT:    [[TMP59:%.*]] = zext i32 [[ADD14]] to i64
+// CHECK20-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP59]])
+// CHECK20-NEXT:    [[TMP60:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l159.region_id, i32 3, i8** [[TMP53]], i8** [[TMP54]], i64* [[TMP55]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.5, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK20-NEXT:    [[TMP61:%.*]] = icmp ne i32 [[TMP60]], 0
+// CHECK20-NEXT:    br i1 [[TMP61]], label [[OMP_OFFLOAD_FAILED15:%.*]], label [[OMP_OFFLOAD_CONT16:%.*]]
 // CHECK20:       omp_offload.failed15:
-// CHECK20-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l159(i32 [[TMP34]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK20-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l159(i32 [[TMP33]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK20-NEXT:    br label [[OMP_OFFLOAD_CONT16]]
 // CHECK20:       omp_offload.cont16:
-// CHECK20-NEXT:    [[TMP64:%.*]] = load i32, i32* [[M]], align 4
-// CHECK20-NEXT:    store i32 [[TMP64]], i32* [[M_CASTED]], align 4
-// CHECK20-NEXT:    [[TMP65:%.*]] = load i32, i32* [[M_CASTED]], align 4
-// CHECK20-NEXT:    [[TMP66:%.*]] = load i32, i32* [[N]], align 4
-// CHECK20-NEXT:    store i32 [[TMP66]], i32* [[N_CASTED17]], align 4
-// CHECK20-NEXT:    [[TMP67:%.*]] = load i32, i32* [[N_CASTED17]], align 4
-// CHECK20-NEXT:    [[TMP68:%.*]] = mul nuw i32 [[TMP0]], 4
-// CHECK20-NEXT:    [[TMP69:%.*]] = sext i32 [[TMP68]] to i64
-// CHECK20-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP71:%.*]] = bitcast i8** [[TMP70]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP65]], i32* [[TMP71]], align 4
-// CHECK20-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP73:%.*]] = bitcast i8** [[TMP72]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP65]], i32* [[TMP73]], align 4
-// CHECK20-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 0
-// CHECK20-NEXT:    store i64 4, i64* [[TMP74]], align 4
-// CHECK20-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 0
-// CHECK20-NEXT:    store i8* null, i8** [[TMP75]], align 4
-// CHECK20-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 1
+// CHECK20-NEXT:    [[TMP62:%.*]] = load i32, i32* [[M]], align 4
+// CHECK20-NEXT:    store i32 [[TMP62]], i32* [[M_CASTED]], align 4
+// CHECK20-NEXT:    [[TMP63:%.*]] = load i32, i32* [[M_CASTED]], align 4
+// CHECK20-NEXT:    [[TMP64:%.*]] = load i32, i32* [[N]], align 4
+// CHECK20-NEXT:    store i32 [[TMP64]], i32* [[N_CASTED17]], align 4
+// CHECK20-NEXT:    [[TMP65:%.*]] = load i32, i32* [[N_CASTED17]], align 4
+// CHECK20-NEXT:    [[TMP66:%.*]] = mul nuw i32 [[TMP0]], 4
+// CHECK20-NEXT:    [[TMP67:%.*]] = sext i32 [[TMP66]] to i64
+// CHECK20-NEXT:    [[TMP68:%.*]] = bitcast [4 x i64]* [[DOTOFFLOAD_SIZES21]] to i8*
+// CHECK20-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP68]], i8* align 4 bitcast ([4 x i64]* @.offload_sizes.8 to i8*), i32 32, i1 false)
+// CHECK20-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP70:%.*]] = bitcast i8** [[TMP69]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP63]], i32* [[TMP70]], align 4
+// CHECK20-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP72:%.*]] = bitcast i8** [[TMP71]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP63]], i32* [[TMP72]], align 4
+// CHECK20-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 0
+// CHECK20-NEXT:    store i8* null, i8** [[TMP73]], align 4
+// CHECK20-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 1
+// CHECK20-NEXT:    [[TMP75:%.*]] = bitcast i8** [[TMP74]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP65]], i32* [[TMP75]], align 4
+// CHECK20-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 1
 // CHECK20-NEXT:    [[TMP77:%.*]] = bitcast i8** [[TMP76]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP67]], i32* [[TMP77]], align 4
-// CHECK20-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 1
-// CHECK20-NEXT:    [[TMP79:%.*]] = bitcast i8** [[TMP78]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP67]], i32* [[TMP79]], align 4
-// CHECK20-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 1
-// CHECK20-NEXT:    store i64 4, i64* [[TMP80]], align 4
-// CHECK20-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 1
-// CHECK20-NEXT:    store i8* null, i8** [[TMP81]], align 4
-// CHECK20-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 2
-// CHECK20-NEXT:    [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP0]], i32* [[TMP83]], align 4
-// CHECK20-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 2
-// CHECK20-NEXT:    [[TMP85:%.*]] = bitcast i8** [[TMP84]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP0]], i32* [[TMP85]], align 4
-// CHECK20-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 2
-// CHECK20-NEXT:    store i64 4, i64* [[TMP86]], align 4
-// CHECK20-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 2
-// CHECK20-NEXT:    store i8* null, i8** [[TMP87]], align 4
-// CHECK20-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 3
-// CHECK20-NEXT:    [[TMP89:%.*]] = bitcast i8** [[TMP88]] to i32**
-// CHECK20-NEXT:    store i32* [[VLA]], i32** [[TMP89]], align 4
-// CHECK20-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 3
-// CHECK20-NEXT:    [[TMP91:%.*]] = bitcast i8** [[TMP90]] to i32**
-// CHECK20-NEXT:    store i32* [[VLA]], i32** [[TMP91]], align 4
-// CHECK20-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 3
-// CHECK20-NEXT:    store i64 [[TMP69]], i64* [[TMP92]], align 4
-// CHECK20-NEXT:    [[TMP93:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 3
-// CHECK20-NEXT:    store i8* null, i8** [[TMP93]], align 4
-// CHECK20-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP95:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP96:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP97:%.*]] = load i32, i32* [[N]], align 4
-// CHECK20-NEXT:    store i32 [[TMP97]], i32* [[DOTCAPTURE_EXPR_23]], align 4
-// CHECK20-NEXT:    [[TMP98:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_23]], align 4
-// CHECK20-NEXT:    [[SUB25:%.*]] = sub nsw i32 [[TMP98]], 0
+// CHECK20-NEXT:    store i32 [[TMP65]], i32* [[TMP77]], align 4
+// CHECK20-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 1
+// CHECK20-NEXT:    store i8* null, i8** [[TMP78]], align 4
+// CHECK20-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 2
+// CHECK20-NEXT:    [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP0]], i32* [[TMP80]], align 4
+// CHECK20-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 2
+// CHECK20-NEXT:    [[TMP82:%.*]] = bitcast i8** [[TMP81]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP0]], i32* [[TMP82]], align 4
+// CHECK20-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 2
+// CHECK20-NEXT:    store i8* null, i8** [[TMP83]], align 4
+// CHECK20-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 3
+// CHECK20-NEXT:    [[TMP85:%.*]] = bitcast i8** [[TMP84]] to i32**
+// CHECK20-NEXT:    store i32* [[VLA]], i32** [[TMP85]], align 4
+// CHECK20-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 3
+// CHECK20-NEXT:    [[TMP87:%.*]] = bitcast i8** [[TMP86]] to i32**
+// CHECK20-NEXT:    store i32* [[VLA]], i32** [[TMP87]], align 4
+// CHECK20-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 3
+// CHECK20-NEXT:    store i64 [[TMP67]], i64* [[TMP88]], align 4
+// CHECK20-NEXT:    [[TMP89:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 3
+// CHECK20-NEXT:    store i8* null, i8** [[TMP89]], align 4
+// CHECK20-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP91:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP92:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP93:%.*]] = load i32, i32* [[N]], align 4
+// CHECK20-NEXT:    store i32 [[TMP93]], i32* [[DOTCAPTURE_EXPR_23]], align 4
+// CHECK20-NEXT:    [[TMP94:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_23]], align 4
+// CHECK20-NEXT:    [[SUB25:%.*]] = sub nsw i32 [[TMP94]], 0
 // CHECK20-NEXT:    [[DIV26:%.*]] = sdiv i32 [[SUB25]], 1
 // CHECK20-NEXT:    [[SUB27:%.*]] = sub nsw i32 [[DIV26]], 1
 // CHECK20-NEXT:    store i32 [[SUB27]], i32* [[DOTCAPTURE_EXPR_24]], align 4
-// CHECK20-NEXT:    [[TMP99:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_24]], align 4
-// CHECK20-NEXT:    [[ADD28:%.*]] = add nsw i32 [[TMP99]], 1
-// CHECK20-NEXT:    [[TMP100:%.*]] = zext i32 [[ADD28]] to i64
-// CHECK20-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP100]])
-// CHECK20-NEXT:    [[TMP101:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l164.region_id, i32 4, i8** [[TMP94]], i8** [[TMP95]], i64* [[TMP96]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.7, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK20-NEXT:    [[TMP102:%.*]] = icmp ne i32 [[TMP101]], 0
-// CHECK20-NEXT:    br i1 [[TMP102]], label [[OMP_OFFLOAD_FAILED29:%.*]], label [[OMP_OFFLOAD_CONT30:%.*]]
+// CHECK20-NEXT:    [[TMP95:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_24]], align 4
+// CHECK20-NEXT:    [[ADD28:%.*]] = add nsw i32 [[TMP95]], 1
+// CHECK20-NEXT:    [[TMP96:%.*]] = zext i32 [[ADD28]] to i64
+// CHECK20-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP96]])
+// CHECK20-NEXT:    [[TMP97:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l164.region_id, i32 4, i8** [[TMP90]], i8** [[TMP91]], i64* [[TMP92]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK20-NEXT:    [[TMP98:%.*]] = icmp ne i32 [[TMP97]], 0
+// CHECK20-NEXT:    br i1 [[TMP98]], label [[OMP_OFFLOAD_FAILED29:%.*]], label [[OMP_OFFLOAD_CONT30:%.*]]
 // CHECK20:       omp_offload.failed29:
-// CHECK20-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l164(i32 [[TMP65]], i32 [[TMP67]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK20-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l164(i32 [[TMP63]], i32 [[TMP65]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK20-NEXT:    br label [[OMP_OFFLOAD_CONT30]]
 // CHECK20:       omp_offload.cont30:
-// CHECK20-NEXT:    [[TMP103:%.*]] = load i32, i32* [[N]], align 4
-// CHECK20-NEXT:    store i32 [[TMP103]], i32* [[N_CASTED31]], align 4
-// CHECK20-NEXT:    [[TMP104:%.*]] = load i32, i32* [[N_CASTED31]], align 4
-// CHECK20-NEXT:    [[TMP105:%.*]] = mul nuw i32 [[TMP0]], 4
-// CHECK20-NEXT:    [[TMP106:%.*]] = sext i32 [[TMP105]] to i64
-// CHECK20-NEXT:    [[TMP107:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS32]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP108:%.*]] = bitcast i8** [[TMP107]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP104]], i32* [[TMP108]], align 4
-// CHECK20-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS33]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP99:%.*]] = load i32, i32* [[N]], align 4
+// CHECK20-NEXT:    store i32 [[TMP99]], i32* [[N_CASTED31]], align 4
+// CHECK20-NEXT:    [[TMP100:%.*]] = load i32, i32* [[N_CASTED31]], align 4
+// CHECK20-NEXT:    [[TMP101:%.*]] = mul nuw i32 [[TMP0]], 4
+// CHECK20-NEXT:    [[TMP102:%.*]] = sext i32 [[TMP101]] to i64
+// CHECK20-NEXT:    [[TMP103:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES35]] to i8*
+// CHECK20-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP103]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes.12 to i8*), i32 24, i1 false)
+// CHECK20-NEXT:    [[TMP104:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS32]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP105:%.*]] = bitcast i8** [[TMP104]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP100]], i32* [[TMP105]], align 4
+// CHECK20-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS33]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP107:%.*]] = bitcast i8** [[TMP106]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP100]], i32* [[TMP107]], align 4
+// CHECK20-NEXT:    [[TMP108:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS34]], i32 0, i32 0
+// CHECK20-NEXT:    store i8* null, i8** [[TMP108]], align 4
+// CHECK20-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS32]], i32 0, i32 1
 // CHECK20-NEXT:    [[TMP110:%.*]] = bitcast i8** [[TMP109]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP104]], i32* [[TMP110]], align 4
-// CHECK20-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES35]], i32 0, i32 0
-// CHECK20-NEXT:    store i64 4, i64* [[TMP111]], align 4
-// CHECK20-NEXT:    [[TMP112:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS34]], i32 0, i32 0
-// CHECK20-NEXT:    store i8* null, i8** [[TMP112]], align 4
-// CHECK20-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS32]], i32 0, i32 1
-// CHECK20-NEXT:    [[TMP114:%.*]] = bitcast i8** [[TMP113]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP0]], i32* [[TMP114]], align 4
-// CHECK20-NEXT:    [[TMP115:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS33]], i32 0, i32 1
-// CHECK20-NEXT:    [[TMP116:%.*]] = bitcast i8** [[TMP115]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP0]], i32* [[TMP116]], align 4
-// CHECK20-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES35]], i32 0, i32 1
-// CHECK20-NEXT:    store i64 4, i64* [[TMP117]], align 4
-// CHECK20-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS34]], i32 0, i32 1
-// CHECK20-NEXT:    store i8* null, i8** [[TMP118]], align 4
-// CHECK20-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS32]], i32 0, i32 2
-// CHECK20-NEXT:    [[TMP120:%.*]] = bitcast i8** [[TMP119]] to i32**
-// CHECK20-NEXT:    store i32* [[VLA]], i32** [[TMP120]], align 4
-// CHECK20-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS33]], i32 0, i32 2
-// CHECK20-NEXT:    [[TMP122:%.*]] = bitcast i8** [[TMP121]] to i32**
-// CHECK20-NEXT:    store i32* [[VLA]], i32** [[TMP122]], align 4
-// CHECK20-NEXT:    [[TMP123:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES35]], i32 0, i32 2
-// CHECK20-NEXT:    store i64 [[TMP106]], i64* [[TMP123]], align 4
-// CHECK20-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS34]], i32 0, i32 2
-// CHECK20-NEXT:    store i8* null, i8** [[TMP124]], align 4
-// CHECK20-NEXT:    [[TMP125:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS32]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP126:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS33]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES35]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP128:%.*]] = load i32, i32* [[N]], align 4
-// CHECK20-NEXT:    store i32 [[TMP128]], i32* [[DOTCAPTURE_EXPR_37]], align 4
-// CHECK20-NEXT:    [[TMP129:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_37]], align 4
-// CHECK20-NEXT:    [[SUB39:%.*]] = sub nsw i32 [[TMP129]], 0
+// CHECK20-NEXT:    store i32 [[TMP0]], i32* [[TMP110]], align 4
+// CHECK20-NEXT:    [[TMP111:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS33]], i32 0, i32 1
+// CHECK20-NEXT:    [[TMP112:%.*]] = bitcast i8** [[TMP111]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP0]], i32* [[TMP112]], align 4
+// CHECK20-NEXT:    [[TMP113:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS34]], i32 0, i32 1
+// CHECK20-NEXT:    store i8* null, i8** [[TMP113]], align 4
+// CHECK20-NEXT:    [[TMP114:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS32]], i32 0, i32 2
+// CHECK20-NEXT:    [[TMP115:%.*]] = bitcast i8** [[TMP114]] to i32**
+// CHECK20-NEXT:    store i32* [[VLA]], i32** [[TMP115]], align 4
+// CHECK20-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS33]], i32 0, i32 2
+// CHECK20-NEXT:    [[TMP117:%.*]] = bitcast i8** [[TMP116]] to i32**
+// CHECK20-NEXT:    store i32* [[VLA]], i32** [[TMP117]], align 4
+// CHECK20-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES35]], i32 0, i32 2
+// CHECK20-NEXT:    store i64 [[TMP102]], i64* [[TMP118]], align 4
+// CHECK20-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS34]], i32 0, i32 2
+// CHECK20-NEXT:    store i8* null, i8** [[TMP119]], align 4
+// CHECK20-NEXT:    [[TMP120:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS32]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS33]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES35]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP123:%.*]] = load i32, i32* [[N]], align 4
+// CHECK20-NEXT:    store i32 [[TMP123]], i32* [[DOTCAPTURE_EXPR_37]], align 4
+// CHECK20-NEXT:    [[TMP124:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_37]], align 4
+// CHECK20-NEXT:    [[SUB39:%.*]] = sub nsw i32 [[TMP124]], 0
 // CHECK20-NEXT:    [[DIV40:%.*]] = sdiv i32 [[SUB39]], 1
 // CHECK20-NEXT:    [[SUB41:%.*]] = sub nsw i32 [[DIV40]], 1
 // CHECK20-NEXT:    store i32 [[SUB41]], i32* [[DOTCAPTURE_EXPR_38]], align 4
-// CHECK20-NEXT:    [[TMP130:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_38]], align 4
-// CHECK20-NEXT:    [[ADD42:%.*]] = add nsw i32 [[TMP130]], 1
-// CHECK20-NEXT:    [[TMP131:%.*]] = zext i32 [[ADD42]] to i64
-// CHECK20-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP131]])
-// CHECK20-NEXT:    [[TMP132:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l169.region_id, i32 3, i8** [[TMP125]], i8** [[TMP126]], i64* [[TMP127]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.10, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK20-NEXT:    [[TMP133:%.*]] = icmp ne i32 [[TMP132]], 0
-// CHECK20-NEXT:    br i1 [[TMP133]], label [[OMP_OFFLOAD_FAILED43:%.*]], label [[OMP_OFFLOAD_CONT44:%.*]]
+// CHECK20-NEXT:    [[TMP125:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_38]], align 4
+// CHECK20-NEXT:    [[ADD42:%.*]] = add nsw i32 [[TMP125]], 1
+// CHECK20-NEXT:    [[TMP126:%.*]] = zext i32 [[ADD42]] to i64
+// CHECK20-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP126]])
+// CHECK20-NEXT:    [[TMP127:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l169.region_id, i32 3, i8** [[TMP120]], i8** [[TMP121]], i64* [[TMP122]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK20-NEXT:    [[TMP128:%.*]] = icmp ne i32 [[TMP127]], 0
+// CHECK20-NEXT:    br i1 [[TMP128]], label [[OMP_OFFLOAD_FAILED43:%.*]], label [[OMP_OFFLOAD_CONT44:%.*]]
 // CHECK20:       omp_offload.failed43:
-// CHECK20-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l169(i32 [[TMP104]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK20-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l169(i32 [[TMP100]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK20-NEXT:    br label [[OMP_OFFLOAD_CONT44]]
 // CHECK20:       omp_offload.cont44:
-// CHECK20-NEXT:    [[TMP134:%.*]] = load i32, i32* [[M]], align 4
-// CHECK20-NEXT:    store i32 [[TMP134]], i32* [[M_CASTED45]], align 4
-// CHECK20-NEXT:    [[TMP135:%.*]] = load i32, i32* [[M_CASTED45]], align 4
-// CHECK20-NEXT:    [[TMP136:%.*]] = load i32, i32* [[N]], align 4
-// CHECK20-NEXT:    store i32 [[TMP136]], i32* [[N_CASTED46]], align 4
-// CHECK20-NEXT:    [[TMP137:%.*]] = load i32, i32* [[N_CASTED46]], align 4
-// CHECK20-NEXT:    [[TMP138:%.*]] = mul nuw i32 [[TMP0]], 4
-// CHECK20-NEXT:    [[TMP139:%.*]] = sext i32 [[TMP138]] to i64
-// CHECK20-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP141:%.*]] = bitcast i8** [[TMP140]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP135]], i32* [[TMP141]], align 4
-// CHECK20-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP143:%.*]] = bitcast i8** [[TMP142]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP135]], i32* [[TMP143]], align 4
-// CHECK20-NEXT:    [[TMP144:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES50]], i32 0, i32 0
-// CHECK20-NEXT:    store i64 4, i64* [[TMP144]], align 4
-// CHECK20-NEXT:    [[TMP145:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS49]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP129:%.*]] = load i32, i32* [[M]], align 4
+// CHECK20-NEXT:    store i32 [[TMP129]], i32* [[M_CASTED45]], align 4
+// CHECK20-NEXT:    [[TMP130:%.*]] = load i32, i32* [[M_CASTED45]], align 4
+// CHECK20-NEXT:    [[TMP131:%.*]] = load i32, i32* [[N]], align 4
+// CHECK20-NEXT:    store i32 [[TMP131]], i32* [[N_CASTED46]], align 4
+// CHECK20-NEXT:    [[TMP132:%.*]] = load i32, i32* [[N_CASTED46]], align 4
+// CHECK20-NEXT:    [[TMP133:%.*]] = mul nuw i32 [[TMP0]], 4
+// CHECK20-NEXT:    [[TMP134:%.*]] = sext i32 [[TMP133]] to i64
+// CHECK20-NEXT:    [[TMP135:%.*]] = bitcast [4 x i64]* [[DOTOFFLOAD_SIZES50]] to i8*
+// CHECK20-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP135]], i8* align 4 bitcast ([4 x i64]* @.offload_sizes.16 to i8*), i32 32, i1 false)
+// CHECK20-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP137:%.*]] = bitcast i8** [[TMP136]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP130]], i32* [[TMP137]], align 4
+// CHECK20-NEXT:    [[TMP138:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP139:%.*]] = bitcast i8** [[TMP138]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP130]], i32* [[TMP139]], align 4
+// CHECK20-NEXT:    [[TMP140:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS49]], i32 0, i32 0
+// CHECK20-NEXT:    store i8* null, i8** [[TMP140]], align 4
+// CHECK20-NEXT:    [[TMP141:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 1
+// CHECK20-NEXT:    [[TMP142:%.*]] = bitcast i8** [[TMP141]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP132]], i32* [[TMP142]], align 4
+// CHECK20-NEXT:    [[TMP143:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 1
+// CHECK20-NEXT:    [[TMP144:%.*]] = bitcast i8** [[TMP143]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP132]], i32* [[TMP144]], align 4
+// CHECK20-NEXT:    [[TMP145:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS49]], i32 0, i32 1
 // CHECK20-NEXT:    store i8* null, i8** [[TMP145]], align 4
-// CHECK20-NEXT:    [[TMP146:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 1
+// CHECK20-NEXT:    [[TMP146:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 2
 // CHECK20-NEXT:    [[TMP147:%.*]] = bitcast i8** [[TMP146]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP137]], i32* [[TMP147]], align 4
-// CHECK20-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 1
+// CHECK20-NEXT:    store i32 [[TMP0]], i32* [[TMP147]], align 4
+// CHECK20-NEXT:    [[TMP148:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 2
 // CHECK20-NEXT:    [[TMP149:%.*]] = bitcast i8** [[TMP148]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP137]], i32* [[TMP149]], align 4
-// CHECK20-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES50]], i32 0, i32 1
-// CHECK20-NEXT:    store i64 4, i64* [[TMP150]], align 4
-// CHECK20-NEXT:    [[TMP151:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS49]], i32 0, i32 1
-// CHECK20-NEXT:    store i8* null, i8** [[TMP151]], align 4
-// CHECK20-NEXT:    [[TMP152:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 2
-// CHECK20-NEXT:    [[TMP153:%.*]] = bitcast i8** [[TMP152]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP0]], i32* [[TMP153]], align 4
-// CHECK20-NEXT:    [[TMP154:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 2
-// CHECK20-NEXT:    [[TMP155:%.*]] = bitcast i8** [[TMP154]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP0]], i32* [[TMP155]], align 4
-// CHECK20-NEXT:    [[TMP156:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES50]], i32 0, i32 2
-// CHECK20-NEXT:    store i64 4, i64* [[TMP156]], align 4
-// CHECK20-NEXT:    [[TMP157:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS49]], i32 0, i32 2
-// CHECK20-NEXT:    store i8* null, i8** [[TMP157]], align 4
-// CHECK20-NEXT:    [[TMP158:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 3
-// CHECK20-NEXT:    [[TMP159:%.*]] = bitcast i8** [[TMP158]] to i32**
-// CHECK20-NEXT:    store i32* [[VLA]], i32** [[TMP159]], align 4
-// CHECK20-NEXT:    [[TMP160:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 3
-// CHECK20-NEXT:    [[TMP161:%.*]] = bitcast i8** [[TMP160]] to i32**
-// CHECK20-NEXT:    store i32* [[VLA]], i32** [[TMP161]], align 4
-// CHECK20-NEXT:    [[TMP162:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES50]], i32 0, i32 3
-// CHECK20-NEXT:    store i64 [[TMP139]], i64* [[TMP162]], align 4
-// CHECK20-NEXT:    [[TMP163:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS49]], i32 0, i32 3
-// CHECK20-NEXT:    store i8* null, i8** [[TMP163]], align 4
-// CHECK20-NEXT:    [[TMP164:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP165:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP166:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES50]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP167:%.*]] = load i32, i32* [[N]], align 4
-// CHECK20-NEXT:    store i32 [[TMP167]], i32* [[DOTCAPTURE_EXPR_52]], align 4
-// CHECK20-NEXT:    [[TMP168:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_52]], align 4
-// CHECK20-NEXT:    [[SUB54:%.*]] = sub nsw i32 [[TMP168]], 0
+// CHECK20-NEXT:    store i32 [[TMP0]], i32* [[TMP149]], align 4
+// CHECK20-NEXT:    [[TMP150:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS49]], i32 0, i32 2
+// CHECK20-NEXT:    store i8* null, i8** [[TMP150]], align 4
+// CHECK20-NEXT:    [[TMP151:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 3
+// CHECK20-NEXT:    [[TMP152:%.*]] = bitcast i8** [[TMP151]] to i32**
+// CHECK20-NEXT:    store i32* [[VLA]], i32** [[TMP152]], align 4
+// CHECK20-NEXT:    [[TMP153:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 3
+// CHECK20-NEXT:    [[TMP154:%.*]] = bitcast i8** [[TMP153]] to i32**
+// CHECK20-NEXT:    store i32* [[VLA]], i32** [[TMP154]], align 4
+// CHECK20-NEXT:    [[TMP155:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES50]], i32 0, i32 3
+// CHECK20-NEXT:    store i64 [[TMP134]], i64* [[TMP155]], align 4
+// CHECK20-NEXT:    [[TMP156:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS49]], i32 0, i32 3
+// CHECK20-NEXT:    store i8* null, i8** [[TMP156]], align 4
+// CHECK20-NEXT:    [[TMP157:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS47]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP158:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS48]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP159:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES50]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP160:%.*]] = load i32, i32* [[N]], align 4
+// CHECK20-NEXT:    store i32 [[TMP160]], i32* [[DOTCAPTURE_EXPR_52]], align 4
+// CHECK20-NEXT:    [[TMP161:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_52]], align 4
+// CHECK20-NEXT:    [[SUB54:%.*]] = sub nsw i32 [[TMP161]], 0
 // CHECK20-NEXT:    [[DIV55:%.*]] = sdiv i32 [[SUB54]], 1
 // CHECK20-NEXT:    [[SUB56:%.*]] = sub nsw i32 [[DIV55]], 1
 // CHECK20-NEXT:    store i32 [[SUB56]], i32* [[DOTCAPTURE_EXPR_53]], align 4
-// CHECK20-NEXT:    [[TMP169:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_53]], align 4
-// CHECK20-NEXT:    [[ADD57:%.*]] = add nsw i32 [[TMP169]], 1
-// CHECK20-NEXT:    [[TMP170:%.*]] = zext i32 [[ADD57]] to i64
-// CHECK20-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP170]])
-// CHECK20-NEXT:    [[TMP171:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l174.region_id, i32 4, i8** [[TMP164]], i8** [[TMP165]], i64* [[TMP166]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.13, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK20-NEXT:    [[TMP172:%.*]] = icmp ne i32 [[TMP171]], 0
-// CHECK20-NEXT:    br i1 [[TMP172]], label [[OMP_OFFLOAD_FAILED58:%.*]], label [[OMP_OFFLOAD_CONT59:%.*]]
+// CHECK20-NEXT:    [[TMP162:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_53]], align 4
+// CHECK20-NEXT:    [[ADD57:%.*]] = add nsw i32 [[TMP162]], 1
+// CHECK20-NEXT:    [[TMP163:%.*]] = zext i32 [[ADD57]] to i64
+// CHECK20-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 [[TMP163]])
+// CHECK20-NEXT:    [[TMP164:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l174.region_id, i32 4, i8** [[TMP157]], i8** [[TMP158]], i64* [[TMP159]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes.17, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK20-NEXT:    [[TMP165:%.*]] = icmp ne i32 [[TMP164]], 0
+// CHECK20-NEXT:    br i1 [[TMP165]], label [[OMP_OFFLOAD_FAILED58:%.*]], label [[OMP_OFFLOAD_CONT59:%.*]]
 // CHECK20:       omp_offload.failed58:
-// CHECK20-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l174(i32 [[TMP135]], i32 [[TMP137]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK20-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l174(i32 [[TMP130]], i32 [[TMP132]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK20-NEXT:    br label [[OMP_OFFLOAD_CONT59]]
 // CHECK20:       omp_offload.cont59:
-// CHECK20-NEXT:    [[TMP173:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK20-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10EEiT_(i32 noundef [[TMP173]])
+// CHECK20-NEXT:    [[TMP166:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK20-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10EEiT_(i32 noundef [[TMP166]])
 // CHECK20-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK20-NEXT:    [[TMP174:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK20-NEXT:    call void @llvm.stackrestore(i8* [[TMP174]])
-// CHECK20-NEXT:    [[TMP175:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK20-NEXT:    ret i32 [[TMP175]]
+// CHECK20-NEXT:    [[TMP167:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK20-NEXT:    call void @llvm.stackrestore(i8* [[TMP167]])
+// CHECK20-NEXT:    [[TMP168:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK20-NEXT:    ret i32 [[TMP168]]
 //
 //
 // CHECK20-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l154
@@ -27976,11 +27864,11 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
 // CHECK20-NEXT:    store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK20-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*, i32)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP4]])
+// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*, i32)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP4]])
 // CHECK20-NEXT:    ret void
 //
 //
-// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..5
+// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..6
 // CHECK20-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK20-NEXT:  entry:
 // CHECK20-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -28058,7 +27946,7 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !29
 // CHECK20-NEXT:    store i32 [[TMP19]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !29
 // CHECK20-NEXT:    [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !29
-// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*, i32)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]], i32 [[TMP20]]), !llvm.access.group !29
+// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*, i32)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]], i32 [[TMP20]]), !llvm.access.group !29
 // CHECK20-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK20:       omp.inner.for.inc:
 // CHECK20-NEXT:    [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29
@@ -28112,7 +28000,7 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    ret void
 //
 //
-// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..6
+// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..7
 // CHECK20-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK20-NEXT:  entry:
 // CHECK20-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -28239,11 +28127,11 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    store i32* [[A]], i32** [[A_ADDR]], align 4
 // CHECK20-NEXT:    [[TMP0:%.*]] = load i32, i32* [[VLA_ADDR]], align 4
 // CHECK20-NEXT:    [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4
-// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]])
+// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]])
 // CHECK20-NEXT:    ret void
 //
 //
-// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..8
+// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..10
 // CHECK20-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] {
 // CHECK20-NEXT:  entry:
 // CHECK20-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -28313,7 +28201,7 @@ int main (int argc, char **argv) {
 // CHECK20:       omp.inner.for.body:
 // CHECK20-NEXT:    [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !35
 // CHECK20-NEXT:    [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !35
-// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]]), !llvm.access.group !35
+// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]]), !llvm.access.group !35
 // CHECK20-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK20:       omp.inner.for.inc:
 // CHECK20-NEXT:    [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35
@@ -28344,7 +28232,7 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    ret void
 //
 //
-// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..9
+// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..11
 // CHECK20-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] {
 // CHECK20-NEXT:  entry:
 // CHECK20-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -28474,11 +28362,11 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
 // CHECK20-NEXT:    store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK20-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*, i32)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP4]])
+// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*, i32)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP4]])
 // CHECK20-NEXT:    ret void
 //
 //
-// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..11
+// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..14
 // CHECK20-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK20-NEXT:  entry:
 // CHECK20-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -28554,7 +28442,7 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !41
 // CHECK20-NEXT:    store i32 [[TMP18]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !41
 // CHECK20-NEXT:    [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !41
-// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*, i32)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]], i32 [[TMP19]]), !llvm.access.group !41
+// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*, i32)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]], i32 [[TMP19]]), !llvm.access.group !41
 // CHECK20-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK20:       omp.inner.for.inc:
 // CHECK20-NEXT:    [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41
@@ -28585,7 +28473,7 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    ret void
 //
 //
-// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..12
+// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..15
 // CHECK20-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK20-NEXT:  entry:
 // CHECK20-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -28699,7 +28587,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK20-LABEL: define {{[^@]+}}@_Z5tmainIiLi10EEiT_
-// CHECK20-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK20-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK20-NEXT:  entry:
 // CHECK20-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK20-NEXT:    [[A:%.*]] = alloca [10 x i32], align 4
@@ -28739,7 +28627,7 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK20-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK20-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK20-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l122.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.16, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK20-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l122.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.20, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.21, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK20-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK20-NEXT:    br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK20:       omp_offload.failed:
@@ -28757,7 +28645,7 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK20-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
 // CHECK20-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK20-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l127.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.19, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.20, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK20-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l127.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.24, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.25, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK20-NEXT:    [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
 // CHECK20-NEXT:    br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]]
 // CHECK20:       omp_offload.failed5:
@@ -28786,7 +28674,7 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0
 // CHECK20-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS8]], i32 0, i32 0
 // CHECK20-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK20-NEXT:    [[TMP32:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l132.region_id, i32 2, i8** [[TMP30]], i8** [[TMP31]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.23, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.24, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK20-NEXT:    [[TMP32:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l132.region_id, i32 2, i8** [[TMP30]], i8** [[TMP31]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.28, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.29, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK20-NEXT:    [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0
 // CHECK20-NEXT:    br i1 [[TMP33]], label [[OMP_OFFLOAD_FAILED11:%.*]], label [[OMP_OFFLOAD_CONT12:%.*]]
 // CHECK20:       omp_offload.failed11:
@@ -28804,7 +28692,7 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0
 // CHECK20-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS14]], i32 0, i32 0
 // CHECK20-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK20-NEXT:    [[TMP41:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l137.region_id, i32 1, i8** [[TMP39]], i8** [[TMP40]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.27, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.28, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK20-NEXT:    [[TMP41:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l137.region_id, i32 1, i8** [[TMP39]], i8** [[TMP40]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.32, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.33, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK20-NEXT:    [[TMP42:%.*]] = icmp ne i32 [[TMP41]], 0
 // CHECK20-NEXT:    br i1 [[TMP42]], label [[OMP_OFFLOAD_FAILED17:%.*]], label [[OMP_OFFLOAD_CONT18:%.*]]
 // CHECK20:       omp_offload.failed17:
@@ -28833,7 +28721,7 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0
 // CHECK20-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 0
 // CHECK20-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 10)
-// CHECK20-NEXT:    [[TMP57:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l142.region_id, i32 2, i8** [[TMP55]], i8** [[TMP56]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.31, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.32, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK20-NEXT:    [[TMP57:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l142.region_id, i32 2, i8** [[TMP55]], i8** [[TMP56]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.36, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.37, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
 // CHECK20-NEXT:    [[TMP58:%.*]] = icmp ne i32 [[TMP57]], 0
 // CHECK20-NEXT:    br i1 [[TMP58]], label [[OMP_OFFLOAD_FAILED24:%.*]], label [[OMP_OFFLOAD_CONT25:%.*]]
 // CHECK20:       omp_offload.failed24:
@@ -28849,11 +28737,11 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4
 // CHECK20-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4
 // CHECK20-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4
-// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..14 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK20-NEXT:    ret void
 //
 //
-// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..14
+// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..18
 // CHECK20-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK20-NEXT:  entry:
 // CHECK20-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -28899,7 +28787,7 @@ int main (int argc, char **argv) {
 // CHECK20:       omp.inner.for.body:
 // CHECK20-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !47
 // CHECK20-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !47
-// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !47
+// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !47
 // CHECK20-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK20:       omp.inner.for.inc:
 // CHECK20-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47
@@ -28921,7 +28809,7 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    ret void
 //
 //
-// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..15
+// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..19
 // CHECK20-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK20-NEXT:  entry:
 // CHECK20-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -29008,11 +28896,11 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4
 // CHECK20-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4
 // CHECK20-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4
-// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..17 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..22 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK20-NEXT:    ret void
 //
 //
-// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..17
+// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..22
 // CHECK20-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK20-NEXT:  entry:
 // CHECK20-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -29058,7 +28946,7 @@ int main (int argc, char **argv) {
 // CHECK20:       omp.inner.for.body:
 // CHECK20-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !53
 // CHECK20-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !53
-// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !53
+// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..23 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !53
 // CHECK20-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK20:       omp.inner.for.inc:
 // CHECK20-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53
@@ -29080,7 +28968,7 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    ret void
 //
 //
-// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..18
+// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..23
 // CHECK20-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK20-NEXT:  entry:
 // CHECK20-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -29176,11 +29064,11 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
 // CHECK20-NEXT:    store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK20-NEXT:    [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i32)* @.omp_outlined..21 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i32 [[TMP3]])
+// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i32)* @.omp_outlined..26 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i32 [[TMP3]])
 // CHECK20-NEXT:    ret void
 //
 //
-// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..21
+// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..26
 // CHECK20-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK20-NEXT:  entry:
 // CHECK20-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -29232,7 +29120,7 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !59
 // CHECK20-NEXT:    store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !59
 // CHECK20-NEXT:    [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !59
-// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..22 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]), !llvm.access.group !59
+// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..27 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]), !llvm.access.group !59
 // CHECK20-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK20:       omp.inner.for.inc:
 // CHECK20-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59
@@ -29254,7 +29142,7 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    ret void
 //
 //
-// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..22
+// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..27
 // CHECK20-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK20-NEXT:  entry:
 // CHECK20-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -29363,11 +29251,11 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4
 // CHECK20-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4
 // CHECK20-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4
-// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..25 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..30 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK20-NEXT:    ret void
 //
 //
-// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..25
+// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..30
 // CHECK20-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK20-NEXT:  entry:
 // CHECK20-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -29413,7 +29301,7 @@ int main (int argc, char **argv) {
 // CHECK20:       omp.inner.for.body:
 // CHECK20-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !65
 // CHECK20-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !65
-// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !65
+// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..31 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !65
 // CHECK20-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK20:       omp.inner.for.inc:
 // CHECK20-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !65
@@ -29435,7 +29323,7 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    ret void
 //
 //
-// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..26
+// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..31
 // CHECK20-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK20-NEXT:  entry:
 // CHECK20-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -29529,11 +29417,11 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
 // CHECK20-NEXT:    store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK20-NEXT:    [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i32)* @.omp_outlined..29 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i32 [[TMP3]])
+// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*, i32)* @.omp_outlined..34 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]], i32 [[TMP3]])
 // CHECK20-NEXT:    ret void
 //
 //
-// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..29
+// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..34
 // CHECK20-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK20-NEXT:  entry:
 // CHECK20-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -29585,7 +29473,7 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !71
 // CHECK20-NEXT:    store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !71
 // CHECK20-NEXT:    [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !71
-// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]), !llvm.access.group !71
+// CHECK20-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]), !llvm.access.group !71
 // CHECK20-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK20:       omp.inner.for.inc:
 // CHECK20-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !71
@@ -29607,7 +29495,7 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    ret void
 //
 //
-// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..30
+// CHECK20-LABEL: define {{[^@]+}}@.omp_outlined..35
 // CHECK20-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK20-NEXT:  entry:
 // CHECK20-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -29690,7 +29578,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK20-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK20-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK20-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK20-NEXT:  entry:
 // CHECK20-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK20-NEXT:    ret void

diff  --git a/clang/test/OpenMP/teams_distribute_simd_codegen.cpp b/clang/test/OpenMP/teams_distribute_simd_codegen.cpp
index 33e924ee36be0..42ac0919939c8 100644
--- a/clang/test/OpenMP/teams_distribute_simd_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_simd_codegen.cpp
@@ -2385,14 +2385,14 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    store i32 [[TMP3]], i32* [[CONV]], align 4
 // CHECK9-NEXT:    [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8
 // CHECK9-NEXT:    [[TMP5:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK9-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP4]], i64* [[TMP7]], align 8
-// CHECK9-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP4]], i64* [[TMP9]], align 8
-// CHECK9-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK9-NEXT:    store i64 4, i64* [[TMP10]], align 8
+// CHECK9-NEXT:    [[TMP6:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK9-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP6]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes to i8*), i64 24, i1 false)
+// CHECK9-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP4]], i64* [[TMP8]], align 8
+// CHECK9-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP4]], i64* [[TMP10]], align 8
 // CHECK9-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
 // CHECK9-NEXT:    store i8* null, i8** [[TMP11]], align 8
 // CHECK9-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -2401,46 +2401,44 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK9-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i64*
 // CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP15]], align 8
-// CHECK9-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK9-NEXT:    store i64 8, i64* [[TMP16]], align 8
-// CHECK9-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK9-NEXT:    store i8* null, i8** [[TMP17]], align 8
-// CHECK9-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
-// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP19]], align 8
-// CHECK9-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32**
-// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP21]], align 8
-// CHECK9-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK9-NEXT:    store i64 [[TMP5]], i64* [[TMP22]], align 8
-// CHECK9-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK9-NEXT:    store i8* null, i8** [[TMP23]], align 8
-// CHECK9-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP27:%.*]] = load i32, i32* [[N]], align 4
-// CHECK9-NEXT:    store i32 [[TMP27]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK9-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK9-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP28]], 0
+// CHECK9-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK9-NEXT:    store i8* null, i8** [[TMP16]], align 8
+// CHECK9-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32**
+// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP18]], align 8
+// CHECK9-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32**
+// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP20]], align 8
+// CHECK9-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
+// CHECK9-NEXT:    store i64 [[TMP5]], i64* [[TMP21]], align 8
+// CHECK9-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK9-NEXT:    store i8* null, i8** [[TMP22]], align 8
+// CHECK9-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP26:%.*]] = load i32, i32* [[N]], align 4
+// CHECK9-NEXT:    store i32 [[TMP26]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK9-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK9-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP27]], 0
 // CHECK9-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK9-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK9-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK9-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK9-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP29]], 1
-// CHECK9-NEXT:    [[TMP30:%.*]] = zext i32 [[ADD]] to i64
-// CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 [[TMP30]])
-// CHECK9-NEXT:    [[TMP31:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l75.region_id, i32 3, i8** [[TMP24]], i8** [[TMP25]], i64* [[TMP26]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
-// CHECK9-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
-// CHECK9-NEXT:    br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK9-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK9-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP28]], 1
+// CHECK9-NEXT:    [[TMP29:%.*]] = zext i32 [[ADD]] to i64
+// CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 [[TMP29]])
+// CHECK9-NEXT:    [[TMP30:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l75.region_id, i32 3, i8** [[TMP23]], i8** [[TMP24]], i64* [[TMP25]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK9-NEXT:    [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
+// CHECK9-NEXT:    br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK9:       omp_offload.failed:
 // CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l75(i64 [[TMP4]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK9:       omp_offload.cont:
 // CHECK9-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[VLA]], i64 0
-// CHECK9-NEXT:    [[TMP33:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
-// CHECK9-NEXT:    [[TMP34:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK9-NEXT:    call void @llvm.stackrestore(i8* [[TMP34]])
-// CHECK9-NEXT:    ret i32 [[TMP33]]
+// CHECK9-NEXT:    [[TMP32:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+// CHECK9-NEXT:    [[TMP33:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK9-NEXT:    call void @llvm.stackrestore(i8* [[TMP33]])
+// CHECK9-NEXT:    ret i32 [[TMP32]]
 //
 //
 // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l75
@@ -2567,7 +2565,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK9-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK9-SAME: () #[[ATTR4:[0-9]+]] {
+// CHECK9-SAME: () #[[ATTR5:[0-9]+]] {
 // CHECK9-NEXT:  entry:
 // CHECK9-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK9-NEXT:    ret void
@@ -2599,14 +2597,14 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    store i32 [[TMP3]], i32* [[CONV]], align 4
 // CHECK10-NEXT:    [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8
 // CHECK10-NEXT:    [[TMP5:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK10-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP4]], i64* [[TMP7]], align 8
-// CHECK10-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP4]], i64* [[TMP9]], align 8
-// CHECK10-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK10-NEXT:    store i64 4, i64* [[TMP10]], align 8
+// CHECK10-NEXT:    [[TMP6:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK10-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP6]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes to i8*), i64 24, i1 false)
+// CHECK10-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP4]], i64* [[TMP8]], align 8
+// CHECK10-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP4]], i64* [[TMP10]], align 8
 // CHECK10-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
 // CHECK10-NEXT:    store i8* null, i8** [[TMP11]], align 8
 // CHECK10-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -2615,46 +2613,44 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK10-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i64*
 // CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP15]], align 8
-// CHECK10-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK10-NEXT:    store i64 8, i64* [[TMP16]], align 8
-// CHECK10-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK10-NEXT:    store i8* null, i8** [[TMP17]], align 8
-// CHECK10-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK10-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
-// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP19]], align 8
-// CHECK10-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK10-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32**
-// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP21]], align 8
-// CHECK10-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK10-NEXT:    store i64 [[TMP5]], i64* [[TMP22]], align 8
-// CHECK10-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK10-NEXT:    store i8* null, i8** [[TMP23]], align 8
-// CHECK10-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP27:%.*]] = load i32, i32* [[N]], align 4
-// CHECK10-NEXT:    store i32 [[TMP27]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK10-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK10-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP28]], 0
+// CHECK10-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK10-NEXT:    store i8* null, i8** [[TMP16]], align 8
+// CHECK10-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK10-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32**
+// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP18]], align 8
+// CHECK10-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK10-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32**
+// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP20]], align 8
+// CHECK10-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
+// CHECK10-NEXT:    store i64 [[TMP5]], i64* [[TMP21]], align 8
+// CHECK10-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK10-NEXT:    store i8* null, i8** [[TMP22]], align 8
+// CHECK10-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP26:%.*]] = load i32, i32* [[N]], align 4
+// CHECK10-NEXT:    store i32 [[TMP26]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK10-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK10-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP27]], 0
 // CHECK10-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK10-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK10-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK10-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK10-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP29]], 1
-// CHECK10-NEXT:    [[TMP30:%.*]] = zext i32 [[ADD]] to i64
-// CHECK10-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 [[TMP30]])
-// CHECK10-NEXT:    [[TMP31:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l75.region_id, i32 3, i8** [[TMP24]], i8** [[TMP25]], i64* [[TMP26]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
-// CHECK10-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
-// CHECK10-NEXT:    br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK10-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK10-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP28]], 1
+// CHECK10-NEXT:    [[TMP29:%.*]] = zext i32 [[ADD]] to i64
+// CHECK10-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 [[TMP29]])
+// CHECK10-NEXT:    [[TMP30:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l75.region_id, i32 3, i8** [[TMP23]], i8** [[TMP24]], i64* [[TMP25]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK10-NEXT:    [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
+// CHECK10-NEXT:    br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK10:       omp_offload.failed:
 // CHECK10-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l75(i64 [[TMP4]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK10-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK10:       omp_offload.cont:
 // CHECK10-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[VLA]], i64 0
-// CHECK10-NEXT:    [[TMP33:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
-// CHECK10-NEXT:    [[TMP34:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK10-NEXT:    call void @llvm.stackrestore(i8* [[TMP34]])
-// CHECK10-NEXT:    ret i32 [[TMP33]]
+// CHECK10-NEXT:    [[TMP32:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+// CHECK10-NEXT:    [[TMP33:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK10-NEXT:    call void @llvm.stackrestore(i8* [[TMP33]])
+// CHECK10-NEXT:    ret i32 [[TMP32]]
 //
 //
 // CHECK10-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l75
@@ -2781,7 +2777,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK10-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK10-SAME: () #[[ATTR4:[0-9]+]] {
+// CHECK10-SAME: () #[[ATTR5:[0-9]+]] {
 // CHECK10-NEXT:  entry:
 // CHECK10-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK10-NEXT:    ret void
@@ -2812,14 +2808,14 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4
 // CHECK11-NEXT:    [[TMP4:%.*]] = mul nuw i32 [[TMP0]], 4
 // CHECK11-NEXT:    [[TMP5:%.*]] = sext i32 [[TMP4]] to i64
-// CHECK11-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP3]], i32* [[TMP7]], align 4
-// CHECK11-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP3]], i32* [[TMP9]], align 4
-// CHECK11-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK11-NEXT:    store i64 4, i64* [[TMP10]], align 4
+// CHECK11-NEXT:    [[TMP6:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK11-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP6]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes to i8*), i32 24, i1 false)
+// CHECK11-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP3]], i32* [[TMP8]], align 4
+// CHECK11-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP3]], i32* [[TMP10]], align 4
 // CHECK11-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
 // CHECK11-NEXT:    store i8* null, i8** [[TMP11]], align 4
 // CHECK11-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -2828,46 +2824,44 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK11-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i32*
 // CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP15]], align 4
-// CHECK11-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK11-NEXT:    store i64 4, i64* [[TMP16]], align 4
-// CHECK11-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK11-NEXT:    store i8* null, i8** [[TMP17]], align 4
-// CHECK11-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
-// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP19]], align 4
-// CHECK11-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32**
-// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP21]], align 4
-// CHECK11-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK11-NEXT:    store i64 [[TMP5]], i64* [[TMP22]], align 4
-// CHECK11-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK11-NEXT:    store i8* null, i8** [[TMP23]], align 4
-// CHECK11-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP27:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP27]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK11-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK11-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP28]], 0
+// CHECK11-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK11-NEXT:    store i8* null, i8** [[TMP16]], align 4
+// CHECK11-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32**
+// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP18]], align 4
+// CHECK11-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32**
+// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP20]], align 4
+// CHECK11-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
+// CHECK11-NEXT:    store i64 [[TMP5]], i64* [[TMP21]], align 4
+// CHECK11-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK11-NEXT:    store i8* null, i8** [[TMP22]], align 4
+// CHECK11-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP26:%.*]] = load i32, i32* [[N]], align 4
+// CHECK11-NEXT:    store i32 [[TMP26]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK11-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK11-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP27]], 0
 // CHECK11-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK11-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK11-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK11-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK11-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP29]], 1
-// CHECK11-NEXT:    [[TMP30:%.*]] = zext i32 [[ADD]] to i64
-// CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 [[TMP30]])
-// CHECK11-NEXT:    [[TMP31:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l75.region_id, i32 3, i8** [[TMP24]], i8** [[TMP25]], i64* [[TMP26]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
-// CHECK11-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
-// CHECK11-NEXT:    br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK11-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK11-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP28]], 1
+// CHECK11-NEXT:    [[TMP29:%.*]] = zext i32 [[ADD]] to i64
+// CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 [[TMP29]])
+// CHECK11-NEXT:    [[TMP30:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l75.region_id, i32 3, i8** [[TMP23]], i8** [[TMP24]], i64* [[TMP25]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK11-NEXT:    [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
+// CHECK11-NEXT:    br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK11:       omp_offload.failed:
 // CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l75(i32 [[TMP3]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK11:       omp_offload.cont:
 // CHECK11-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[VLA]], i32 0
-// CHECK11-NEXT:    [[TMP33:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
-// CHECK11-NEXT:    [[TMP34:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK11-NEXT:    call void @llvm.stackrestore(i8* [[TMP34]])
-// CHECK11-NEXT:    ret i32 [[TMP33]]
+// CHECK11-NEXT:    [[TMP32:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+// CHECK11-NEXT:    [[TMP33:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK11-NEXT:    call void @llvm.stackrestore(i8* [[TMP33]])
+// CHECK11-NEXT:    ret i32 [[TMP32]]
 //
 //
 // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l75
@@ -2992,7 +2986,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK11-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK11-SAME: () #[[ATTR4:[0-9]+]] {
+// CHECK11-SAME: () #[[ATTR5:[0-9]+]] {
 // CHECK11-NEXT:  entry:
 // CHECK11-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK11-NEXT:    ret void
@@ -3023,14 +3017,14 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4
 // CHECK12-NEXT:    [[TMP4:%.*]] = mul nuw i32 [[TMP0]], 4
 // CHECK12-NEXT:    [[TMP5:%.*]] = sext i32 [[TMP4]] to i64
-// CHECK12-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP3]], i32* [[TMP7]], align 4
-// CHECK12-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP3]], i32* [[TMP9]], align 4
-// CHECK12-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK12-NEXT:    store i64 4, i64* [[TMP10]], align 4
+// CHECK12-NEXT:    [[TMP6:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK12-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP6]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes to i8*), i32 24, i1 false)
+// CHECK12-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP3]], i32* [[TMP8]], align 4
+// CHECK12-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP3]], i32* [[TMP10]], align 4
 // CHECK12-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
 // CHECK12-NEXT:    store i8* null, i8** [[TMP11]], align 4
 // CHECK12-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -3039,46 +3033,44 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK12-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i32*
 // CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP15]], align 4
-// CHECK12-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK12-NEXT:    store i64 4, i64* [[TMP16]], align 4
-// CHECK12-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK12-NEXT:    store i8* null, i8** [[TMP17]], align 4
-// CHECK12-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK12-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
-// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP19]], align 4
-// CHECK12-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK12-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32**
-// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP21]], align 4
-// CHECK12-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK12-NEXT:    store i64 [[TMP5]], i64* [[TMP22]], align 4
-// CHECK12-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK12-NEXT:    store i8* null, i8** [[TMP23]], align 4
-// CHECK12-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP27:%.*]] = load i32, i32* [[N]], align 4
-// CHECK12-NEXT:    store i32 [[TMP27]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK12-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK12-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP28]], 0
+// CHECK12-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK12-NEXT:    store i8* null, i8** [[TMP16]], align 4
+// CHECK12-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK12-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32**
+// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP18]], align 4
+// CHECK12-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK12-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32**
+// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP20]], align 4
+// CHECK12-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
+// CHECK12-NEXT:    store i64 [[TMP5]], i64* [[TMP21]], align 4
+// CHECK12-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK12-NEXT:    store i8* null, i8** [[TMP22]], align 4
+// CHECK12-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP26:%.*]] = load i32, i32* [[N]], align 4
+// CHECK12-NEXT:    store i32 [[TMP26]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK12-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK12-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP27]], 0
 // CHECK12-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK12-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK12-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK12-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK12-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP29]], 1
-// CHECK12-NEXT:    [[TMP30:%.*]] = zext i32 [[ADD]] to i64
-// CHECK12-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 [[TMP30]])
-// CHECK12-NEXT:    [[TMP31:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l75.region_id, i32 3, i8** [[TMP24]], i8** [[TMP25]], i64* [[TMP26]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
-// CHECK12-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
-// CHECK12-NEXT:    br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK12-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK12-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP28]], 1
+// CHECK12-NEXT:    [[TMP29:%.*]] = zext i32 [[ADD]] to i64
+// CHECK12-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 [[TMP29]])
+// CHECK12-NEXT:    [[TMP30:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l75.region_id, i32 3, i8** [[TMP23]], i8** [[TMP24]], i64* [[TMP25]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK12-NEXT:    [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
+// CHECK12-NEXT:    br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK12:       omp_offload.failed:
 // CHECK12-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l75(i32 [[TMP3]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK12-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK12:       omp_offload.cont:
 // CHECK12-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[VLA]], i32 0
-// CHECK12-NEXT:    [[TMP33:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
-// CHECK12-NEXT:    [[TMP34:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK12-NEXT:    call void @llvm.stackrestore(i8* [[TMP34]])
-// CHECK12-NEXT:    ret i32 [[TMP33]]
+// CHECK12-NEXT:    [[TMP32:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+// CHECK12-NEXT:    [[TMP33:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK12-NEXT:    call void @llvm.stackrestore(i8* [[TMP33]])
+// CHECK12-NEXT:    ret i32 [[TMP32]]
 //
 //
 // CHECK12-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l75
@@ -3203,7 +3195,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK12-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK12-SAME: () #[[ATTR4:[0-9]+]] {
+// CHECK12-SAME: () #[[ATTR5:[0-9]+]] {
 // CHECK12-NEXT:  entry:
 // CHECK12-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK12-NEXT:    ret void
@@ -3541,24 +3533,24 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[TMP4:%.*]] = ptrtoint i8* [[TMP1]] to i64
 // CHECK17-NEXT:    [[TMP5:%.*]] = sub i64 [[TMP3]], [[TMP4]]
 // CHECK17-NEXT:    [[TMP6:%.*]] = sdiv exact i64 [[TMP5]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64)
-// CHECK17-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to %struct.SS**
-// CHECK17-NEXT:    store %struct.SS* [[THIS1]], %struct.SS** [[TMP8]], align 8
-// CHECK17-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to [123 x i32]**
-// CHECK17-NEXT:    store [123 x i32]* [[A]], [123 x i32]** [[TMP10]], align 8
-// CHECK17-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK17-NEXT:    store i64 [[TMP6]], i64* [[TMP11]], align 8
-// CHECK17-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
-// CHECK17-NEXT:    store i8* null, i8** [[TMP12]], align 8
-// CHECK17-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
-// CHECK17-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to %struct.SS**
-// CHECK17-NEXT:    store %struct.SS* [[THIS1]], %struct.SS** [[TMP14]], align 8
-// CHECK17-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
-// CHECK17-NEXT:    [[TMP16:%.*]] = bitcast i8** [[TMP15]] to float**
-// CHECK17-NEXT:    store float* [[B]], float** [[TMP16]], align 8
-// CHECK17-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK17-NEXT:    store i64 4, i64* [[TMP17]], align 8
+// CHECK17-NEXT:    [[TMP7:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK17-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP7]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes to i8*), i64 24, i1 false)
+// CHECK17-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to %struct.SS**
+// CHECK17-NEXT:    store %struct.SS* [[THIS1]], %struct.SS** [[TMP9]], align 8
+// CHECK17-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP11:%.*]] = bitcast i8** [[TMP10]] to [123 x i32]**
+// CHECK17-NEXT:    store [123 x i32]* [[A]], [123 x i32]** [[TMP11]], align 8
+// CHECK17-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK17-NEXT:    store i64 [[TMP6]], i64* [[TMP12]], align 8
+// CHECK17-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
+// CHECK17-NEXT:    store i8* null, i8** [[TMP13]], align 8
+// CHECK17-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
+// CHECK17-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to %struct.SS**
+// CHECK17-NEXT:    store %struct.SS* [[THIS1]], %struct.SS** [[TMP15]], align 8
+// CHECK17-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
+// CHECK17-NEXT:    [[TMP17:%.*]] = bitcast i8** [[TMP16]] to float**
+// CHECK17-NEXT:    store float* [[B]], float** [[TMP17]], align 8
 // CHECK17-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
 // CHECK17-NEXT:    store i8* null, i8** [[TMP18]], align 8
 // CHECK17-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
@@ -3567,25 +3559,23 @@ int main (int argc, char **argv) {
 // CHECK17-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
 // CHECK17-NEXT:    [[TMP22:%.*]] = bitcast i8** [[TMP21]] to [123 x i32]**
 // CHECK17-NEXT:    store [123 x i32]* [[A]], [123 x i32]** [[TMP22]], align 8
-// CHECK17-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK17-NEXT:    store i64 492, i64* [[TMP23]], align 8
-// CHECK17-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK17-NEXT:    store i8* null, i8** [[TMP24]], align 8
-// CHECK17-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK17-NEXT:    store i8* null, i8** [[TMP23]], align 8
+// CHECK17-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
 // CHECK17-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 123)
-// CHECK17-NEXT:    [[TMP28:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l123.region_id, i32 3, i8** [[TMP25]], i8** [[TMP26]], i64* [[TMP27]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
-// CHECK17-NEXT:    [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0
-// CHECK17-NEXT:    br i1 [[TMP29]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK17-NEXT:    [[TMP27:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l123.region_id, i32 3, i8** [[TMP24]], i8** [[TMP25]], i64* [[TMP26]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK17-NEXT:    [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0
+// CHECK17-NEXT:    br i1 [[TMP28]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK17:       omp_offload.failed:
 // CHECK17-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l123(%struct.SS* [[THIS1]]) #[[ATTR2:[0-9]+]]
 // CHECK17-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK17:       omp_offload.cont:
 // CHECK17-NEXT:    [[A2:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[THIS1]], i32 0, i32 0
 // CHECK17-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A2]], i64 0, i64 0
-// CHECK17-NEXT:    [[TMP30:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
-// CHECK17-NEXT:    ret i32 [[TMP30]]
+// CHECK17-NEXT:    [[TMP29:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+// CHECK17-NEXT:    ret i32 [[TMP29]]
 //
 //
 // CHECK17-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l123
@@ -3677,7 +3667,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK17-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK17-SAME: () #[[ATTR3:[0-9]+]] {
+// CHECK17-SAME: () #[[ATTR4:[0-9]+]] {
 // CHECK17-NEXT:  entry:
 // CHECK17-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK17-NEXT:    ret void
@@ -3711,24 +3701,24 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    [[TMP4:%.*]] = ptrtoint i8* [[TMP1]] to i64
 // CHECK18-NEXT:    [[TMP5:%.*]] = sub i64 [[TMP3]], [[TMP4]]
 // CHECK18-NEXT:    [[TMP6:%.*]] = sdiv exact i64 [[TMP5]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64)
-// CHECK18-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to %struct.SS**
-// CHECK18-NEXT:    store %struct.SS* [[THIS1]], %struct.SS** [[TMP8]], align 8
-// CHECK18-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to [123 x i32]**
-// CHECK18-NEXT:    store [123 x i32]* [[A]], [123 x i32]** [[TMP10]], align 8
-// CHECK18-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK18-NEXT:    store i64 [[TMP6]], i64* [[TMP11]], align 8
-// CHECK18-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
-// CHECK18-NEXT:    store i8* null, i8** [[TMP12]], align 8
-// CHECK18-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
-// CHECK18-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to %struct.SS**
-// CHECK18-NEXT:    store %struct.SS* [[THIS1]], %struct.SS** [[TMP14]], align 8
-// CHECK18-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
-// CHECK18-NEXT:    [[TMP16:%.*]] = bitcast i8** [[TMP15]] to float**
-// CHECK18-NEXT:    store float* [[B]], float** [[TMP16]], align 8
-// CHECK18-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK18-NEXT:    store i64 4, i64* [[TMP17]], align 8
+// CHECK18-NEXT:    [[TMP7:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK18-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP7]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes to i8*), i64 24, i1 false)
+// CHECK18-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to %struct.SS**
+// CHECK18-NEXT:    store %struct.SS* [[THIS1]], %struct.SS** [[TMP9]], align 8
+// CHECK18-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP11:%.*]] = bitcast i8** [[TMP10]] to [123 x i32]**
+// CHECK18-NEXT:    store [123 x i32]* [[A]], [123 x i32]** [[TMP11]], align 8
+// CHECK18-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK18-NEXT:    store i64 [[TMP6]], i64* [[TMP12]], align 8
+// CHECK18-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
+// CHECK18-NEXT:    store i8* null, i8** [[TMP13]], align 8
+// CHECK18-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
+// CHECK18-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to %struct.SS**
+// CHECK18-NEXT:    store %struct.SS* [[THIS1]], %struct.SS** [[TMP15]], align 8
+// CHECK18-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
+// CHECK18-NEXT:    [[TMP17:%.*]] = bitcast i8** [[TMP16]] to float**
+// CHECK18-NEXT:    store float* [[B]], float** [[TMP17]], align 8
 // CHECK18-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
 // CHECK18-NEXT:    store i8* null, i8** [[TMP18]], align 8
 // CHECK18-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
@@ -3737,25 +3727,23 @@ int main (int argc, char **argv) {
 // CHECK18-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
 // CHECK18-NEXT:    [[TMP22:%.*]] = bitcast i8** [[TMP21]] to [123 x i32]**
 // CHECK18-NEXT:    store [123 x i32]* [[A]], [123 x i32]** [[TMP22]], align 8
-// CHECK18-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK18-NEXT:    store i64 492, i64* [[TMP23]], align 8
-// CHECK18-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK18-NEXT:    store i8* null, i8** [[TMP24]], align 8
-// CHECK18-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK18-NEXT:    store i8* null, i8** [[TMP23]], align 8
+// CHECK18-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
 // CHECK18-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 123)
-// CHECK18-NEXT:    [[TMP28:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l123.region_id, i32 3, i8** [[TMP25]], i8** [[TMP26]], i64* [[TMP27]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
-// CHECK18-NEXT:    [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0
-// CHECK18-NEXT:    br i1 [[TMP29]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK18-NEXT:    [[TMP27:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l123.region_id, i32 3, i8** [[TMP24]], i8** [[TMP25]], i64* [[TMP26]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK18-NEXT:    [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0
+// CHECK18-NEXT:    br i1 [[TMP28]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK18:       omp_offload.failed:
 // CHECK18-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l123(%struct.SS* [[THIS1]]) #[[ATTR2:[0-9]+]]
 // CHECK18-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK18:       omp_offload.cont:
 // CHECK18-NEXT:    [[A2:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[THIS1]], i32 0, i32 0
 // CHECK18-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A2]], i64 0, i64 0
-// CHECK18-NEXT:    [[TMP30:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
-// CHECK18-NEXT:    ret i32 [[TMP30]]
+// CHECK18-NEXT:    [[TMP29:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+// CHECK18-NEXT:    ret i32 [[TMP29]]
 //
 //
 // CHECK18-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l123
@@ -3847,7 +3835,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK18-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK18-SAME: () #[[ATTR3:[0-9]+]] {
+// CHECK18-SAME: () #[[ATTR4:[0-9]+]] {
 // CHECK18-NEXT:  entry:
 // CHECK18-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK18-NEXT:    ret void
@@ -3881,24 +3869,24 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[TMP4:%.*]] = ptrtoint i8* [[TMP1]] to i64
 // CHECK19-NEXT:    [[TMP5:%.*]] = sub i64 [[TMP3]], [[TMP4]]
 // CHECK19-NEXT:    [[TMP6:%.*]] = sdiv exact i64 [[TMP5]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64)
-// CHECK19-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to %struct.SS**
-// CHECK19-NEXT:    store %struct.SS* [[THIS1]], %struct.SS** [[TMP8]], align 4
-// CHECK19-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to [123 x i32]**
-// CHECK19-NEXT:    store [123 x i32]* [[A]], [123 x i32]** [[TMP10]], align 4
-// CHECK19-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK19-NEXT:    store i64 [[TMP6]], i64* [[TMP11]], align 4
-// CHECK19-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
-// CHECK19-NEXT:    store i8* null, i8** [[TMP12]], align 4
-// CHECK19-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
-// CHECK19-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to %struct.SS**
-// CHECK19-NEXT:    store %struct.SS* [[THIS1]], %struct.SS** [[TMP14]], align 4
-// CHECK19-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
-// CHECK19-NEXT:    [[TMP16:%.*]] = bitcast i8** [[TMP15]] to float**
-// CHECK19-NEXT:    store float* [[B]], float** [[TMP16]], align 4
-// CHECK19-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK19-NEXT:    store i64 4, i64* [[TMP17]], align 4
+// CHECK19-NEXT:    [[TMP7:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK19-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP7]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes to i8*), i32 24, i1 false)
+// CHECK19-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to %struct.SS**
+// CHECK19-NEXT:    store %struct.SS* [[THIS1]], %struct.SS** [[TMP9]], align 4
+// CHECK19-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP11:%.*]] = bitcast i8** [[TMP10]] to [123 x i32]**
+// CHECK19-NEXT:    store [123 x i32]* [[A]], [123 x i32]** [[TMP11]], align 4
+// CHECK19-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK19-NEXT:    store i64 [[TMP6]], i64* [[TMP12]], align 4
+// CHECK19-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
+// CHECK19-NEXT:    store i8* null, i8** [[TMP13]], align 4
+// CHECK19-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
+// CHECK19-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to %struct.SS**
+// CHECK19-NEXT:    store %struct.SS* [[THIS1]], %struct.SS** [[TMP15]], align 4
+// CHECK19-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
+// CHECK19-NEXT:    [[TMP17:%.*]] = bitcast i8** [[TMP16]] to float**
+// CHECK19-NEXT:    store float* [[B]], float** [[TMP17]], align 4
 // CHECK19-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
 // CHECK19-NEXT:    store i8* null, i8** [[TMP18]], align 4
 // CHECK19-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
@@ -3907,25 +3895,23 @@ int main (int argc, char **argv) {
 // CHECK19-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
 // CHECK19-NEXT:    [[TMP22:%.*]] = bitcast i8** [[TMP21]] to [123 x i32]**
 // CHECK19-NEXT:    store [123 x i32]* [[A]], [123 x i32]** [[TMP22]], align 4
-// CHECK19-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK19-NEXT:    store i64 492, i64* [[TMP23]], align 4
-// CHECK19-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK19-NEXT:    store i8* null, i8** [[TMP24]], align 4
-// CHECK19-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK19-NEXT:    store i8* null, i8** [[TMP23]], align 4
+// CHECK19-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
 // CHECK19-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 123)
-// CHECK19-NEXT:    [[TMP28:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l123.region_id, i32 3, i8** [[TMP25]], i8** [[TMP26]], i64* [[TMP27]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
-// CHECK19-NEXT:    [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0
-// CHECK19-NEXT:    br i1 [[TMP29]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK19-NEXT:    [[TMP27:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l123.region_id, i32 3, i8** [[TMP24]], i8** [[TMP25]], i64* [[TMP26]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK19-NEXT:    [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0
+// CHECK19-NEXT:    br i1 [[TMP28]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK19:       omp_offload.failed:
 // CHECK19-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l123(%struct.SS* [[THIS1]]) #[[ATTR2:[0-9]+]]
 // CHECK19-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK19:       omp_offload.cont:
 // CHECK19-NEXT:    [[A2:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[THIS1]], i32 0, i32 0
 // CHECK19-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A2]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP30:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
-// CHECK19-NEXT:    ret i32 [[TMP30]]
+// CHECK19-NEXT:    [[TMP29:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+// CHECK19-NEXT:    ret i32 [[TMP29]]
 //
 //
 // CHECK19-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l123
@@ -4016,7 +4002,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK19-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK19-SAME: () #[[ATTR3:[0-9]+]] {
+// CHECK19-SAME: () #[[ATTR4:[0-9]+]] {
 // CHECK19-NEXT:  entry:
 // CHECK19-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK19-NEXT:    ret void
@@ -4050,24 +4036,24 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    [[TMP4:%.*]] = ptrtoint i8* [[TMP1]] to i64
 // CHECK20-NEXT:    [[TMP5:%.*]] = sub i64 [[TMP3]], [[TMP4]]
 // CHECK20-NEXT:    [[TMP6:%.*]] = sdiv exact i64 [[TMP5]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64)
-// CHECK20-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to %struct.SS**
-// CHECK20-NEXT:    store %struct.SS* [[THIS1]], %struct.SS** [[TMP8]], align 4
-// CHECK20-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to [123 x i32]**
-// CHECK20-NEXT:    store [123 x i32]* [[A]], [123 x i32]** [[TMP10]], align 4
-// CHECK20-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK20-NEXT:    store i64 [[TMP6]], i64* [[TMP11]], align 4
-// CHECK20-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
-// CHECK20-NEXT:    store i8* null, i8** [[TMP12]], align 4
-// CHECK20-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
-// CHECK20-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to %struct.SS**
-// CHECK20-NEXT:    store %struct.SS* [[THIS1]], %struct.SS** [[TMP14]], align 4
-// CHECK20-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
-// CHECK20-NEXT:    [[TMP16:%.*]] = bitcast i8** [[TMP15]] to float**
-// CHECK20-NEXT:    store float* [[B]], float** [[TMP16]], align 4
-// CHECK20-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK20-NEXT:    store i64 4, i64* [[TMP17]], align 4
+// CHECK20-NEXT:    [[TMP7:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK20-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP7]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes to i8*), i32 24, i1 false)
+// CHECK20-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to %struct.SS**
+// CHECK20-NEXT:    store %struct.SS* [[THIS1]], %struct.SS** [[TMP9]], align 4
+// CHECK20-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP11:%.*]] = bitcast i8** [[TMP10]] to [123 x i32]**
+// CHECK20-NEXT:    store [123 x i32]* [[A]], [123 x i32]** [[TMP11]], align 4
+// CHECK20-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK20-NEXT:    store i64 [[TMP6]], i64* [[TMP12]], align 4
+// CHECK20-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
+// CHECK20-NEXT:    store i8* null, i8** [[TMP13]], align 4
+// CHECK20-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
+// CHECK20-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to %struct.SS**
+// CHECK20-NEXT:    store %struct.SS* [[THIS1]], %struct.SS** [[TMP15]], align 4
+// CHECK20-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
+// CHECK20-NEXT:    [[TMP17:%.*]] = bitcast i8** [[TMP16]] to float**
+// CHECK20-NEXT:    store float* [[B]], float** [[TMP17]], align 4
 // CHECK20-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
 // CHECK20-NEXT:    store i8* null, i8** [[TMP18]], align 4
 // CHECK20-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
@@ -4076,25 +4062,23 @@ int main (int argc, char **argv) {
 // CHECK20-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
 // CHECK20-NEXT:    [[TMP22:%.*]] = bitcast i8** [[TMP21]] to [123 x i32]**
 // CHECK20-NEXT:    store [123 x i32]* [[A]], [123 x i32]** [[TMP22]], align 4
-// CHECK20-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK20-NEXT:    store i64 492, i64* [[TMP23]], align 4
-// CHECK20-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK20-NEXT:    store i8* null, i8** [[TMP24]], align 4
-// CHECK20-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK20-NEXT:    store i8* null, i8** [[TMP23]], align 4
+// CHECK20-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
 // CHECK20-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 123)
-// CHECK20-NEXT:    [[TMP28:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l123.region_id, i32 3, i8** [[TMP25]], i8** [[TMP26]], i64* [[TMP27]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
-// CHECK20-NEXT:    [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0
-// CHECK20-NEXT:    br i1 [[TMP29]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK20-NEXT:    [[TMP27:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l123.region_id, i32 3, i8** [[TMP24]], i8** [[TMP25]], i64* [[TMP26]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK20-NEXT:    [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0
+// CHECK20-NEXT:    br i1 [[TMP28]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK20:       omp_offload.failed:
 // CHECK20-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l123(%struct.SS* [[THIS1]]) #[[ATTR2:[0-9]+]]
 // CHECK20-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK20:       omp_offload.cont:
 // CHECK20-NEXT:    [[A2:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[THIS1]], i32 0, i32 0
 // CHECK20-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A2]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP30:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
-// CHECK20-NEXT:    ret i32 [[TMP30]]
+// CHECK20-NEXT:    [[TMP29:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+// CHECK20-NEXT:    ret i32 [[TMP29]]
 //
 //
 // CHECK20-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l123
@@ -4185,7 +4169,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK20-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK20-SAME: () #[[ATTR3:[0-9]+]] {
+// CHECK20-SAME: () #[[ATTR4:[0-9]+]] {
 // CHECK20-NEXT:  entry:
 // CHECK20-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK20-NEXT:    ret void
@@ -4219,24 +4203,24 @@ int main (int argc, char **argv) {
 // CHECK21-NEXT:    [[TMP4:%.*]] = ptrtoint i8* [[TMP1]] to i64
 // CHECK21-NEXT:    [[TMP5:%.*]] = sub i64 [[TMP3]], [[TMP4]]
 // CHECK21-NEXT:    [[TMP6:%.*]] = sdiv exact i64 [[TMP5]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64)
-// CHECK21-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK21-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to %struct.SS**
-// CHECK21-NEXT:    store %struct.SS* [[THIS1]], %struct.SS** [[TMP8]], align 8
-// CHECK21-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK21-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to [123 x i32]**
-// CHECK21-NEXT:    store [123 x i32]* [[A]], [123 x i32]** [[TMP10]], align 8
-// CHECK21-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK21-NEXT:    store i64 [[TMP6]], i64* [[TMP11]], align 8
-// CHECK21-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
-// CHECK21-NEXT:    store i8* null, i8** [[TMP12]], align 8
-// CHECK21-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
-// CHECK21-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to %struct.SS**
-// CHECK21-NEXT:    store %struct.SS* [[THIS1]], %struct.SS** [[TMP14]], align 8
-// CHECK21-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
-// CHECK21-NEXT:    [[TMP16:%.*]] = bitcast i8** [[TMP15]] to float**
-// CHECK21-NEXT:    store float* [[B]], float** [[TMP16]], align 8
-// CHECK21-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK21-NEXT:    store i64 4, i64* [[TMP17]], align 8
+// CHECK21-NEXT:    [[TMP7:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK21-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP7]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes to i8*), i64 24, i1 false)
+// CHECK21-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK21-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to %struct.SS**
+// CHECK21-NEXT:    store %struct.SS* [[THIS1]], %struct.SS** [[TMP9]], align 8
+// CHECK21-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK21-NEXT:    [[TMP11:%.*]] = bitcast i8** [[TMP10]] to [123 x i32]**
+// CHECK21-NEXT:    store [123 x i32]* [[A]], [123 x i32]** [[TMP11]], align 8
+// CHECK21-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK21-NEXT:    store i64 [[TMP6]], i64* [[TMP12]], align 8
+// CHECK21-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
+// CHECK21-NEXT:    store i8* null, i8** [[TMP13]], align 8
+// CHECK21-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
+// CHECK21-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to %struct.SS**
+// CHECK21-NEXT:    store %struct.SS* [[THIS1]], %struct.SS** [[TMP15]], align 8
+// CHECK21-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
+// CHECK21-NEXT:    [[TMP17:%.*]] = bitcast i8** [[TMP16]] to float**
+// CHECK21-NEXT:    store float* [[B]], float** [[TMP17]], align 8
 // CHECK21-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
 // CHECK21-NEXT:    store i8* null, i8** [[TMP18]], align 8
 // CHECK21-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
@@ -4245,25 +4229,23 @@ int main (int argc, char **argv) {
 // CHECK21-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
 // CHECK21-NEXT:    [[TMP22:%.*]] = bitcast i8** [[TMP21]] to [123 x i32]**
 // CHECK21-NEXT:    store [123 x i32]* [[A]], [123 x i32]** [[TMP22]], align 8
-// CHECK21-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK21-NEXT:    store i64 492, i64* [[TMP23]], align 8
-// CHECK21-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK21-NEXT:    store i8* null, i8** [[TMP24]], align 8
-// CHECK21-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK21-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK21-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK21-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK21-NEXT:    store i8* null, i8** [[TMP23]], align 8
+// CHECK21-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK21-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK21-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
 // CHECK21-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 123)
-// CHECK21-NEXT:    [[TMP28:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l123.region_id, i32 3, i8** [[TMP25]], i8** [[TMP26]], i64* [[TMP27]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
-// CHECK21-NEXT:    [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0
-// CHECK21-NEXT:    br i1 [[TMP29]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK21-NEXT:    [[TMP27:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l123.region_id, i32 3, i8** [[TMP24]], i8** [[TMP25]], i64* [[TMP26]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK21-NEXT:    [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0
+// CHECK21-NEXT:    br i1 [[TMP28]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK21:       omp_offload.failed:
 // CHECK21-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l123(%struct.SS* [[THIS1]]) #[[ATTR2:[0-9]+]]
 // CHECK21-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK21:       omp_offload.cont:
 // CHECK21-NEXT:    [[A2:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[THIS1]], i32 0, i32 0
 // CHECK21-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A2]], i64 0, i64 0
-// CHECK21-NEXT:    [[TMP30:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
-// CHECK21-NEXT:    ret i32 [[TMP30]]
+// CHECK21-NEXT:    [[TMP29:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+// CHECK21-NEXT:    ret i32 [[TMP29]]
 //
 //
 // CHECK21-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l123
@@ -4407,7 +4389,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK21-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK21-SAME: () #[[ATTR3:[0-9]+]] {
+// CHECK21-SAME: () #[[ATTR4:[0-9]+]] {
 // CHECK21-NEXT:  entry:
 // CHECK21-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK21-NEXT:    ret void
@@ -4441,24 +4423,24 @@ int main (int argc, char **argv) {
 // CHECK22-NEXT:    [[TMP4:%.*]] = ptrtoint i8* [[TMP1]] to i64
 // CHECK22-NEXT:    [[TMP5:%.*]] = sub i64 [[TMP3]], [[TMP4]]
 // CHECK22-NEXT:    [[TMP6:%.*]] = sdiv exact i64 [[TMP5]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64)
-// CHECK22-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK22-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to %struct.SS**
-// CHECK22-NEXT:    store %struct.SS* [[THIS1]], %struct.SS** [[TMP8]], align 8
-// CHECK22-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK22-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to [123 x i32]**
-// CHECK22-NEXT:    store [123 x i32]* [[A]], [123 x i32]** [[TMP10]], align 8
-// CHECK22-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK22-NEXT:    store i64 [[TMP6]], i64* [[TMP11]], align 8
-// CHECK22-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
-// CHECK22-NEXT:    store i8* null, i8** [[TMP12]], align 8
-// CHECK22-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
-// CHECK22-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to %struct.SS**
-// CHECK22-NEXT:    store %struct.SS* [[THIS1]], %struct.SS** [[TMP14]], align 8
-// CHECK22-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
-// CHECK22-NEXT:    [[TMP16:%.*]] = bitcast i8** [[TMP15]] to float**
-// CHECK22-NEXT:    store float* [[B]], float** [[TMP16]], align 8
-// CHECK22-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK22-NEXT:    store i64 4, i64* [[TMP17]], align 8
+// CHECK22-NEXT:    [[TMP7:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK22-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP7]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes to i8*), i64 24, i1 false)
+// CHECK22-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK22-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to %struct.SS**
+// CHECK22-NEXT:    store %struct.SS* [[THIS1]], %struct.SS** [[TMP9]], align 8
+// CHECK22-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK22-NEXT:    [[TMP11:%.*]] = bitcast i8** [[TMP10]] to [123 x i32]**
+// CHECK22-NEXT:    store [123 x i32]* [[A]], [123 x i32]** [[TMP11]], align 8
+// CHECK22-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK22-NEXT:    store i64 [[TMP6]], i64* [[TMP12]], align 8
+// CHECK22-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
+// CHECK22-NEXT:    store i8* null, i8** [[TMP13]], align 8
+// CHECK22-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
+// CHECK22-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to %struct.SS**
+// CHECK22-NEXT:    store %struct.SS* [[THIS1]], %struct.SS** [[TMP15]], align 8
+// CHECK22-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
+// CHECK22-NEXT:    [[TMP17:%.*]] = bitcast i8** [[TMP16]] to float**
+// CHECK22-NEXT:    store float* [[B]], float** [[TMP17]], align 8
 // CHECK22-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
 // CHECK22-NEXT:    store i8* null, i8** [[TMP18]], align 8
 // CHECK22-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
@@ -4467,25 +4449,23 @@ int main (int argc, char **argv) {
 // CHECK22-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
 // CHECK22-NEXT:    [[TMP22:%.*]] = bitcast i8** [[TMP21]] to [123 x i32]**
 // CHECK22-NEXT:    store [123 x i32]* [[A]], [123 x i32]** [[TMP22]], align 8
-// CHECK22-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK22-NEXT:    store i64 492, i64* [[TMP23]], align 8
-// CHECK22-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK22-NEXT:    store i8* null, i8** [[TMP24]], align 8
-// CHECK22-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK22-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK22-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK22-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK22-NEXT:    store i8* null, i8** [[TMP23]], align 8
+// CHECK22-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK22-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK22-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
 // CHECK22-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 123)
-// CHECK22-NEXT:    [[TMP28:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l123.region_id, i32 3, i8** [[TMP25]], i8** [[TMP26]], i64* [[TMP27]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
-// CHECK22-NEXT:    [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0
-// CHECK22-NEXT:    br i1 [[TMP29]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK22-NEXT:    [[TMP27:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l123.region_id, i32 3, i8** [[TMP24]], i8** [[TMP25]], i64* [[TMP26]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK22-NEXT:    [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0
+// CHECK22-NEXT:    br i1 [[TMP28]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK22:       omp_offload.failed:
 // CHECK22-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l123(%struct.SS* [[THIS1]]) #[[ATTR2:[0-9]+]]
 // CHECK22-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK22:       omp_offload.cont:
 // CHECK22-NEXT:    [[A2:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[THIS1]], i32 0, i32 0
 // CHECK22-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A2]], i64 0, i64 0
-// CHECK22-NEXT:    [[TMP30:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
-// CHECK22-NEXT:    ret i32 [[TMP30]]
+// CHECK22-NEXT:    [[TMP29:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+// CHECK22-NEXT:    ret i32 [[TMP29]]
 //
 //
 // CHECK22-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l123
@@ -4629,7 +4609,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK22-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK22-SAME: () #[[ATTR3:[0-9]+]] {
+// CHECK22-SAME: () #[[ATTR4:[0-9]+]] {
 // CHECK22-NEXT:  entry:
 // CHECK22-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK22-NEXT:    ret void
@@ -4663,24 +4643,24 @@ int main (int argc, char **argv) {
 // CHECK23-NEXT:    [[TMP4:%.*]] = ptrtoint i8* [[TMP1]] to i64
 // CHECK23-NEXT:    [[TMP5:%.*]] = sub i64 [[TMP3]], [[TMP4]]
 // CHECK23-NEXT:    [[TMP6:%.*]] = sdiv exact i64 [[TMP5]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64)
-// CHECK23-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK23-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to %struct.SS**
-// CHECK23-NEXT:    store %struct.SS* [[THIS1]], %struct.SS** [[TMP8]], align 4
-// CHECK23-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK23-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to [123 x i32]**
-// CHECK23-NEXT:    store [123 x i32]* [[A]], [123 x i32]** [[TMP10]], align 4
-// CHECK23-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK23-NEXT:    store i64 [[TMP6]], i64* [[TMP11]], align 4
-// CHECK23-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
-// CHECK23-NEXT:    store i8* null, i8** [[TMP12]], align 4
-// CHECK23-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
-// CHECK23-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to %struct.SS**
-// CHECK23-NEXT:    store %struct.SS* [[THIS1]], %struct.SS** [[TMP14]], align 4
-// CHECK23-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
-// CHECK23-NEXT:    [[TMP16:%.*]] = bitcast i8** [[TMP15]] to float**
-// CHECK23-NEXT:    store float* [[B]], float** [[TMP16]], align 4
-// CHECK23-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK23-NEXT:    store i64 4, i64* [[TMP17]], align 4
+// CHECK23-NEXT:    [[TMP7:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK23-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP7]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes to i8*), i32 24, i1 false)
+// CHECK23-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK23-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to %struct.SS**
+// CHECK23-NEXT:    store %struct.SS* [[THIS1]], %struct.SS** [[TMP9]], align 4
+// CHECK23-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK23-NEXT:    [[TMP11:%.*]] = bitcast i8** [[TMP10]] to [123 x i32]**
+// CHECK23-NEXT:    store [123 x i32]* [[A]], [123 x i32]** [[TMP11]], align 4
+// CHECK23-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK23-NEXT:    store i64 [[TMP6]], i64* [[TMP12]], align 4
+// CHECK23-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
+// CHECK23-NEXT:    store i8* null, i8** [[TMP13]], align 4
+// CHECK23-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
+// CHECK23-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to %struct.SS**
+// CHECK23-NEXT:    store %struct.SS* [[THIS1]], %struct.SS** [[TMP15]], align 4
+// CHECK23-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
+// CHECK23-NEXT:    [[TMP17:%.*]] = bitcast i8** [[TMP16]] to float**
+// CHECK23-NEXT:    store float* [[B]], float** [[TMP17]], align 4
 // CHECK23-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
 // CHECK23-NEXT:    store i8* null, i8** [[TMP18]], align 4
 // CHECK23-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
@@ -4689,25 +4669,23 @@ int main (int argc, char **argv) {
 // CHECK23-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
 // CHECK23-NEXT:    [[TMP22:%.*]] = bitcast i8** [[TMP21]] to [123 x i32]**
 // CHECK23-NEXT:    store [123 x i32]* [[A]], [123 x i32]** [[TMP22]], align 4
-// CHECK23-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK23-NEXT:    store i64 492, i64* [[TMP23]], align 4
-// CHECK23-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK23-NEXT:    store i8* null, i8** [[TMP24]], align 4
-// CHECK23-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK23-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK23-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK23-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK23-NEXT:    store i8* null, i8** [[TMP23]], align 4
+// CHECK23-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK23-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK23-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
 // CHECK23-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 123)
-// CHECK23-NEXT:    [[TMP28:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l123.region_id, i32 3, i8** [[TMP25]], i8** [[TMP26]], i64* [[TMP27]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
-// CHECK23-NEXT:    [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0
-// CHECK23-NEXT:    br i1 [[TMP29]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK23-NEXT:    [[TMP27:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l123.region_id, i32 3, i8** [[TMP24]], i8** [[TMP25]], i64* [[TMP26]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK23-NEXT:    [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0
+// CHECK23-NEXT:    br i1 [[TMP28]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK23:       omp_offload.failed:
 // CHECK23-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l123(%struct.SS* [[THIS1]]) #[[ATTR2:[0-9]+]]
 // CHECK23-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK23:       omp_offload.cont:
 // CHECK23-NEXT:    [[A2:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[THIS1]], i32 0, i32 0
 // CHECK23-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A2]], i32 0, i32 0
-// CHECK23-NEXT:    [[TMP30:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
-// CHECK23-NEXT:    ret i32 [[TMP30]]
+// CHECK23-NEXT:    [[TMP29:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+// CHECK23-NEXT:    ret i32 [[TMP29]]
 //
 //
 // CHECK23-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l123
@@ -4849,7 +4827,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK23-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK23-SAME: () #[[ATTR3:[0-9]+]] {
+// CHECK23-SAME: () #[[ATTR4:[0-9]+]] {
 // CHECK23-NEXT:  entry:
 // CHECK23-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK23-NEXT:    ret void
@@ -4883,24 +4861,24 @@ int main (int argc, char **argv) {
 // CHECK24-NEXT:    [[TMP4:%.*]] = ptrtoint i8* [[TMP1]] to i64
 // CHECK24-NEXT:    [[TMP5:%.*]] = sub i64 [[TMP3]], [[TMP4]]
 // CHECK24-NEXT:    [[TMP6:%.*]] = sdiv exact i64 [[TMP5]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64)
-// CHECK24-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK24-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to %struct.SS**
-// CHECK24-NEXT:    store %struct.SS* [[THIS1]], %struct.SS** [[TMP8]], align 4
-// CHECK24-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK24-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to [123 x i32]**
-// CHECK24-NEXT:    store [123 x i32]* [[A]], [123 x i32]** [[TMP10]], align 4
-// CHECK24-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK24-NEXT:    store i64 [[TMP6]], i64* [[TMP11]], align 4
-// CHECK24-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
-// CHECK24-NEXT:    store i8* null, i8** [[TMP12]], align 4
-// CHECK24-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
-// CHECK24-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to %struct.SS**
-// CHECK24-NEXT:    store %struct.SS* [[THIS1]], %struct.SS** [[TMP14]], align 4
-// CHECK24-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
-// CHECK24-NEXT:    [[TMP16:%.*]] = bitcast i8** [[TMP15]] to float**
-// CHECK24-NEXT:    store float* [[B]], float** [[TMP16]], align 4
-// CHECK24-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK24-NEXT:    store i64 4, i64* [[TMP17]], align 4
+// CHECK24-NEXT:    [[TMP7:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK24-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP7]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes to i8*), i32 24, i1 false)
+// CHECK24-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK24-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to %struct.SS**
+// CHECK24-NEXT:    store %struct.SS* [[THIS1]], %struct.SS** [[TMP9]], align 4
+// CHECK24-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK24-NEXT:    [[TMP11:%.*]] = bitcast i8** [[TMP10]] to [123 x i32]**
+// CHECK24-NEXT:    store [123 x i32]* [[A]], [123 x i32]** [[TMP11]], align 4
+// CHECK24-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK24-NEXT:    store i64 [[TMP6]], i64* [[TMP12]], align 4
+// CHECK24-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
+// CHECK24-NEXT:    store i8* null, i8** [[TMP13]], align 4
+// CHECK24-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
+// CHECK24-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to %struct.SS**
+// CHECK24-NEXT:    store %struct.SS* [[THIS1]], %struct.SS** [[TMP15]], align 4
+// CHECK24-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
+// CHECK24-NEXT:    [[TMP17:%.*]] = bitcast i8** [[TMP16]] to float**
+// CHECK24-NEXT:    store float* [[B]], float** [[TMP17]], align 4
 // CHECK24-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
 // CHECK24-NEXT:    store i8* null, i8** [[TMP18]], align 4
 // CHECK24-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
@@ -4909,25 +4887,23 @@ int main (int argc, char **argv) {
 // CHECK24-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
 // CHECK24-NEXT:    [[TMP22:%.*]] = bitcast i8** [[TMP21]] to [123 x i32]**
 // CHECK24-NEXT:    store [123 x i32]* [[A]], [123 x i32]** [[TMP22]], align 4
-// CHECK24-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK24-NEXT:    store i64 492, i64* [[TMP23]], align 4
-// CHECK24-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK24-NEXT:    store i8* null, i8** [[TMP24]], align 4
-// CHECK24-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK24-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK24-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK24-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK24-NEXT:    store i8* null, i8** [[TMP23]], align 4
+// CHECK24-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK24-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK24-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
 // CHECK24-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 123)
-// CHECK24-NEXT:    [[TMP28:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l123.region_id, i32 3, i8** [[TMP25]], i8** [[TMP26]], i64* [[TMP27]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
-// CHECK24-NEXT:    [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0
-// CHECK24-NEXT:    br i1 [[TMP29]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK24-NEXT:    [[TMP27:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l123.region_id, i32 3, i8** [[TMP24]], i8** [[TMP25]], i64* [[TMP26]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK24-NEXT:    [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0
+// CHECK24-NEXT:    br i1 [[TMP28]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK24:       omp_offload.failed:
 // CHECK24-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l123(%struct.SS* [[THIS1]]) #[[ATTR2:[0-9]+]]
 // CHECK24-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK24:       omp_offload.cont:
 // CHECK24-NEXT:    [[A2:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[THIS1]], i32 0, i32 0
 // CHECK24-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A2]], i32 0, i32 0
-// CHECK24-NEXT:    [[TMP30:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
-// CHECK24-NEXT:    ret i32 [[TMP30]]
+// CHECK24-NEXT:    [[TMP29:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+// CHECK24-NEXT:    ret i32 [[TMP29]]
 //
 //
 // CHECK24-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l123
@@ -5069,7 +5045,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK24-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK24-SAME: () #[[ATTR3:[0-9]+]] {
+// CHECK24-SAME: () #[[ATTR4:[0-9]+]] {
 // CHECK24-NEXT:  entry:
 // CHECK24-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK24-NEXT:    ret void
@@ -5733,14 +5709,14 @@ int main (int argc, char **argv) {
 // CHECK33-NEXT:    store i32 [[TMP3]], i32* [[CONV]], align 4
 // CHECK33-NEXT:    [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8
 // CHECK33-NEXT:    [[TMP5:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK33-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK33-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i64*
-// CHECK33-NEXT:    store i64 [[TMP4]], i64* [[TMP7]], align 8
-// CHECK33-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK33-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i64*
-// CHECK33-NEXT:    store i64 [[TMP4]], i64* [[TMP9]], align 8
-// CHECK33-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK33-NEXT:    store i64 4, i64* [[TMP10]], align 8
+// CHECK33-NEXT:    [[TMP6:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK33-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP6]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes to i8*), i64 24, i1 false)
+// CHECK33-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK33-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i64*
+// CHECK33-NEXT:    store i64 [[TMP4]], i64* [[TMP8]], align 8
+// CHECK33-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK33-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i64*
+// CHECK33-NEXT:    store i64 [[TMP4]], i64* [[TMP10]], align 8
 // CHECK33-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
 // CHECK33-NEXT:    store i8* null, i8** [[TMP11]], align 8
 // CHECK33-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -5749,48 +5725,46 @@ int main (int argc, char **argv) {
 // CHECK33-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK33-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i64*
 // CHECK33-NEXT:    store i64 [[TMP1]], i64* [[TMP15]], align 8
-// CHECK33-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK33-NEXT:    store i64 8, i64* [[TMP16]], align 8
-// CHECK33-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK33-NEXT:    store i8* null, i8** [[TMP17]], align 8
-// CHECK33-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK33-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
-// CHECK33-NEXT:    store i32* [[VLA]], i32** [[TMP19]], align 8
-// CHECK33-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK33-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32**
-// CHECK33-NEXT:    store i32* [[VLA]], i32** [[TMP21]], align 8
-// CHECK33-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK33-NEXT:    store i64 [[TMP5]], i64* [[TMP22]], align 8
-// CHECK33-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK33-NEXT:    store i8* null, i8** [[TMP23]], align 8
-// CHECK33-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK33-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK33-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK33-NEXT:    [[TMP27:%.*]] = load i32, i32* [[N]], align 4
-// CHECK33-NEXT:    store i32 [[TMP27]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK33-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK33-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP28]], 0
+// CHECK33-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK33-NEXT:    store i8* null, i8** [[TMP16]], align 8
+// CHECK33-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK33-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32**
+// CHECK33-NEXT:    store i32* [[VLA]], i32** [[TMP18]], align 8
+// CHECK33-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK33-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32**
+// CHECK33-NEXT:    store i32* [[VLA]], i32** [[TMP20]], align 8
+// CHECK33-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
+// CHECK33-NEXT:    store i64 [[TMP5]], i64* [[TMP21]], align 8
+// CHECK33-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK33-NEXT:    store i8* null, i8** [[TMP22]], align 8
+// CHECK33-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK33-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK33-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK33-NEXT:    [[TMP26:%.*]] = load i32, i32* [[N]], align 4
+// CHECK33-NEXT:    store i32 [[TMP26]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK33-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK33-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP27]], 0
 // CHECK33-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK33-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK33-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK33-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK33-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP29]], 1
-// CHECK33-NEXT:    [[TMP30:%.*]] = zext i32 [[ADD]] to i64
-// CHECK33-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 [[TMP30]])
-// CHECK33-NEXT:    [[TMP31:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l192.region_id, i32 3, i8** [[TMP24]], i8** [[TMP25]], i64* [[TMP26]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
-// CHECK33-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
-// CHECK33-NEXT:    br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK33-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK33-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP28]], 1
+// CHECK33-NEXT:    [[TMP29:%.*]] = zext i32 [[ADD]] to i64
+// CHECK33-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 [[TMP29]])
+// CHECK33-NEXT:    [[TMP30:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l192.region_id, i32 3, i8** [[TMP23]], i8** [[TMP24]], i64* [[TMP25]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK33-NEXT:    [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
+// CHECK33-NEXT:    br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK33:       omp_offload.failed:
 // CHECK33-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l192(i64 [[TMP4]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK33-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK33:       omp_offload.cont:
-// CHECK33-NEXT:    [[TMP33:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK33-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10EEiT_(i32 noundef signext [[TMP33]])
+// CHECK33-NEXT:    [[TMP32:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK33-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10EEiT_(i32 noundef signext [[TMP32]])
 // CHECK33-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK33-NEXT:    [[TMP34:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK33-NEXT:    call void @llvm.stackrestore(i8* [[TMP34]])
-// CHECK33-NEXT:    [[TMP35:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK33-NEXT:    ret i32 [[TMP35]]
+// CHECK33-NEXT:    [[TMP33:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK33-NEXT:    call void @llvm.stackrestore(i8* [[TMP33]])
+// CHECK33-NEXT:    [[TMP34:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK33-NEXT:    ret i32 [[TMP34]]
 //
 //
 // CHECK33-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l192
@@ -5917,7 +5891,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK33-LABEL: define {{[^@]+}}@_Z5tmainIiLi10EEiT_
-// CHECK33-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK33-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK33-NEXT:  entry:
 // CHECK33-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK33-NEXT:    [[A:%.*]] = alloca [10 x i32], align 4
@@ -5968,7 +5942,7 @@ int main (int argc, char **argv) {
 // CHECK33-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK33-NEXT:    [[TMP21:%.*]] = load i32, i32* [[TE]], align 4
 // CHECK33-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK33-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l181.region_id, i32 3, i8** [[TMP19]], i8** [[TMP20]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.2, i32 0, i32 0), i8** null, i8** null, i32 [[TMP21]], i32 1)
+// CHECK33-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l181.region_id, i32 3, i8** [[TMP19]], i8** [[TMP20]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.2, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.3, i32 0, i32 0), i8** null, i8** null, i32 [[TMP21]], i32 1)
 // CHECK33-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK33-NEXT:    br i1 [[TMP23]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK33:       omp_offload.failed:
@@ -6073,7 +6047,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK33-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK33-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK33-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK33-NEXT:  entry:
 // CHECK33-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK33-NEXT:    ret void
@@ -6111,14 +6085,14 @@ int main (int argc, char **argv) {
 // CHECK34-NEXT:    store i32 [[TMP3]], i32* [[CONV]], align 4
 // CHECK34-NEXT:    [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8
 // CHECK34-NEXT:    [[TMP5:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK34-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK34-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i64*
-// CHECK34-NEXT:    store i64 [[TMP4]], i64* [[TMP7]], align 8
-// CHECK34-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK34-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i64*
-// CHECK34-NEXT:    store i64 [[TMP4]], i64* [[TMP9]], align 8
-// CHECK34-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK34-NEXT:    store i64 4, i64* [[TMP10]], align 8
+// CHECK34-NEXT:    [[TMP6:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK34-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP6]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes to i8*), i64 24, i1 false)
+// CHECK34-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK34-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i64*
+// CHECK34-NEXT:    store i64 [[TMP4]], i64* [[TMP8]], align 8
+// CHECK34-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK34-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i64*
+// CHECK34-NEXT:    store i64 [[TMP4]], i64* [[TMP10]], align 8
 // CHECK34-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
 // CHECK34-NEXT:    store i8* null, i8** [[TMP11]], align 8
 // CHECK34-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -6127,48 +6101,46 @@ int main (int argc, char **argv) {
 // CHECK34-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK34-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i64*
 // CHECK34-NEXT:    store i64 [[TMP1]], i64* [[TMP15]], align 8
-// CHECK34-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK34-NEXT:    store i64 8, i64* [[TMP16]], align 8
-// CHECK34-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK34-NEXT:    store i8* null, i8** [[TMP17]], align 8
-// CHECK34-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK34-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
-// CHECK34-NEXT:    store i32* [[VLA]], i32** [[TMP19]], align 8
-// CHECK34-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK34-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32**
-// CHECK34-NEXT:    store i32* [[VLA]], i32** [[TMP21]], align 8
-// CHECK34-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK34-NEXT:    store i64 [[TMP5]], i64* [[TMP22]], align 8
-// CHECK34-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK34-NEXT:    store i8* null, i8** [[TMP23]], align 8
-// CHECK34-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK34-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK34-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK34-NEXT:    [[TMP27:%.*]] = load i32, i32* [[N]], align 4
-// CHECK34-NEXT:    store i32 [[TMP27]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK34-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK34-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP28]], 0
+// CHECK34-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK34-NEXT:    store i8* null, i8** [[TMP16]], align 8
+// CHECK34-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK34-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32**
+// CHECK34-NEXT:    store i32* [[VLA]], i32** [[TMP18]], align 8
+// CHECK34-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK34-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32**
+// CHECK34-NEXT:    store i32* [[VLA]], i32** [[TMP20]], align 8
+// CHECK34-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
+// CHECK34-NEXT:    store i64 [[TMP5]], i64* [[TMP21]], align 8
+// CHECK34-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK34-NEXT:    store i8* null, i8** [[TMP22]], align 8
+// CHECK34-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK34-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK34-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK34-NEXT:    [[TMP26:%.*]] = load i32, i32* [[N]], align 4
+// CHECK34-NEXT:    store i32 [[TMP26]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK34-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK34-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP27]], 0
 // CHECK34-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK34-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK34-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK34-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK34-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP29]], 1
-// CHECK34-NEXT:    [[TMP30:%.*]] = zext i32 [[ADD]] to i64
-// CHECK34-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 [[TMP30]])
-// CHECK34-NEXT:    [[TMP31:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l192.region_id, i32 3, i8** [[TMP24]], i8** [[TMP25]], i64* [[TMP26]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
-// CHECK34-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
-// CHECK34-NEXT:    br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK34-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK34-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP28]], 1
+// CHECK34-NEXT:    [[TMP29:%.*]] = zext i32 [[ADD]] to i64
+// CHECK34-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 [[TMP29]])
+// CHECK34-NEXT:    [[TMP30:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l192.region_id, i32 3, i8** [[TMP23]], i8** [[TMP24]], i64* [[TMP25]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK34-NEXT:    [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
+// CHECK34-NEXT:    br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK34:       omp_offload.failed:
 // CHECK34-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l192(i64 [[TMP4]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK34-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK34:       omp_offload.cont:
-// CHECK34-NEXT:    [[TMP33:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK34-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10EEiT_(i32 noundef signext [[TMP33]])
+// CHECK34-NEXT:    [[TMP32:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK34-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10EEiT_(i32 noundef signext [[TMP32]])
 // CHECK34-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK34-NEXT:    [[TMP34:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK34-NEXT:    call void @llvm.stackrestore(i8* [[TMP34]])
-// CHECK34-NEXT:    [[TMP35:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK34-NEXT:    ret i32 [[TMP35]]
+// CHECK34-NEXT:    [[TMP33:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK34-NEXT:    call void @llvm.stackrestore(i8* [[TMP33]])
+// CHECK34-NEXT:    [[TMP34:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK34-NEXT:    ret i32 [[TMP34]]
 //
 //
 // CHECK34-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l192
@@ -6295,7 +6267,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK34-LABEL: define {{[^@]+}}@_Z5tmainIiLi10EEiT_
-// CHECK34-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK34-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK34-NEXT:  entry:
 // CHECK34-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK34-NEXT:    [[A:%.*]] = alloca [10 x i32], align 4
@@ -6346,7 +6318,7 @@ int main (int argc, char **argv) {
 // CHECK34-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK34-NEXT:    [[TMP21:%.*]] = load i32, i32* [[TE]], align 4
 // CHECK34-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK34-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l181.region_id, i32 3, i8** [[TMP19]], i8** [[TMP20]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.2, i32 0, i32 0), i8** null, i8** null, i32 [[TMP21]], i32 1)
+// CHECK34-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l181.region_id, i32 3, i8** [[TMP19]], i8** [[TMP20]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.2, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.3, i32 0, i32 0), i8** null, i8** null, i32 [[TMP21]], i32 1)
 // CHECK34-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK34-NEXT:    br i1 [[TMP23]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK34:       omp_offload.failed:
@@ -6451,7 +6423,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK34-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK34-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK34-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK34-NEXT:  entry:
 // CHECK34-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK34-NEXT:    ret void
@@ -6488,14 +6460,14 @@ int main (int argc, char **argv) {
 // CHECK35-NEXT:    [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4
 // CHECK35-NEXT:    [[TMP4:%.*]] = mul nuw i32 [[TMP0]], 4
 // CHECK35-NEXT:    [[TMP5:%.*]] = sext i32 [[TMP4]] to i64
-// CHECK35-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK35-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32*
-// CHECK35-NEXT:    store i32 [[TMP3]], i32* [[TMP7]], align 4
-// CHECK35-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK35-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i32*
-// CHECK35-NEXT:    store i32 [[TMP3]], i32* [[TMP9]], align 4
-// CHECK35-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK35-NEXT:    store i64 4, i64* [[TMP10]], align 4
+// CHECK35-NEXT:    [[TMP6:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK35-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP6]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes to i8*), i32 24, i1 false)
+// CHECK35-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK35-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i32*
+// CHECK35-NEXT:    store i32 [[TMP3]], i32* [[TMP8]], align 4
+// CHECK35-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK35-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i32*
+// CHECK35-NEXT:    store i32 [[TMP3]], i32* [[TMP10]], align 4
 // CHECK35-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
 // CHECK35-NEXT:    store i8* null, i8** [[TMP11]], align 4
 // CHECK35-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -6504,48 +6476,46 @@ int main (int argc, char **argv) {
 // CHECK35-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK35-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i32*
 // CHECK35-NEXT:    store i32 [[TMP0]], i32* [[TMP15]], align 4
-// CHECK35-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK35-NEXT:    store i64 4, i64* [[TMP16]], align 4
-// CHECK35-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK35-NEXT:    store i8* null, i8** [[TMP17]], align 4
-// CHECK35-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK35-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
-// CHECK35-NEXT:    store i32* [[VLA]], i32** [[TMP19]], align 4
-// CHECK35-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK35-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32**
-// CHECK35-NEXT:    store i32* [[VLA]], i32** [[TMP21]], align 4
-// CHECK35-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK35-NEXT:    store i64 [[TMP5]], i64* [[TMP22]], align 4
-// CHECK35-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK35-NEXT:    store i8* null, i8** [[TMP23]], align 4
-// CHECK35-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK35-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK35-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK35-NEXT:    [[TMP27:%.*]] = load i32, i32* [[N]], align 4
-// CHECK35-NEXT:    store i32 [[TMP27]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK35-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK35-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP28]], 0
+// CHECK35-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK35-NEXT:    store i8* null, i8** [[TMP16]], align 4
+// CHECK35-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK35-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32**
+// CHECK35-NEXT:    store i32* [[VLA]], i32** [[TMP18]], align 4
+// CHECK35-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK35-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32**
+// CHECK35-NEXT:    store i32* [[VLA]], i32** [[TMP20]], align 4
+// CHECK35-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
+// CHECK35-NEXT:    store i64 [[TMP5]], i64* [[TMP21]], align 4
+// CHECK35-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK35-NEXT:    store i8* null, i8** [[TMP22]], align 4
+// CHECK35-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK35-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK35-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK35-NEXT:    [[TMP26:%.*]] = load i32, i32* [[N]], align 4
+// CHECK35-NEXT:    store i32 [[TMP26]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK35-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK35-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP27]], 0
 // CHECK35-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK35-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK35-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK35-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK35-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP29]], 1
-// CHECK35-NEXT:    [[TMP30:%.*]] = zext i32 [[ADD]] to i64
-// CHECK35-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 [[TMP30]])
-// CHECK35-NEXT:    [[TMP31:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l192.region_id, i32 3, i8** [[TMP24]], i8** [[TMP25]], i64* [[TMP26]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
-// CHECK35-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
-// CHECK35-NEXT:    br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK35-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK35-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP28]], 1
+// CHECK35-NEXT:    [[TMP29:%.*]] = zext i32 [[ADD]] to i64
+// CHECK35-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 [[TMP29]])
+// CHECK35-NEXT:    [[TMP30:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l192.region_id, i32 3, i8** [[TMP23]], i8** [[TMP24]], i64* [[TMP25]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK35-NEXT:    [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
+// CHECK35-NEXT:    br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK35:       omp_offload.failed:
 // CHECK35-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l192(i32 [[TMP3]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK35-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK35:       omp_offload.cont:
-// CHECK35-NEXT:    [[TMP33:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK35-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10EEiT_(i32 noundef [[TMP33]])
+// CHECK35-NEXT:    [[TMP32:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK35-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10EEiT_(i32 noundef [[TMP32]])
 // CHECK35-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK35-NEXT:    [[TMP34:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK35-NEXT:    call void @llvm.stackrestore(i8* [[TMP34]])
-// CHECK35-NEXT:    [[TMP35:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK35-NEXT:    ret i32 [[TMP35]]
+// CHECK35-NEXT:    [[TMP33:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK35-NEXT:    call void @llvm.stackrestore(i8* [[TMP33]])
+// CHECK35-NEXT:    [[TMP34:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK35-NEXT:    ret i32 [[TMP34]]
 //
 //
 // CHECK35-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l192
@@ -6670,7 +6640,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK35-LABEL: define {{[^@]+}}@_Z5tmainIiLi10EEiT_
-// CHECK35-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK35-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK35-NEXT:  entry:
 // CHECK35-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK35-NEXT:    [[A:%.*]] = alloca [10 x i32], align 4
@@ -6719,7 +6689,7 @@ int main (int argc, char **argv) {
 // CHECK35-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK35-NEXT:    [[TMP21:%.*]] = load i32, i32* [[TE]], align 4
 // CHECK35-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK35-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l181.region_id, i32 3, i8** [[TMP19]], i8** [[TMP20]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.2, i32 0, i32 0), i8** null, i8** null, i32 [[TMP21]], i32 1)
+// CHECK35-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l181.region_id, i32 3, i8** [[TMP19]], i8** [[TMP20]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.2, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.3, i32 0, i32 0), i8** null, i8** null, i32 [[TMP21]], i32 1)
 // CHECK35-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK35-NEXT:    br i1 [[TMP23]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK35:       omp_offload.failed:
@@ -6821,7 +6791,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK35-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK35-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK35-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK35-NEXT:  entry:
 // CHECK35-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK35-NEXT:    ret void
@@ -6858,14 +6828,14 @@ int main (int argc, char **argv) {
 // CHECK36-NEXT:    [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4
 // CHECK36-NEXT:    [[TMP4:%.*]] = mul nuw i32 [[TMP0]], 4
 // CHECK36-NEXT:    [[TMP5:%.*]] = sext i32 [[TMP4]] to i64
-// CHECK36-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK36-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32*
-// CHECK36-NEXT:    store i32 [[TMP3]], i32* [[TMP7]], align 4
-// CHECK36-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK36-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i32*
-// CHECK36-NEXT:    store i32 [[TMP3]], i32* [[TMP9]], align 4
-// CHECK36-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK36-NEXT:    store i64 4, i64* [[TMP10]], align 4
+// CHECK36-NEXT:    [[TMP6:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK36-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP6]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes to i8*), i32 24, i1 false)
+// CHECK36-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK36-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i32*
+// CHECK36-NEXT:    store i32 [[TMP3]], i32* [[TMP8]], align 4
+// CHECK36-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK36-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i32*
+// CHECK36-NEXT:    store i32 [[TMP3]], i32* [[TMP10]], align 4
 // CHECK36-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
 // CHECK36-NEXT:    store i8* null, i8** [[TMP11]], align 4
 // CHECK36-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -6874,48 +6844,46 @@ int main (int argc, char **argv) {
 // CHECK36-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK36-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i32*
 // CHECK36-NEXT:    store i32 [[TMP0]], i32* [[TMP15]], align 4
-// CHECK36-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK36-NEXT:    store i64 4, i64* [[TMP16]], align 4
-// CHECK36-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK36-NEXT:    store i8* null, i8** [[TMP17]], align 4
-// CHECK36-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK36-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
-// CHECK36-NEXT:    store i32* [[VLA]], i32** [[TMP19]], align 4
-// CHECK36-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK36-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32**
-// CHECK36-NEXT:    store i32* [[VLA]], i32** [[TMP21]], align 4
-// CHECK36-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK36-NEXT:    store i64 [[TMP5]], i64* [[TMP22]], align 4
-// CHECK36-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK36-NEXT:    store i8* null, i8** [[TMP23]], align 4
-// CHECK36-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK36-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK36-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK36-NEXT:    [[TMP27:%.*]] = load i32, i32* [[N]], align 4
-// CHECK36-NEXT:    store i32 [[TMP27]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK36-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK36-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP28]], 0
+// CHECK36-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK36-NEXT:    store i8* null, i8** [[TMP16]], align 4
+// CHECK36-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK36-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32**
+// CHECK36-NEXT:    store i32* [[VLA]], i32** [[TMP18]], align 4
+// CHECK36-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK36-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32**
+// CHECK36-NEXT:    store i32* [[VLA]], i32** [[TMP20]], align 4
+// CHECK36-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
+// CHECK36-NEXT:    store i64 [[TMP5]], i64* [[TMP21]], align 4
+// CHECK36-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK36-NEXT:    store i8* null, i8** [[TMP22]], align 4
+// CHECK36-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK36-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK36-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK36-NEXT:    [[TMP26:%.*]] = load i32, i32* [[N]], align 4
+// CHECK36-NEXT:    store i32 [[TMP26]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK36-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK36-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP27]], 0
 // CHECK36-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK36-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK36-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK36-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK36-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP29]], 1
-// CHECK36-NEXT:    [[TMP30:%.*]] = zext i32 [[ADD]] to i64
-// CHECK36-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 [[TMP30]])
-// CHECK36-NEXT:    [[TMP31:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l192.region_id, i32 3, i8** [[TMP24]], i8** [[TMP25]], i64* [[TMP26]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
-// CHECK36-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
-// CHECK36-NEXT:    br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK36-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK36-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP28]], 1
+// CHECK36-NEXT:    [[TMP29:%.*]] = zext i32 [[ADD]] to i64
+// CHECK36-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 [[TMP29]])
+// CHECK36-NEXT:    [[TMP30:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l192.region_id, i32 3, i8** [[TMP23]], i8** [[TMP24]], i64* [[TMP25]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK36-NEXT:    [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
+// CHECK36-NEXT:    br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK36:       omp_offload.failed:
 // CHECK36-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l192(i32 [[TMP3]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK36-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK36:       omp_offload.cont:
-// CHECK36-NEXT:    [[TMP33:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK36-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10EEiT_(i32 noundef [[TMP33]])
+// CHECK36-NEXT:    [[TMP32:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK36-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10EEiT_(i32 noundef [[TMP32]])
 // CHECK36-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK36-NEXT:    [[TMP34:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK36-NEXT:    call void @llvm.stackrestore(i8* [[TMP34]])
-// CHECK36-NEXT:    [[TMP35:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK36-NEXT:    ret i32 [[TMP35]]
+// CHECK36-NEXT:    [[TMP33:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK36-NEXT:    call void @llvm.stackrestore(i8* [[TMP33]])
+// CHECK36-NEXT:    [[TMP34:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK36-NEXT:    ret i32 [[TMP34]]
 //
 //
 // CHECK36-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l192
@@ -7040,7 +7008,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK36-LABEL: define {{[^@]+}}@_Z5tmainIiLi10EEiT_
-// CHECK36-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK36-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK36-NEXT:  entry:
 // CHECK36-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK36-NEXT:    [[A:%.*]] = alloca [10 x i32], align 4
@@ -7089,7 +7057,7 @@ int main (int argc, char **argv) {
 // CHECK36-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK36-NEXT:    [[TMP21:%.*]] = load i32, i32* [[TE]], align 4
 // CHECK36-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK36-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l181.region_id, i32 3, i8** [[TMP19]], i8** [[TMP20]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.2, i32 0, i32 0), i8** null, i8** null, i32 [[TMP21]], i32 1)
+// CHECK36-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l181.region_id, i32 3, i8** [[TMP19]], i8** [[TMP20]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.2, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.3, i32 0, i32 0), i8** null, i8** null, i32 [[TMP21]], i32 1)
 // CHECK36-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK36-NEXT:    br i1 [[TMP23]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK36:       omp_offload.failed:
@@ -7191,7 +7159,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK36-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK36-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK36-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK36-NEXT:  entry:
 // CHECK36-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK36-NEXT:    ret void
@@ -7234,14 +7202,14 @@ int main (int argc, char **argv) {
 // CHECK37-NEXT:    store i32 [[TMP5]], i32* [[CONV1]], align 4
 // CHECK37-NEXT:    [[TMP6:%.*]] = load i64, i64* [[N_CASTED]], align 8
 // CHECK37-NEXT:    [[TMP7:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK37-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK37-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i64*
-// CHECK37-NEXT:    store i64 [[TMP4]], i64* [[TMP9]], align 8
-// CHECK37-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK37-NEXT:    [[TMP11:%.*]] = bitcast i8** [[TMP10]] to i64*
-// CHECK37-NEXT:    store i64 [[TMP4]], i64* [[TMP11]], align 8
-// CHECK37-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK37-NEXT:    store i64 4, i64* [[TMP12]], align 8
+// CHECK37-NEXT:    [[TMP8:%.*]] = bitcast [4 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK37-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP8]], i8* align 8 bitcast ([4 x i64]* @.offload_sizes to i8*), i64 32, i1 false)
+// CHECK37-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK37-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i64*
+// CHECK37-NEXT:    store i64 [[TMP4]], i64* [[TMP10]], align 8
+// CHECK37-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK37-NEXT:    [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i64*
+// CHECK37-NEXT:    store i64 [[TMP4]], i64* [[TMP12]], align 8
 // CHECK37-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
 // CHECK37-NEXT:    store i8* null, i8** [[TMP13]], align 8
 // CHECK37-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -7250,58 +7218,54 @@ int main (int argc, char **argv) {
 // CHECK37-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK37-NEXT:    [[TMP17:%.*]] = bitcast i8** [[TMP16]] to i64*
 // CHECK37-NEXT:    store i64 [[TMP6]], i64* [[TMP17]], align 8
-// CHECK37-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK37-NEXT:    store i64 4, i64* [[TMP18]], align 8
-// CHECK37-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK37-NEXT:    store i8* null, i8** [[TMP19]], align 8
-// CHECK37-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK37-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i64*
-// CHECK37-NEXT:    store i64 [[TMP1]], i64* [[TMP21]], align 8
-// CHECK37-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK37-NEXT:    [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i64*
-// CHECK37-NEXT:    store i64 [[TMP1]], i64* [[TMP23]], align 8
-// CHECK37-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK37-NEXT:    store i64 8, i64* [[TMP24]], align 8
-// CHECK37-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK37-NEXT:    store i8* null, i8** [[TMP25]], align 8
-// CHECK37-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK37-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK37-NEXT:    store i8* null, i8** [[TMP18]], align 8
+// CHECK37-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK37-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i64*
+// CHECK37-NEXT:    store i64 [[TMP1]], i64* [[TMP20]], align 8
+// CHECK37-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK37-NEXT:    [[TMP22:%.*]] = bitcast i8** [[TMP21]] to i64*
+// CHECK37-NEXT:    store i64 [[TMP1]], i64* [[TMP22]], align 8
+// CHECK37-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK37-NEXT:    store i8* null, i8** [[TMP23]], align 8
+// CHECK37-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK37-NEXT:    [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i32**
+// CHECK37-NEXT:    store i32* [[VLA]], i32** [[TMP25]], align 8
+// CHECK37-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
 // CHECK37-NEXT:    [[TMP27:%.*]] = bitcast i8** [[TMP26]] to i32**
 // CHECK37-NEXT:    store i32* [[VLA]], i32** [[TMP27]], align 8
-// CHECK37-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK37-NEXT:    [[TMP29:%.*]] = bitcast i8** [[TMP28]] to i32**
-// CHECK37-NEXT:    store i32* [[VLA]], i32** [[TMP29]], align 8
-// CHECK37-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK37-NEXT:    store i64 [[TMP7]], i64* [[TMP30]], align 8
-// CHECK37-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
-// CHECK37-NEXT:    store i8* null, i8** [[TMP31]], align 8
-// CHECK37-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK37-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK37-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK37-NEXT:    [[TMP35:%.*]] = load i32, i32* [[N]], align 4
-// CHECK37-NEXT:    store i32 [[TMP35]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK37-NEXT:    [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK37-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP36]], 0
+// CHECK37-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
+// CHECK37-NEXT:    store i64 [[TMP7]], i64* [[TMP28]], align 8
+// CHECK37-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
+// CHECK37-NEXT:    store i8* null, i8** [[TMP29]], align 8
+// CHECK37-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK37-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK37-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK37-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N]], align 4
+// CHECK37-NEXT:    store i32 [[TMP33]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK37-NEXT:    [[TMP34:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK37-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP34]], 0
 // CHECK37-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK37-NEXT:    [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK37-NEXT:    store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK37-NEXT:    [[TMP37:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK37-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP37]], 1
-// CHECK37-NEXT:    [[TMP38:%.*]] = zext i32 [[ADD]] to i64
-// CHECK37-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 [[TMP38]])
-// CHECK37-NEXT:    [[TMP39:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l192.region_id, i32 4, i8** [[TMP32]], i8** [[TMP33]], i64* [[TMP34]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
-// CHECK37-NEXT:    [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0
-// CHECK37-NEXT:    br i1 [[TMP40]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK37-NEXT:    [[TMP35:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK37-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP35]], 1
+// CHECK37-NEXT:    [[TMP36:%.*]] = zext i32 [[ADD]] to i64
+// CHECK37-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 [[TMP36]])
+// CHECK37-NEXT:    [[TMP37:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l192.region_id, i32 4, i8** [[TMP30]], i8** [[TMP31]], i64* [[TMP32]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK37-NEXT:    [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0
+// CHECK37-NEXT:    br i1 [[TMP38]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK37:       omp_offload.failed:
 // CHECK37-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l192(i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK37-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK37:       omp_offload.cont:
-// CHECK37-NEXT:    [[TMP41:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK37-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10EEiT_(i32 noundef signext [[TMP41]])
+// CHECK37-NEXT:    [[TMP39:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK37-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10EEiT_(i32 noundef signext [[TMP39]])
 // CHECK37-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK37-NEXT:    [[TMP42:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK37-NEXT:    call void @llvm.stackrestore(i8* [[TMP42]])
-// CHECK37-NEXT:    [[TMP43:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK37-NEXT:    ret i32 [[TMP43]]
+// CHECK37-NEXT:    [[TMP40:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK37-NEXT:    call void @llvm.stackrestore(i8* [[TMP40]])
+// CHECK37-NEXT:    [[TMP41:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK37-NEXT:    ret i32 [[TMP41]]
 //
 //
 // CHECK37-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l192
@@ -7478,7 +7442,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK37-LABEL: define {{[^@]+}}@_Z5tmainIiLi10EEiT_
-// CHECK37-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK37-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK37-NEXT:  entry:
 // CHECK37-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK37-NEXT:    [[A:%.*]] = alloca [10 x i32], align 4
@@ -7529,7 +7493,7 @@ int main (int argc, char **argv) {
 // CHECK37-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK37-NEXT:    [[TMP21:%.*]] = load i32, i32* [[TE]], align 4
 // CHECK37-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK37-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l181.region_id, i32 3, i8** [[TMP19]], i8** [[TMP20]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.2, i32 0, i32 0), i8** null, i8** null, i32 [[TMP21]], i32 1)
+// CHECK37-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l181.region_id, i32 3, i8** [[TMP19]], i8** [[TMP20]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.2, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.3, i32 0, i32 0), i8** null, i8** null, i32 [[TMP21]], i32 1)
 // CHECK37-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK37-NEXT:    br i1 [[TMP23]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK37:       omp_offload.failed:
@@ -7634,7 +7598,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK37-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK37-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK37-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK37-NEXT:  entry:
 // CHECK37-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK37-NEXT:    ret void
@@ -7677,14 +7641,14 @@ int main (int argc, char **argv) {
 // CHECK38-NEXT:    store i32 [[TMP5]], i32* [[CONV1]], align 4
 // CHECK38-NEXT:    [[TMP6:%.*]] = load i64, i64* [[N_CASTED]], align 8
 // CHECK38-NEXT:    [[TMP7:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK38-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK38-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i64*
-// CHECK38-NEXT:    store i64 [[TMP4]], i64* [[TMP9]], align 8
-// CHECK38-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK38-NEXT:    [[TMP11:%.*]] = bitcast i8** [[TMP10]] to i64*
-// CHECK38-NEXT:    store i64 [[TMP4]], i64* [[TMP11]], align 8
-// CHECK38-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK38-NEXT:    store i64 4, i64* [[TMP12]], align 8
+// CHECK38-NEXT:    [[TMP8:%.*]] = bitcast [4 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK38-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP8]], i8* align 8 bitcast ([4 x i64]* @.offload_sizes to i8*), i64 32, i1 false)
+// CHECK38-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK38-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i64*
+// CHECK38-NEXT:    store i64 [[TMP4]], i64* [[TMP10]], align 8
+// CHECK38-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK38-NEXT:    [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i64*
+// CHECK38-NEXT:    store i64 [[TMP4]], i64* [[TMP12]], align 8
 // CHECK38-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
 // CHECK38-NEXT:    store i8* null, i8** [[TMP13]], align 8
 // CHECK38-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -7693,58 +7657,54 @@ int main (int argc, char **argv) {
 // CHECK38-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK38-NEXT:    [[TMP17:%.*]] = bitcast i8** [[TMP16]] to i64*
 // CHECK38-NEXT:    store i64 [[TMP6]], i64* [[TMP17]], align 8
-// CHECK38-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK38-NEXT:    store i64 4, i64* [[TMP18]], align 8
-// CHECK38-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK38-NEXT:    store i8* null, i8** [[TMP19]], align 8
-// CHECK38-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK38-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i64*
-// CHECK38-NEXT:    store i64 [[TMP1]], i64* [[TMP21]], align 8
-// CHECK38-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK38-NEXT:    [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i64*
-// CHECK38-NEXT:    store i64 [[TMP1]], i64* [[TMP23]], align 8
-// CHECK38-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK38-NEXT:    store i64 8, i64* [[TMP24]], align 8
-// CHECK38-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK38-NEXT:    store i8* null, i8** [[TMP25]], align 8
-// CHECK38-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK38-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK38-NEXT:    store i8* null, i8** [[TMP18]], align 8
+// CHECK38-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK38-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i64*
+// CHECK38-NEXT:    store i64 [[TMP1]], i64* [[TMP20]], align 8
+// CHECK38-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK38-NEXT:    [[TMP22:%.*]] = bitcast i8** [[TMP21]] to i64*
+// CHECK38-NEXT:    store i64 [[TMP1]], i64* [[TMP22]], align 8
+// CHECK38-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK38-NEXT:    store i8* null, i8** [[TMP23]], align 8
+// CHECK38-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK38-NEXT:    [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i32**
+// CHECK38-NEXT:    store i32* [[VLA]], i32** [[TMP25]], align 8
+// CHECK38-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
 // CHECK38-NEXT:    [[TMP27:%.*]] = bitcast i8** [[TMP26]] to i32**
 // CHECK38-NEXT:    store i32* [[VLA]], i32** [[TMP27]], align 8
-// CHECK38-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK38-NEXT:    [[TMP29:%.*]] = bitcast i8** [[TMP28]] to i32**
-// CHECK38-NEXT:    store i32* [[VLA]], i32** [[TMP29]], align 8
-// CHECK38-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK38-NEXT:    store i64 [[TMP7]], i64* [[TMP30]], align 8
-// CHECK38-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
-// CHECK38-NEXT:    store i8* null, i8** [[TMP31]], align 8
-// CHECK38-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK38-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK38-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK38-NEXT:    [[TMP35:%.*]] = load i32, i32* [[N]], align 4
-// CHECK38-NEXT:    store i32 [[TMP35]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK38-NEXT:    [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK38-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP36]], 0
+// CHECK38-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
+// CHECK38-NEXT:    store i64 [[TMP7]], i64* [[TMP28]], align 8
+// CHECK38-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
+// CHECK38-NEXT:    store i8* null, i8** [[TMP29]], align 8
+// CHECK38-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK38-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK38-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK38-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N]], align 4
+// CHECK38-NEXT:    store i32 [[TMP33]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK38-NEXT:    [[TMP34:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK38-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP34]], 0
 // CHECK38-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK38-NEXT:    [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK38-NEXT:    store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK38-NEXT:    [[TMP37:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK38-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP37]], 1
-// CHECK38-NEXT:    [[TMP38:%.*]] = zext i32 [[ADD]] to i64
-// CHECK38-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 [[TMP38]])
-// CHECK38-NEXT:    [[TMP39:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l192.region_id, i32 4, i8** [[TMP32]], i8** [[TMP33]], i64* [[TMP34]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
-// CHECK38-NEXT:    [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0
-// CHECK38-NEXT:    br i1 [[TMP40]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK38-NEXT:    [[TMP35:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK38-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP35]], 1
+// CHECK38-NEXT:    [[TMP36:%.*]] = zext i32 [[ADD]] to i64
+// CHECK38-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 [[TMP36]])
+// CHECK38-NEXT:    [[TMP37:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l192.region_id, i32 4, i8** [[TMP30]], i8** [[TMP31]], i64* [[TMP32]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK38-NEXT:    [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0
+// CHECK38-NEXT:    br i1 [[TMP38]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK38:       omp_offload.failed:
 // CHECK38-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l192(i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK38-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK38:       omp_offload.cont:
-// CHECK38-NEXT:    [[TMP41:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK38-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10EEiT_(i32 noundef signext [[TMP41]])
+// CHECK38-NEXT:    [[TMP39:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK38-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10EEiT_(i32 noundef signext [[TMP39]])
 // CHECK38-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK38-NEXT:    [[TMP42:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK38-NEXT:    call void @llvm.stackrestore(i8* [[TMP42]])
-// CHECK38-NEXT:    [[TMP43:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK38-NEXT:    ret i32 [[TMP43]]
+// CHECK38-NEXT:    [[TMP40:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK38-NEXT:    call void @llvm.stackrestore(i8* [[TMP40]])
+// CHECK38-NEXT:    [[TMP41:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK38-NEXT:    ret i32 [[TMP41]]
 //
 //
 // CHECK38-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l192
@@ -7921,7 +7881,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK38-LABEL: define {{[^@]+}}@_Z5tmainIiLi10EEiT_
-// CHECK38-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK38-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK38-NEXT:  entry:
 // CHECK38-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK38-NEXT:    [[A:%.*]] = alloca [10 x i32], align 4
@@ -7972,7 +7932,7 @@ int main (int argc, char **argv) {
 // CHECK38-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK38-NEXT:    [[TMP21:%.*]] = load i32, i32* [[TE]], align 4
 // CHECK38-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK38-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l181.region_id, i32 3, i8** [[TMP19]], i8** [[TMP20]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.2, i32 0, i32 0), i8** null, i8** null, i32 [[TMP21]], i32 1)
+// CHECK38-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l181.region_id, i32 3, i8** [[TMP19]], i8** [[TMP20]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.2, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.3, i32 0, i32 0), i8** null, i8** null, i32 [[TMP21]], i32 1)
 // CHECK38-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK38-NEXT:    br i1 [[TMP23]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK38:       omp_offload.failed:
@@ -8077,7 +8037,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK38-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK38-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK38-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK38-NEXT:  entry:
 // CHECK38-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK38-NEXT:    ret void
@@ -8118,14 +8078,14 @@ int main (int argc, char **argv) {
 // CHECK39-NEXT:    [[TMP5:%.*]] = load i32, i32* [[N_CASTED]], align 4
 // CHECK39-NEXT:    [[TMP6:%.*]] = mul nuw i32 [[TMP0]], 4
 // CHECK39-NEXT:    [[TMP7:%.*]] = sext i32 [[TMP6]] to i64
-// CHECK39-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK39-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i32*
-// CHECK39-NEXT:    store i32 [[TMP3]], i32* [[TMP9]], align 4
-// CHECK39-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK39-NEXT:    [[TMP11:%.*]] = bitcast i8** [[TMP10]] to i32*
-// CHECK39-NEXT:    store i32 [[TMP3]], i32* [[TMP11]], align 4
-// CHECK39-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK39-NEXT:    store i64 4, i64* [[TMP12]], align 4
+// CHECK39-NEXT:    [[TMP8:%.*]] = bitcast [4 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK39-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP8]], i8* align 4 bitcast ([4 x i64]* @.offload_sizes to i8*), i32 32, i1 false)
+// CHECK39-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK39-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i32*
+// CHECK39-NEXT:    store i32 [[TMP3]], i32* [[TMP10]], align 4
+// CHECK39-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK39-NEXT:    [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i32*
+// CHECK39-NEXT:    store i32 [[TMP3]], i32* [[TMP12]], align 4
 // CHECK39-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
 // CHECK39-NEXT:    store i8* null, i8** [[TMP13]], align 4
 // CHECK39-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -8134,58 +8094,54 @@ int main (int argc, char **argv) {
 // CHECK39-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK39-NEXT:    [[TMP17:%.*]] = bitcast i8** [[TMP16]] to i32*
 // CHECK39-NEXT:    store i32 [[TMP5]], i32* [[TMP17]], align 4
-// CHECK39-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK39-NEXT:    store i64 4, i64* [[TMP18]], align 4
-// CHECK39-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK39-NEXT:    store i8* null, i8** [[TMP19]], align 4
-// CHECK39-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK39-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32*
-// CHECK39-NEXT:    store i32 [[TMP0]], i32* [[TMP21]], align 4
-// CHECK39-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK39-NEXT:    [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i32*
-// CHECK39-NEXT:    store i32 [[TMP0]], i32* [[TMP23]], align 4
-// CHECK39-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK39-NEXT:    store i64 4, i64* [[TMP24]], align 4
-// CHECK39-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK39-NEXT:    store i8* null, i8** [[TMP25]], align 4
-// CHECK39-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK39-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK39-NEXT:    store i8* null, i8** [[TMP18]], align 4
+// CHECK39-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK39-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32*
+// CHECK39-NEXT:    store i32 [[TMP0]], i32* [[TMP20]], align 4
+// CHECK39-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK39-NEXT:    [[TMP22:%.*]] = bitcast i8** [[TMP21]] to i32*
+// CHECK39-NEXT:    store i32 [[TMP0]], i32* [[TMP22]], align 4
+// CHECK39-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK39-NEXT:    store i8* null, i8** [[TMP23]], align 4
+// CHECK39-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK39-NEXT:    [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i32**
+// CHECK39-NEXT:    store i32* [[VLA]], i32** [[TMP25]], align 4
+// CHECK39-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
 // CHECK39-NEXT:    [[TMP27:%.*]] = bitcast i8** [[TMP26]] to i32**
 // CHECK39-NEXT:    store i32* [[VLA]], i32** [[TMP27]], align 4
-// CHECK39-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK39-NEXT:    [[TMP29:%.*]] = bitcast i8** [[TMP28]] to i32**
-// CHECK39-NEXT:    store i32* [[VLA]], i32** [[TMP29]], align 4
-// CHECK39-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK39-NEXT:    store i64 [[TMP7]], i64* [[TMP30]], align 4
-// CHECK39-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
-// CHECK39-NEXT:    store i8* null, i8** [[TMP31]], align 4
-// CHECK39-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK39-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK39-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK39-NEXT:    [[TMP35:%.*]] = load i32, i32* [[N]], align 4
-// CHECK39-NEXT:    store i32 [[TMP35]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK39-NEXT:    [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK39-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP36]], 0
+// CHECK39-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
+// CHECK39-NEXT:    store i64 [[TMP7]], i64* [[TMP28]], align 4
+// CHECK39-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
+// CHECK39-NEXT:    store i8* null, i8** [[TMP29]], align 4
+// CHECK39-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK39-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK39-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK39-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N]], align 4
+// CHECK39-NEXT:    store i32 [[TMP33]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK39-NEXT:    [[TMP34:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK39-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP34]], 0
 // CHECK39-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK39-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK39-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK39-NEXT:    [[TMP37:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK39-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP37]], 1
-// CHECK39-NEXT:    [[TMP38:%.*]] = zext i32 [[ADD]] to i64
-// CHECK39-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 [[TMP38]])
-// CHECK39-NEXT:    [[TMP39:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l192.region_id, i32 4, i8** [[TMP32]], i8** [[TMP33]], i64* [[TMP34]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
-// CHECK39-NEXT:    [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0
-// CHECK39-NEXT:    br i1 [[TMP40]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK39-NEXT:    [[TMP35:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK39-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP35]], 1
+// CHECK39-NEXT:    [[TMP36:%.*]] = zext i32 [[ADD]] to i64
+// CHECK39-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 [[TMP36]])
+// CHECK39-NEXT:    [[TMP37:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l192.region_id, i32 4, i8** [[TMP30]], i8** [[TMP31]], i64* [[TMP32]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK39-NEXT:    [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0
+// CHECK39-NEXT:    br i1 [[TMP38]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK39:       omp_offload.failed:
 // CHECK39-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l192(i32 [[TMP3]], i32 [[TMP5]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK39-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK39:       omp_offload.cont:
-// CHECK39-NEXT:    [[TMP41:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK39-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10EEiT_(i32 noundef [[TMP41]])
+// CHECK39-NEXT:    [[TMP39:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK39-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10EEiT_(i32 noundef [[TMP39]])
 // CHECK39-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK39-NEXT:    [[TMP42:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK39-NEXT:    call void @llvm.stackrestore(i8* [[TMP42]])
-// CHECK39-NEXT:    [[TMP43:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK39-NEXT:    ret i32 [[TMP43]]
+// CHECK39-NEXT:    [[TMP40:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK39-NEXT:    call void @llvm.stackrestore(i8* [[TMP40]])
+// CHECK39-NEXT:    [[TMP41:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK39-NEXT:    ret i32 [[TMP41]]
 //
 //
 // CHECK39-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l192
@@ -8358,7 +8314,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK39-LABEL: define {{[^@]+}}@_Z5tmainIiLi10EEiT_
-// CHECK39-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK39-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK39-NEXT:  entry:
 // CHECK39-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK39-NEXT:    [[A:%.*]] = alloca [10 x i32], align 4
@@ -8407,7 +8363,7 @@ int main (int argc, char **argv) {
 // CHECK39-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK39-NEXT:    [[TMP21:%.*]] = load i32, i32* [[TE]], align 4
 // CHECK39-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK39-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l181.region_id, i32 3, i8** [[TMP19]], i8** [[TMP20]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.2, i32 0, i32 0), i8** null, i8** null, i32 [[TMP21]], i32 1)
+// CHECK39-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l181.region_id, i32 3, i8** [[TMP19]], i8** [[TMP20]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.2, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.3, i32 0, i32 0), i8** null, i8** null, i32 [[TMP21]], i32 1)
 // CHECK39-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK39-NEXT:    br i1 [[TMP23]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK39:       omp_offload.failed:
@@ -8509,7 +8465,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK39-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK39-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK39-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK39-NEXT:  entry:
 // CHECK39-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK39-NEXT:    ret void
@@ -8550,14 +8506,14 @@ int main (int argc, char **argv) {
 // CHECK40-NEXT:    [[TMP5:%.*]] = load i32, i32* [[N_CASTED]], align 4
 // CHECK40-NEXT:    [[TMP6:%.*]] = mul nuw i32 [[TMP0]], 4
 // CHECK40-NEXT:    [[TMP7:%.*]] = sext i32 [[TMP6]] to i64
-// CHECK40-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK40-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i32*
-// CHECK40-NEXT:    store i32 [[TMP3]], i32* [[TMP9]], align 4
-// CHECK40-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK40-NEXT:    [[TMP11:%.*]] = bitcast i8** [[TMP10]] to i32*
-// CHECK40-NEXT:    store i32 [[TMP3]], i32* [[TMP11]], align 4
-// CHECK40-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK40-NEXT:    store i64 4, i64* [[TMP12]], align 4
+// CHECK40-NEXT:    [[TMP8:%.*]] = bitcast [4 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK40-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP8]], i8* align 4 bitcast ([4 x i64]* @.offload_sizes to i8*), i32 32, i1 false)
+// CHECK40-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK40-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i32*
+// CHECK40-NEXT:    store i32 [[TMP3]], i32* [[TMP10]], align 4
+// CHECK40-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK40-NEXT:    [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i32*
+// CHECK40-NEXT:    store i32 [[TMP3]], i32* [[TMP12]], align 4
 // CHECK40-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
 // CHECK40-NEXT:    store i8* null, i8** [[TMP13]], align 4
 // CHECK40-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -8566,58 +8522,54 @@ int main (int argc, char **argv) {
 // CHECK40-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK40-NEXT:    [[TMP17:%.*]] = bitcast i8** [[TMP16]] to i32*
 // CHECK40-NEXT:    store i32 [[TMP5]], i32* [[TMP17]], align 4
-// CHECK40-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK40-NEXT:    store i64 4, i64* [[TMP18]], align 4
-// CHECK40-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK40-NEXT:    store i8* null, i8** [[TMP19]], align 4
-// CHECK40-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK40-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32*
-// CHECK40-NEXT:    store i32 [[TMP0]], i32* [[TMP21]], align 4
-// CHECK40-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK40-NEXT:    [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i32*
-// CHECK40-NEXT:    store i32 [[TMP0]], i32* [[TMP23]], align 4
-// CHECK40-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK40-NEXT:    store i64 4, i64* [[TMP24]], align 4
-// CHECK40-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK40-NEXT:    store i8* null, i8** [[TMP25]], align 4
-// CHECK40-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK40-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK40-NEXT:    store i8* null, i8** [[TMP18]], align 4
+// CHECK40-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK40-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32*
+// CHECK40-NEXT:    store i32 [[TMP0]], i32* [[TMP20]], align 4
+// CHECK40-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK40-NEXT:    [[TMP22:%.*]] = bitcast i8** [[TMP21]] to i32*
+// CHECK40-NEXT:    store i32 [[TMP0]], i32* [[TMP22]], align 4
+// CHECK40-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK40-NEXT:    store i8* null, i8** [[TMP23]], align 4
+// CHECK40-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK40-NEXT:    [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i32**
+// CHECK40-NEXT:    store i32* [[VLA]], i32** [[TMP25]], align 4
+// CHECK40-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
 // CHECK40-NEXT:    [[TMP27:%.*]] = bitcast i8** [[TMP26]] to i32**
 // CHECK40-NEXT:    store i32* [[VLA]], i32** [[TMP27]], align 4
-// CHECK40-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK40-NEXT:    [[TMP29:%.*]] = bitcast i8** [[TMP28]] to i32**
-// CHECK40-NEXT:    store i32* [[VLA]], i32** [[TMP29]], align 4
-// CHECK40-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK40-NEXT:    store i64 [[TMP7]], i64* [[TMP30]], align 4
-// CHECK40-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
-// CHECK40-NEXT:    store i8* null, i8** [[TMP31]], align 4
-// CHECK40-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK40-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK40-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK40-NEXT:    [[TMP35:%.*]] = load i32, i32* [[N]], align 4
-// CHECK40-NEXT:    store i32 [[TMP35]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK40-NEXT:    [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK40-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP36]], 0
+// CHECK40-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
+// CHECK40-NEXT:    store i64 [[TMP7]], i64* [[TMP28]], align 4
+// CHECK40-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
+// CHECK40-NEXT:    store i8* null, i8** [[TMP29]], align 4
+// CHECK40-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK40-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK40-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK40-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N]], align 4
+// CHECK40-NEXT:    store i32 [[TMP33]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK40-NEXT:    [[TMP34:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK40-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP34]], 0
 // CHECK40-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK40-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK40-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK40-NEXT:    [[TMP37:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK40-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP37]], 1
-// CHECK40-NEXT:    [[TMP38:%.*]] = zext i32 [[ADD]] to i64
-// CHECK40-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 [[TMP38]])
-// CHECK40-NEXT:    [[TMP39:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l192.region_id, i32 4, i8** [[TMP32]], i8** [[TMP33]], i64* [[TMP34]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
-// CHECK40-NEXT:    [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0
-// CHECK40-NEXT:    br i1 [[TMP40]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK40-NEXT:    [[TMP35:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK40-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP35]], 1
+// CHECK40-NEXT:    [[TMP36:%.*]] = zext i32 [[ADD]] to i64
+// CHECK40-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 [[TMP36]])
+// CHECK40-NEXT:    [[TMP37:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l192.region_id, i32 4, i8** [[TMP30]], i8** [[TMP31]], i64* [[TMP32]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK40-NEXT:    [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0
+// CHECK40-NEXT:    br i1 [[TMP38]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK40:       omp_offload.failed:
 // CHECK40-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l192(i32 [[TMP3]], i32 [[TMP5]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK40-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK40:       omp_offload.cont:
-// CHECK40-NEXT:    [[TMP41:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK40-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10EEiT_(i32 noundef [[TMP41]])
+// CHECK40-NEXT:    [[TMP39:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK40-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10EEiT_(i32 noundef [[TMP39]])
 // CHECK40-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK40-NEXT:    [[TMP42:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK40-NEXT:    call void @llvm.stackrestore(i8* [[TMP42]])
-// CHECK40-NEXT:    [[TMP43:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK40-NEXT:    ret i32 [[TMP43]]
+// CHECK40-NEXT:    [[TMP40:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK40-NEXT:    call void @llvm.stackrestore(i8* [[TMP40]])
+// CHECK40-NEXT:    [[TMP41:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK40-NEXT:    ret i32 [[TMP41]]
 //
 //
 // CHECK40-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l192
@@ -8790,7 +8742,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK40-LABEL: define {{[^@]+}}@_Z5tmainIiLi10EEiT_
-// CHECK40-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK40-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK40-NEXT:  entry:
 // CHECK40-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK40-NEXT:    [[A:%.*]] = alloca [10 x i32], align 4
@@ -8839,7 +8791,7 @@ int main (int argc, char **argv) {
 // CHECK40-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK40-NEXT:    [[TMP21:%.*]] = load i32, i32* [[TE]], align 4
 // CHECK40-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK40-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l181.region_id, i32 3, i8** [[TMP19]], i8** [[TMP20]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.2, i32 0, i32 0), i8** null, i8** null, i32 [[TMP21]], i32 1)
+// CHECK40-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l181.region_id, i32 3, i8** [[TMP19]], i8** [[TMP20]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.2, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.3, i32 0, i32 0), i8** null, i8** null, i32 [[TMP21]], i32 1)
 // CHECK40-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
 // CHECK40-NEXT:    br i1 [[TMP23]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK40:       omp_offload.failed:
@@ -8941,7 +8893,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK40-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK40-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK40-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK40-NEXT:  entry:
 // CHECK40-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK40-NEXT:    ret void

diff  --git a/clang/test/OpenMP/teams_distribute_simd_collapse_codegen.cpp b/clang/test/OpenMP/teams_distribute_simd_collapse_codegen.cpp
index c809a5b9105bf..a10dc9ae1ed3e 100644
--- a/clang/test/OpenMP/teams_distribute_simd_collapse_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_simd_collapse_codegen.cpp
@@ -29,7 +29,7 @@ struct SS{
     #pragma omp teams distribute simd collapse(2)
     for(int i = 0; i < X; i++) {
       for(int j = 0; j < Y; j++) {
-	a[i][j] = (T)0;
+        a[i][j] = (T)0;
       }
     }
 
@@ -1029,14 +1029,14 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP9:%.*]] = load i64, i64* [[M_CASTED]], align 8
 // CHECK9-NEXT:    [[TMP10:%.*]] = mul nuw i64 [[TMP1]], [[TMP3]]
 // CHECK9-NEXT:    [[TMP11:%.*]] = mul nuw i64 [[TMP10]], 4
-// CHECK9-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP13:%.*]] = bitcast i8** [[TMP12]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP7]], i64* [[TMP13]], align 8
-// CHECK9-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP7]], i64* [[TMP15]], align 8
-// CHECK9-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK9-NEXT:    store i64 4, i64* [[TMP16]], align 8
+// CHECK9-NEXT:    [[TMP12:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK9-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP12]], i8* align 8 bitcast ([5 x i64]* @.offload_sizes to i8*), i64 40, i1 false)
+// CHECK9-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP7]], i64* [[TMP14]], align 8
+// CHECK9-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP16:%.*]] = bitcast i8** [[TMP15]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP7]], i64* [[TMP16]], align 8
 // CHECK9-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
 // CHECK9-NEXT:    store i8* null, i8** [[TMP17]], align 8
 // CHECK9-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -1045,75 +1045,69 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK9-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i64*
 // CHECK9-NEXT:    store i64 [[TMP9]], i64* [[TMP21]], align 8
-// CHECK9-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK9-NEXT:    store i64 4, i64* [[TMP22]], align 8
-// CHECK9-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK9-NEXT:    store i8* null, i8** [[TMP23]], align 8
-// CHECK9-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP25]], align 8
-// CHECK9-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP27:%.*]] = bitcast i8** [[TMP26]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP27]], align 8
-// CHECK9-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK9-NEXT:    store i64 8, i64* [[TMP28]], align 8
-// CHECK9-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK9-NEXT:    store i8* null, i8** [[TMP29]], align 8
-// CHECK9-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK9-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK9-NEXT:    store i8* null, i8** [[TMP22]], align 8
+// CHECK9-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP24]], align 8
+// CHECK9-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP26]], align 8
+// CHECK9-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK9-NEXT:    store i8* null, i8** [[TMP27]], align 8
+// CHECK9-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK9-NEXT:    [[TMP29:%.*]] = bitcast i8** [[TMP28]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP3]], i64* [[TMP29]], align 8
+// CHECK9-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
 // CHECK9-NEXT:    [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i64*
 // CHECK9-NEXT:    store i64 [[TMP3]], i64* [[TMP31]], align 8
-// CHECK9-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK9-NEXT:    [[TMP33:%.*]] = bitcast i8** [[TMP32]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP3]], i64* [[TMP33]], align 8
-// CHECK9-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK9-NEXT:    store i64 8, i64* [[TMP34]], align 8
-// CHECK9-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
-// CHECK9-NEXT:    store i8* null, i8** [[TMP35]], align 8
-// CHECK9-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK9-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i32**
-// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP37]], align 8
-// CHECK9-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
-// CHECK9-NEXT:    [[TMP39:%.*]] = bitcast i8** [[TMP38]] to i32**
-// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP39]], align 8
-// CHECK9-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK9-NEXT:    store i64 [[TMP11]], i64* [[TMP40]], align 8
-// CHECK9-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
-// CHECK9-NEXT:    store i8* null, i8** [[TMP41]], align 8
-// CHECK9-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP45:%.*]] = load i32, i32* [[N]], align 4
-// CHECK9-NEXT:    store i32 [[TMP45]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK9-NEXT:    [[TMP46:%.*]] = load i32, i32* [[M]], align 4
-// CHECK9-NEXT:    store i32 [[TMP46]], i32* [[DOTCAPTURE_EXPR_3]], align 4
-// CHECK9-NEXT:    [[TMP47:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK9-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP47]], 0
+// CHECK9-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
+// CHECK9-NEXT:    store i8* null, i8** [[TMP32]], align 8
+// CHECK9-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK9-NEXT:    [[TMP34:%.*]] = bitcast i8** [[TMP33]] to i32**
+// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP34]], align 8
+// CHECK9-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK9-NEXT:    [[TMP36:%.*]] = bitcast i8** [[TMP35]] to i32**
+// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP36]], align 8
+// CHECK9-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
+// CHECK9-NEXT:    store i64 [[TMP11]], i64* [[TMP37]], align 8
+// CHECK9-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
+// CHECK9-NEXT:    store i8* null, i8** [[TMP38]], align 8
+// CHECK9-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP42:%.*]] = load i32, i32* [[N]], align 4
+// CHECK9-NEXT:    store i32 [[TMP42]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK9-NEXT:    [[TMP43:%.*]] = load i32, i32* [[M]], align 4
+// CHECK9-NEXT:    store i32 [[TMP43]], i32* [[DOTCAPTURE_EXPR_3]], align 4
+// CHECK9-NEXT:    [[TMP44:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK9-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP44]], 0
 // CHECK9-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK9-NEXT:    [[CONV5:%.*]] = sext i32 [[DIV]] to i64
-// CHECK9-NEXT:    [[TMP48:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4
-// CHECK9-NEXT:    [[SUB6:%.*]] = sub nsw i32 [[TMP48]], 0
+// CHECK9-NEXT:    [[TMP45:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4
+// CHECK9-NEXT:    [[SUB6:%.*]] = sub nsw i32 [[TMP45]], 0
 // CHECK9-NEXT:    [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1
 // CHECK9-NEXT:    [[CONV8:%.*]] = sext i32 [[DIV7]] to i64
 // CHECK9-NEXT:    [[MUL:%.*]] = mul nsw i64 [[CONV5]], [[CONV8]]
 // CHECK9-NEXT:    [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1
 // CHECK9-NEXT:    store i64 [[SUB9]], i64* [[DOTCAPTURE_EXPR_4]], align 8
-// CHECK9-NEXT:    [[TMP49:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_4]], align 8
-// CHECK9-NEXT:    [[ADD:%.*]] = add nsw i64 [[TMP49]], 1
+// CHECK9-NEXT:    [[TMP46:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_4]], align 8
+// CHECK9-NEXT:    [[ADD:%.*]] = add nsw i64 [[TMP46]], 1
 // CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 [[ADD]])
-// CHECK9-NEXT:    [[TMP50:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83.region_id, i32 5, i8** [[TMP42]], i8** [[TMP43]], i64* [[TMP44]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
-// CHECK9-NEXT:    [[TMP51:%.*]] = icmp ne i32 [[TMP50]], 0
-// CHECK9-NEXT:    br i1 [[TMP51]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK9-NEXT:    [[TMP47:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83.region_id, i32 5, i8** [[TMP39]], i8** [[TMP40]], i64* [[TMP41]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK9-NEXT:    [[TMP48:%.*]] = icmp ne i32 [[TMP47]], 0
+// CHECK9-NEXT:    br i1 [[TMP48]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK9:       omp_offload.failed:
 // CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83(i64 [[TMP7]], i64 [[TMP9]], i64 [[TMP1]], i64 [[TMP3]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK9:       omp_offload.cont:
-// CHECK9-NEXT:    [[TMP52:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK9-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef signext [[TMP52]])
+// CHECK9-NEXT:    [[TMP49:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK9-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef signext [[TMP49]])
 // CHECK9-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK9-NEXT:    [[TMP53:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK9-NEXT:    call void @llvm.stackrestore(i8* [[TMP53]])
-// CHECK9-NEXT:    [[TMP54:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK9-NEXT:    ret i32 [[TMP54]]
+// CHECK9-NEXT:    [[TMP50:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK9-NEXT:    call void @llvm.stackrestore(i8* [[TMP50]])
+// CHECK9-NEXT:    [[TMP51:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK9-NEXT:    ret i32 [[TMP51]]
 //
 //
 // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83
@@ -1305,7 +1299,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK9-LABEL: define {{[^@]+}}@_Z5tmainIiLi10ELi2EEiT_
-// CHECK9-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK9-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK9-NEXT:  entry:
 // CHECK9-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[A:%.*]] = alloca [10 x [2 x i32]], align 4
@@ -1326,7 +1320,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 20)
-// CHECK9-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l69.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.2, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK9-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l69.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.2, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.3, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
 // CHECK9-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK9-NEXT:    br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK9:       omp_offload.failed:
@@ -1436,7 +1430,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK9-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK9-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK9-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK9-NEXT:  entry:
 // CHECK9-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK9-NEXT:    ret void
@@ -1489,14 +1483,14 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[TMP9:%.*]] = load i64, i64* [[M_CASTED]], align 8
 // CHECK10-NEXT:    [[TMP10:%.*]] = mul nuw i64 [[TMP1]], [[TMP3]]
 // CHECK10-NEXT:    [[TMP11:%.*]] = mul nuw i64 [[TMP10]], 4
-// CHECK10-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP13:%.*]] = bitcast i8** [[TMP12]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP7]], i64* [[TMP13]], align 8
-// CHECK10-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP7]], i64* [[TMP15]], align 8
-// CHECK10-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK10-NEXT:    store i64 4, i64* [[TMP16]], align 8
+// CHECK10-NEXT:    [[TMP12:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK10-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP12]], i8* align 8 bitcast ([5 x i64]* @.offload_sizes to i8*), i64 40, i1 false)
+// CHECK10-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP7]], i64* [[TMP14]], align 8
+// CHECK10-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP16:%.*]] = bitcast i8** [[TMP15]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP7]], i64* [[TMP16]], align 8
 // CHECK10-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
 // CHECK10-NEXT:    store i8* null, i8** [[TMP17]], align 8
 // CHECK10-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -1505,75 +1499,69 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK10-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i64*
 // CHECK10-NEXT:    store i64 [[TMP9]], i64* [[TMP21]], align 8
-// CHECK10-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK10-NEXT:    store i64 4, i64* [[TMP22]], align 8
-// CHECK10-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK10-NEXT:    store i8* null, i8** [[TMP23]], align 8
-// CHECK10-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK10-NEXT:    [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP25]], align 8
-// CHECK10-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK10-NEXT:    [[TMP27:%.*]] = bitcast i8** [[TMP26]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP27]], align 8
-// CHECK10-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK10-NEXT:    store i64 8, i64* [[TMP28]], align 8
-// CHECK10-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK10-NEXT:    store i8* null, i8** [[TMP29]], align 8
-// CHECK10-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK10-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK10-NEXT:    store i8* null, i8** [[TMP22]], align 8
+// CHECK10-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK10-NEXT:    [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP24]], align 8
+// CHECK10-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK10-NEXT:    [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP26]], align 8
+// CHECK10-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK10-NEXT:    store i8* null, i8** [[TMP27]], align 8
+// CHECK10-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK10-NEXT:    [[TMP29:%.*]] = bitcast i8** [[TMP28]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP3]], i64* [[TMP29]], align 8
+// CHECK10-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
 // CHECK10-NEXT:    [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i64*
 // CHECK10-NEXT:    store i64 [[TMP3]], i64* [[TMP31]], align 8
-// CHECK10-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK10-NEXT:    [[TMP33:%.*]] = bitcast i8** [[TMP32]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP3]], i64* [[TMP33]], align 8
-// CHECK10-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK10-NEXT:    store i64 8, i64* [[TMP34]], align 8
-// CHECK10-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
-// CHECK10-NEXT:    store i8* null, i8** [[TMP35]], align 8
-// CHECK10-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK10-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i32**
-// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP37]], align 8
-// CHECK10-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
-// CHECK10-NEXT:    [[TMP39:%.*]] = bitcast i8** [[TMP38]] to i32**
-// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP39]], align 8
-// CHECK10-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK10-NEXT:    store i64 [[TMP11]], i64* [[TMP40]], align 8
-// CHECK10-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
-// CHECK10-NEXT:    store i8* null, i8** [[TMP41]], align 8
-// CHECK10-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP45:%.*]] = load i32, i32* [[N]], align 4
-// CHECK10-NEXT:    store i32 [[TMP45]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK10-NEXT:    [[TMP46:%.*]] = load i32, i32* [[M]], align 4
-// CHECK10-NEXT:    store i32 [[TMP46]], i32* [[DOTCAPTURE_EXPR_3]], align 4
-// CHECK10-NEXT:    [[TMP47:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK10-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP47]], 0
+// CHECK10-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
+// CHECK10-NEXT:    store i8* null, i8** [[TMP32]], align 8
+// CHECK10-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK10-NEXT:    [[TMP34:%.*]] = bitcast i8** [[TMP33]] to i32**
+// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP34]], align 8
+// CHECK10-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK10-NEXT:    [[TMP36:%.*]] = bitcast i8** [[TMP35]] to i32**
+// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP36]], align 8
+// CHECK10-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
+// CHECK10-NEXT:    store i64 [[TMP11]], i64* [[TMP37]], align 8
+// CHECK10-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
+// CHECK10-NEXT:    store i8* null, i8** [[TMP38]], align 8
+// CHECK10-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP42:%.*]] = load i32, i32* [[N]], align 4
+// CHECK10-NEXT:    store i32 [[TMP42]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK10-NEXT:    [[TMP43:%.*]] = load i32, i32* [[M]], align 4
+// CHECK10-NEXT:    store i32 [[TMP43]], i32* [[DOTCAPTURE_EXPR_3]], align 4
+// CHECK10-NEXT:    [[TMP44:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK10-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP44]], 0
 // CHECK10-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK10-NEXT:    [[CONV5:%.*]] = sext i32 [[DIV]] to i64
-// CHECK10-NEXT:    [[TMP48:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4
-// CHECK10-NEXT:    [[SUB6:%.*]] = sub nsw i32 [[TMP48]], 0
+// CHECK10-NEXT:    [[TMP45:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4
+// CHECK10-NEXT:    [[SUB6:%.*]] = sub nsw i32 [[TMP45]], 0
 // CHECK10-NEXT:    [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1
 // CHECK10-NEXT:    [[CONV8:%.*]] = sext i32 [[DIV7]] to i64
 // CHECK10-NEXT:    [[MUL:%.*]] = mul nsw i64 [[CONV5]], [[CONV8]]
 // CHECK10-NEXT:    [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1
 // CHECK10-NEXT:    store i64 [[SUB9]], i64* [[DOTCAPTURE_EXPR_4]], align 8
-// CHECK10-NEXT:    [[TMP49:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_4]], align 8
-// CHECK10-NEXT:    [[ADD:%.*]] = add nsw i64 [[TMP49]], 1
+// CHECK10-NEXT:    [[TMP46:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_4]], align 8
+// CHECK10-NEXT:    [[ADD:%.*]] = add nsw i64 [[TMP46]], 1
 // CHECK10-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 [[ADD]])
-// CHECK10-NEXT:    [[TMP50:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83.region_id, i32 5, i8** [[TMP42]], i8** [[TMP43]], i64* [[TMP44]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
-// CHECK10-NEXT:    [[TMP51:%.*]] = icmp ne i32 [[TMP50]], 0
-// CHECK10-NEXT:    br i1 [[TMP51]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK10-NEXT:    [[TMP47:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83.region_id, i32 5, i8** [[TMP39]], i8** [[TMP40]], i64* [[TMP41]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK10-NEXT:    [[TMP48:%.*]] = icmp ne i32 [[TMP47]], 0
+// CHECK10-NEXT:    br i1 [[TMP48]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK10:       omp_offload.failed:
 // CHECK10-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83(i64 [[TMP7]], i64 [[TMP9]], i64 [[TMP1]], i64 [[TMP3]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK10-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK10:       omp_offload.cont:
-// CHECK10-NEXT:    [[TMP52:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK10-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef signext [[TMP52]])
+// CHECK10-NEXT:    [[TMP49:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK10-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef signext [[TMP49]])
 // CHECK10-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK10-NEXT:    [[TMP53:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK10-NEXT:    call void @llvm.stackrestore(i8* [[TMP53]])
-// CHECK10-NEXT:    [[TMP54:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK10-NEXT:    ret i32 [[TMP54]]
+// CHECK10-NEXT:    [[TMP50:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK10-NEXT:    call void @llvm.stackrestore(i8* [[TMP50]])
+// CHECK10-NEXT:    [[TMP51:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK10-NEXT:    ret i32 [[TMP51]]
 //
 //
 // CHECK10-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83
@@ -1765,7 +1753,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK10-LABEL: define {{[^@]+}}@_Z5tmainIiLi10ELi2EEiT_
-// CHECK10-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK10-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK10-NEXT:  entry:
 // CHECK10-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK10-NEXT:    [[A:%.*]] = alloca [10 x [2 x i32]], align 4
@@ -1786,7 +1774,7 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK10-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK10-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 20)
-// CHECK10-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l69.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.2, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK10-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l69.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.2, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.3, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
 // CHECK10-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK10-NEXT:    br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK10:       omp_offload.failed:
@@ -1896,7 +1884,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK10-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK10-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK10-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK10-NEXT:  entry:
 // CHECK10-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK10-NEXT:    ret void
@@ -1946,14 +1934,14 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP8:%.*]] = mul nuw i32 [[TMP0]], [[TMP1]]
 // CHECK11-NEXT:    [[TMP9:%.*]] = mul nuw i32 [[TMP8]], 4
 // CHECK11-NEXT:    [[TMP10:%.*]] = sext i32 [[TMP9]] to i64
-// CHECK11-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP5]], i32* [[TMP12]], align 4
-// CHECK11-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP5]], i32* [[TMP14]], align 4
-// CHECK11-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK11-NEXT:    store i64 4, i64* [[TMP15]], align 4
+// CHECK11-NEXT:    [[TMP11:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK11-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP11]], i8* align 4 bitcast ([5 x i64]* @.offload_sizes to i8*), i32 40, i1 false)
+// CHECK11-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP13:%.*]] = bitcast i8** [[TMP12]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP5]], i32* [[TMP13]], align 4
+// CHECK11-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP5]], i32* [[TMP15]], align 4
 // CHECK11-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
 // CHECK11-NEXT:    store i8* null, i8** [[TMP16]], align 4
 // CHECK11-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -1962,75 +1950,69 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK11-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32*
 // CHECK11-NEXT:    store i32 [[TMP7]], i32* [[TMP20]], align 4
-// CHECK11-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK11-NEXT:    store i64 4, i64* [[TMP21]], align 4
-// CHECK11-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK11-NEXT:    store i8* null, i8** [[TMP22]], align 4
-// CHECK11-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP24]], align 4
-// CHECK11-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP26]], align 4
-// CHECK11-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK11-NEXT:    store i64 4, i64* [[TMP27]], align 4
-// CHECK11-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK11-NEXT:    store i8* null, i8** [[TMP28]], align 4
-// CHECK11-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK11-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK11-NEXT:    store i8* null, i8** [[TMP21]], align 4
+// CHECK11-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP23]], align 4
+// CHECK11-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP25]], align 4
+// CHECK11-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK11-NEXT:    store i8* null, i8** [[TMP26]], align 4
+// CHECK11-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK11-NEXT:    [[TMP28:%.*]] = bitcast i8** [[TMP27]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP1]], i32* [[TMP28]], align 4
+// CHECK11-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
 // CHECK11-NEXT:    [[TMP30:%.*]] = bitcast i8** [[TMP29]] to i32*
 // CHECK11-NEXT:    store i32 [[TMP1]], i32* [[TMP30]], align 4
-// CHECK11-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK11-NEXT:    [[TMP32:%.*]] = bitcast i8** [[TMP31]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP1]], i32* [[TMP32]], align 4
-// CHECK11-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK11-NEXT:    store i64 4, i64* [[TMP33]], align 4
-// CHECK11-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
-// CHECK11-NEXT:    store i8* null, i8** [[TMP34]], align 4
-// CHECK11-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK11-NEXT:    [[TMP36:%.*]] = bitcast i8** [[TMP35]] to i32**
-// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP36]], align 4
-// CHECK11-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
-// CHECK11-NEXT:    [[TMP38:%.*]] = bitcast i8** [[TMP37]] to i32**
-// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP38]], align 4
-// CHECK11-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK11-NEXT:    store i64 [[TMP10]], i64* [[TMP39]], align 4
-// CHECK11-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
-// CHECK11-NEXT:    store i8* null, i8** [[TMP40]], align 4
-// CHECK11-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP44:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP44]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK11-NEXT:    [[TMP45:%.*]] = load i32, i32* [[M]], align 4
-// CHECK11-NEXT:    store i32 [[TMP45]], i32* [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK11-NEXT:    [[TMP46:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK11-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP46]], 0
+// CHECK11-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
+// CHECK11-NEXT:    store i8* null, i8** [[TMP31]], align 4
+// CHECK11-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK11-NEXT:    [[TMP33:%.*]] = bitcast i8** [[TMP32]] to i32**
+// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP33]], align 4
+// CHECK11-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK11-NEXT:    [[TMP35:%.*]] = bitcast i8** [[TMP34]] to i32**
+// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP35]], align 4
+// CHECK11-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
+// CHECK11-NEXT:    store i64 [[TMP10]], i64* [[TMP36]], align 4
+// CHECK11-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
+// CHECK11-NEXT:    store i8* null, i8** [[TMP37]], align 4
+// CHECK11-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP41:%.*]] = load i32, i32* [[N]], align 4
+// CHECK11-NEXT:    store i32 [[TMP41]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK11-NEXT:    [[TMP42:%.*]] = load i32, i32* [[M]], align 4
+// CHECK11-NEXT:    store i32 [[TMP42]], i32* [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK11-NEXT:    [[TMP43:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK11-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP43]], 0
 // CHECK11-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK11-NEXT:    [[CONV:%.*]] = sext i32 [[DIV]] to i64
-// CHECK11-NEXT:    [[TMP47:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK11-NEXT:    [[SUB4:%.*]] = sub nsw i32 [[TMP47]], 0
+// CHECK11-NEXT:    [[TMP44:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK11-NEXT:    [[SUB4:%.*]] = sub nsw i32 [[TMP44]], 0
 // CHECK11-NEXT:    [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1
 // CHECK11-NEXT:    [[CONV6:%.*]] = sext i32 [[DIV5]] to i64
 // CHECK11-NEXT:    [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]]
 // CHECK11-NEXT:    [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1
 // CHECK11-NEXT:    store i64 [[SUB7]], i64* [[DOTCAPTURE_EXPR_3]], align 8
-// CHECK11-NEXT:    [[TMP48:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8
-// CHECK11-NEXT:    [[ADD:%.*]] = add nsw i64 [[TMP48]], 1
+// CHECK11-NEXT:    [[TMP45:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8
+// CHECK11-NEXT:    [[ADD:%.*]] = add nsw i64 [[TMP45]], 1
 // CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 [[ADD]])
-// CHECK11-NEXT:    [[TMP49:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83.region_id, i32 5, i8** [[TMP41]], i8** [[TMP42]], i64* [[TMP43]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
-// CHECK11-NEXT:    [[TMP50:%.*]] = icmp ne i32 [[TMP49]], 0
-// CHECK11-NEXT:    br i1 [[TMP50]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK11-NEXT:    [[TMP46:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83.region_id, i32 5, i8** [[TMP38]], i8** [[TMP39]], i64* [[TMP40]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK11-NEXT:    [[TMP47:%.*]] = icmp ne i32 [[TMP46]], 0
+// CHECK11-NEXT:    br i1 [[TMP47]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK11:       omp_offload.failed:
 // CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83(i32 [[TMP5]], i32 [[TMP7]], i32 [[TMP0]], i32 [[TMP1]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK11:       omp_offload.cont:
-// CHECK11-NEXT:    [[TMP51:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK11-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef [[TMP51]])
+// CHECK11-NEXT:    [[TMP48:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK11-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef [[TMP48]])
 // CHECK11-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK11-NEXT:    [[TMP52:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK11-NEXT:    call void @llvm.stackrestore(i8* [[TMP52]])
-// CHECK11-NEXT:    [[TMP53:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK11-NEXT:    ret i32 [[TMP53]]
+// CHECK11-NEXT:    [[TMP49:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK11-NEXT:    call void @llvm.stackrestore(i8* [[TMP49]])
+// CHECK11-NEXT:    [[TMP50:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK11-NEXT:    ret i32 [[TMP50]]
 //
 //
 // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83
@@ -2218,7 +2200,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK11-LABEL: define {{[^@]+}}@_Z5tmainIiLi10ELi2EEiT_
-// CHECK11-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK11-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK11-NEXT:  entry:
 // CHECK11-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[A:%.*]] = alloca [10 x [2 x i32]], align 4
@@ -2239,7 +2221,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 20)
-// CHECK11-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l69.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.2, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK11-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l69.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.2, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.3, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
 // CHECK11-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK11-NEXT:    br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK11:       omp_offload.failed:
@@ -2347,7 +2329,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK11-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK11-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK11-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK11-NEXT:  entry:
 // CHECK11-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK11-NEXT:    ret void
@@ -2397,14 +2379,14 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[TMP8:%.*]] = mul nuw i32 [[TMP0]], [[TMP1]]
 // CHECK12-NEXT:    [[TMP9:%.*]] = mul nuw i32 [[TMP8]], 4
 // CHECK12-NEXT:    [[TMP10:%.*]] = sext i32 [[TMP9]] to i64
-// CHECK12-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP5]], i32* [[TMP12]], align 4
-// CHECK12-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP5]], i32* [[TMP14]], align 4
-// CHECK12-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK12-NEXT:    store i64 4, i64* [[TMP15]], align 4
+// CHECK12-NEXT:    [[TMP11:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK12-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP11]], i8* align 4 bitcast ([5 x i64]* @.offload_sizes to i8*), i32 40, i1 false)
+// CHECK12-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP13:%.*]] = bitcast i8** [[TMP12]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP5]], i32* [[TMP13]], align 4
+// CHECK12-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP5]], i32* [[TMP15]], align 4
 // CHECK12-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
 // CHECK12-NEXT:    store i8* null, i8** [[TMP16]], align 4
 // CHECK12-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -2413,75 +2395,69 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK12-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32*
 // CHECK12-NEXT:    store i32 [[TMP7]], i32* [[TMP20]], align 4
-// CHECK12-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK12-NEXT:    store i64 4, i64* [[TMP21]], align 4
-// CHECK12-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK12-NEXT:    store i8* null, i8** [[TMP22]], align 4
-// CHECK12-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK12-NEXT:    [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP24]], align 4
-// CHECK12-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK12-NEXT:    [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP26]], align 4
-// CHECK12-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK12-NEXT:    store i64 4, i64* [[TMP27]], align 4
-// CHECK12-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK12-NEXT:    store i8* null, i8** [[TMP28]], align 4
-// CHECK12-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK12-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK12-NEXT:    store i8* null, i8** [[TMP21]], align 4
+// CHECK12-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK12-NEXT:    [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP23]], align 4
+// CHECK12-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK12-NEXT:    [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP25]], align 4
+// CHECK12-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK12-NEXT:    store i8* null, i8** [[TMP26]], align 4
+// CHECK12-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK12-NEXT:    [[TMP28:%.*]] = bitcast i8** [[TMP27]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP1]], i32* [[TMP28]], align 4
+// CHECK12-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
 // CHECK12-NEXT:    [[TMP30:%.*]] = bitcast i8** [[TMP29]] to i32*
 // CHECK12-NEXT:    store i32 [[TMP1]], i32* [[TMP30]], align 4
-// CHECK12-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK12-NEXT:    [[TMP32:%.*]] = bitcast i8** [[TMP31]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP1]], i32* [[TMP32]], align 4
-// CHECK12-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK12-NEXT:    store i64 4, i64* [[TMP33]], align 4
-// CHECK12-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
-// CHECK12-NEXT:    store i8* null, i8** [[TMP34]], align 4
-// CHECK12-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK12-NEXT:    [[TMP36:%.*]] = bitcast i8** [[TMP35]] to i32**
-// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP36]], align 4
-// CHECK12-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
-// CHECK12-NEXT:    [[TMP38:%.*]] = bitcast i8** [[TMP37]] to i32**
-// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP38]], align 4
-// CHECK12-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK12-NEXT:    store i64 [[TMP10]], i64* [[TMP39]], align 4
-// CHECK12-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
-// CHECK12-NEXT:    store i8* null, i8** [[TMP40]], align 4
-// CHECK12-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP44:%.*]] = load i32, i32* [[N]], align 4
-// CHECK12-NEXT:    store i32 [[TMP44]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK12-NEXT:    [[TMP45:%.*]] = load i32, i32* [[M]], align 4
-// CHECK12-NEXT:    store i32 [[TMP45]], i32* [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK12-NEXT:    [[TMP46:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK12-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP46]], 0
+// CHECK12-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
+// CHECK12-NEXT:    store i8* null, i8** [[TMP31]], align 4
+// CHECK12-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK12-NEXT:    [[TMP33:%.*]] = bitcast i8** [[TMP32]] to i32**
+// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP33]], align 4
+// CHECK12-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK12-NEXT:    [[TMP35:%.*]] = bitcast i8** [[TMP34]] to i32**
+// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP35]], align 4
+// CHECK12-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
+// CHECK12-NEXT:    store i64 [[TMP10]], i64* [[TMP36]], align 4
+// CHECK12-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
+// CHECK12-NEXT:    store i8* null, i8** [[TMP37]], align 4
+// CHECK12-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP41:%.*]] = load i32, i32* [[N]], align 4
+// CHECK12-NEXT:    store i32 [[TMP41]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK12-NEXT:    [[TMP42:%.*]] = load i32, i32* [[M]], align 4
+// CHECK12-NEXT:    store i32 [[TMP42]], i32* [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK12-NEXT:    [[TMP43:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK12-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP43]], 0
 // CHECK12-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK12-NEXT:    [[CONV:%.*]] = sext i32 [[DIV]] to i64
-// CHECK12-NEXT:    [[TMP47:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK12-NEXT:    [[SUB4:%.*]] = sub nsw i32 [[TMP47]], 0
+// CHECK12-NEXT:    [[TMP44:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK12-NEXT:    [[SUB4:%.*]] = sub nsw i32 [[TMP44]], 0
 // CHECK12-NEXT:    [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1
 // CHECK12-NEXT:    [[CONV6:%.*]] = sext i32 [[DIV5]] to i64
 // CHECK12-NEXT:    [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]]
 // CHECK12-NEXT:    [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1
 // CHECK12-NEXT:    store i64 [[SUB7]], i64* [[DOTCAPTURE_EXPR_3]], align 8
-// CHECK12-NEXT:    [[TMP48:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8
-// CHECK12-NEXT:    [[ADD:%.*]] = add nsw i64 [[TMP48]], 1
+// CHECK12-NEXT:    [[TMP45:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8
+// CHECK12-NEXT:    [[ADD:%.*]] = add nsw i64 [[TMP45]], 1
 // CHECK12-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 [[ADD]])
-// CHECK12-NEXT:    [[TMP49:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83.region_id, i32 5, i8** [[TMP41]], i8** [[TMP42]], i64* [[TMP43]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
-// CHECK12-NEXT:    [[TMP50:%.*]] = icmp ne i32 [[TMP49]], 0
-// CHECK12-NEXT:    br i1 [[TMP50]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK12-NEXT:    [[TMP46:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83.region_id, i32 5, i8** [[TMP38]], i8** [[TMP39]], i64* [[TMP40]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK12-NEXT:    [[TMP47:%.*]] = icmp ne i32 [[TMP46]], 0
+// CHECK12-NEXT:    br i1 [[TMP47]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK12:       omp_offload.failed:
 // CHECK12-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83(i32 [[TMP5]], i32 [[TMP7]], i32 [[TMP0]], i32 [[TMP1]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK12-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK12:       omp_offload.cont:
-// CHECK12-NEXT:    [[TMP51:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK12-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef [[TMP51]])
+// CHECK12-NEXT:    [[TMP48:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK12-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef [[TMP48]])
 // CHECK12-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK12-NEXT:    [[TMP52:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK12-NEXT:    call void @llvm.stackrestore(i8* [[TMP52]])
-// CHECK12-NEXT:    [[TMP53:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK12-NEXT:    ret i32 [[TMP53]]
+// CHECK12-NEXT:    [[TMP49:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK12-NEXT:    call void @llvm.stackrestore(i8* [[TMP49]])
+// CHECK12-NEXT:    [[TMP50:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK12-NEXT:    ret i32 [[TMP50]]
 //
 //
 // CHECK12-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83
@@ -2669,7 +2645,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK12-LABEL: define {{[^@]+}}@_Z5tmainIiLi10ELi2EEiT_
-// CHECK12-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK12-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK12-NEXT:  entry:
 // CHECK12-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK12-NEXT:    [[A:%.*]] = alloca [10 x [2 x i32]], align 4
@@ -2690,7 +2666,7 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK12-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK12-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 20)
-// CHECK12-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l69.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.2, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK12-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l69.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.2, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.3, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
 // CHECK12-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK12-NEXT:    br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK12:       omp_offload.failed:
@@ -2798,7 +2774,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK12-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK12-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK12-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK12-NEXT:  entry:
 // CHECK12-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK12-NEXT:    ret void

diff  --git a/clang/test/OpenMP/teams_distribute_simd_dist_schedule_codegen.cpp b/clang/test/OpenMP/teams_distribute_simd_dist_schedule_codegen.cpp
index e7183e046841c..eba16f416f0fc 100644
--- a/clang/test/OpenMP/teams_distribute_simd_dist_schedule_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_simd_dist_schedule_codegen.cpp
@@ -2139,14 +2139,14 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    store i32 [[TMP3]], i32* [[CONV]], align 4
 // CHECK9-NEXT:    [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8
 // CHECK9-NEXT:    [[TMP5:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK9-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP4]], i64* [[TMP7]], align 8
-// CHECK9-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP4]], i64* [[TMP9]], align 8
-// CHECK9-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK9-NEXT:    store i64 4, i64* [[TMP10]], align 8
+// CHECK9-NEXT:    [[TMP6:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK9-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP6]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes to i8*), i64 24, i1 false)
+// CHECK9-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP4]], i64* [[TMP8]], align 8
+// CHECK9-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP4]], i64* [[TMP10]], align 8
 // CHECK9-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
 // CHECK9-NEXT:    store i8* null, i8** [[TMP11]], align 8
 // CHECK9-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -2155,160 +2155,154 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK9-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i64*
 // CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP15]], align 8
-// CHECK9-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK9-NEXT:    store i64 8, i64* [[TMP16]], align 8
-// CHECK9-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK9-NEXT:    store i8* null, i8** [[TMP17]], align 8
-// CHECK9-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
-// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP19]], align 8
-// CHECK9-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32**
-// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP21]], align 8
-// CHECK9-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK9-NEXT:    store i64 [[TMP5]], i64* [[TMP22]], align 8
-// CHECK9-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK9-NEXT:    store i8* null, i8** [[TMP23]], align 8
-// CHECK9-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP27:%.*]] = load i32, i32* [[N]], align 4
-// CHECK9-NEXT:    store i32 [[TMP27]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK9-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK9-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP28]], 0
+// CHECK9-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK9-NEXT:    store i8* null, i8** [[TMP16]], align 8
+// CHECK9-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32**
+// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP18]], align 8
+// CHECK9-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32**
+// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP20]], align 8
+// CHECK9-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
+// CHECK9-NEXT:    store i64 [[TMP5]], i64* [[TMP21]], align 8
+// CHECK9-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK9-NEXT:    store i8* null, i8** [[TMP22]], align 8
+// CHECK9-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP26:%.*]] = load i32, i32* [[N]], align 4
+// CHECK9-NEXT:    store i32 [[TMP26]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK9-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK9-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP27]], 0
 // CHECK9-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK9-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK9-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK9-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK9-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP29]], 1
-// CHECK9-NEXT:    [[TMP30:%.*]] = zext i32 [[ADD]] to i64
-// CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 [[TMP30]])
-// CHECK9-NEXT:    [[TMP31:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l100.region_id, i32 3, i8** [[TMP24]], i8** [[TMP25]], i64* [[TMP26]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
-// CHECK9-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
-// CHECK9-NEXT:    br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK9-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK9-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP28]], 1
+// CHECK9-NEXT:    [[TMP29:%.*]] = zext i32 [[ADD]] to i64
+// CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 [[TMP29]])
+// CHECK9-NEXT:    [[TMP30:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l100.region_id, i32 3, i8** [[TMP23]], i8** [[TMP24]], i64* [[TMP25]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK9-NEXT:    [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
+// CHECK9-NEXT:    br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK9:       omp_offload.failed:
 // CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l100(i64 [[TMP4]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK9:       omp_offload.cont:
-// CHECK9-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N]], align 4
+// CHECK9-NEXT:    [[TMP32:%.*]] = load i32, i32* [[N]], align 4
 // CHECK9-NEXT:    [[CONV4:%.*]] = bitcast i64* [[N_CASTED3]] to i32*
-// CHECK9-NEXT:    store i32 [[TMP33]], i32* [[CONV4]], align 4
-// CHECK9-NEXT:    [[TMP34:%.*]] = load i64, i64* [[N_CASTED3]], align 8
-// CHECK9-NEXT:    [[TMP35:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK9-NEXT:    store i32 [[TMP32]], i32* [[CONV4]], align 4
+// CHECK9-NEXT:    [[TMP33:%.*]] = load i64, i64* [[N_CASTED3]], align 8
+// CHECK9-NEXT:    [[TMP34:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK9-NEXT:    [[TMP35:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES8]] to i8*
+// CHECK9-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP35]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes.2 to i8*), i64 24, i1 false)
 // CHECK9-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP34]], i64* [[TMP37]], align 8
+// CHECK9-NEXT:    store i64 [[TMP33]], i64* [[TMP37]], align 8
 // CHECK9-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP39:%.*]] = bitcast i8** [[TMP38]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP34]], i64* [[TMP39]], align 8
-// CHECK9-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 0
-// CHECK9-NEXT:    store i64 4, i64* [[TMP40]], align 8
-// CHECK9-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 0
-// CHECK9-NEXT:    store i8* null, i8** [[TMP41]], align 8
-// CHECK9-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 1
-// CHECK9-NEXT:    [[TMP43:%.*]] = bitcast i8** [[TMP42]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP43]], align 8
-// CHECK9-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 1
-// CHECK9-NEXT:    [[TMP45:%.*]] = bitcast i8** [[TMP44]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP45]], align 8
-// CHECK9-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 1
-// CHECK9-NEXT:    store i64 8, i64* [[TMP46]], align 8
-// CHECK9-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 1
-// CHECK9-NEXT:    store i8* null, i8** [[TMP47]], align 8
-// CHECK9-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 2
+// CHECK9-NEXT:    store i64 [[TMP33]], i64* [[TMP39]], align 8
+// CHECK9-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 0
+// CHECK9-NEXT:    store i8* null, i8** [[TMP40]], align 8
+// CHECK9-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 1
+// CHECK9-NEXT:    [[TMP42:%.*]] = bitcast i8** [[TMP41]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP42]], align 8
+// CHECK9-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 1
+// CHECK9-NEXT:    [[TMP44:%.*]] = bitcast i8** [[TMP43]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP44]], align 8
+// CHECK9-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 1
+// CHECK9-NEXT:    store i8* null, i8** [[TMP45]], align 8
+// CHECK9-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP47:%.*]] = bitcast i8** [[TMP46]] to i32**
+// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP47]], align 8
+// CHECK9-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 2
 // CHECK9-NEXT:    [[TMP49:%.*]] = bitcast i8** [[TMP48]] to i32**
 // CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP49]], align 8
-// CHECK9-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP51:%.*]] = bitcast i8** [[TMP50]] to i32**
-// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP51]], align 8
-// CHECK9-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 2
-// CHECK9-NEXT:    store i64 [[TMP35]], i64* [[TMP52]], align 8
-// CHECK9-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 2
-// CHECK9-NEXT:    store i8* null, i8** [[TMP53]], align 8
-// CHECK9-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP57:%.*]] = load i32, i32* [[N]], align 4
-// CHECK9-NEXT:    store i32 [[TMP57]], i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK9-NEXT:    [[TMP58:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK9-NEXT:    [[SUB12:%.*]] = sub nsw i32 [[TMP58]], 0
+// CHECK9-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 2
+// CHECK9-NEXT:    store i64 [[TMP34]], i64* [[TMP50]], align 8
+// CHECK9-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 2
+// CHECK9-NEXT:    store i8* null, i8** [[TMP51]], align 8
+// CHECK9-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP55:%.*]] = load i32, i32* [[N]], align 4
+// CHECK9-NEXT:    store i32 [[TMP55]], i32* [[DOTCAPTURE_EXPR_10]], align 4
+// CHECK9-NEXT:    [[TMP56:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
+// CHECK9-NEXT:    [[SUB12:%.*]] = sub nsw i32 [[TMP56]], 0
 // CHECK9-NEXT:    [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1
 // CHECK9-NEXT:    [[SUB14:%.*]] = sub nsw i32 [[DIV13]], 1
 // CHECK9-NEXT:    store i32 [[SUB14]], i32* [[DOTCAPTURE_EXPR_11]], align 4
-// CHECK9-NEXT:    [[TMP59:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_11]], align 4
-// CHECK9-NEXT:    [[ADD15:%.*]] = add nsw i32 [[TMP59]], 1
-// CHECK9-NEXT:    [[TMP60:%.*]] = zext i32 [[ADD15]] to i64
-// CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 [[TMP60]])
-// CHECK9-NEXT:    [[TMP61:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l105.region_id, i32 3, i8** [[TMP54]], i8** [[TMP55]], i64* [[TMP56]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.2, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
-// CHECK9-NEXT:    [[TMP62:%.*]] = icmp ne i32 [[TMP61]], 0
-// CHECK9-NEXT:    br i1 [[TMP62]], label [[OMP_OFFLOAD_FAILED16:%.*]], label [[OMP_OFFLOAD_CONT17:%.*]]
+// CHECK9-NEXT:    [[TMP57:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_11]], align 4
+// CHECK9-NEXT:    [[ADD15:%.*]] = add nsw i32 [[TMP57]], 1
+// CHECK9-NEXT:    [[TMP58:%.*]] = zext i32 [[ADD15]] to i64
+// CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 [[TMP58]])
+// CHECK9-NEXT:    [[TMP59:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l105.region_id, i32 3, i8** [[TMP52]], i8** [[TMP53]], i64* [[TMP54]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.3, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK9-NEXT:    [[TMP60:%.*]] = icmp ne i32 [[TMP59]], 0
+// CHECK9-NEXT:    br i1 [[TMP60]], label [[OMP_OFFLOAD_FAILED16:%.*]], label [[OMP_OFFLOAD_CONT17:%.*]]
 // CHECK9:       omp_offload.failed16:
-// CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l105(i64 [[TMP34]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l105(i64 [[TMP33]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT17]]
 // CHECK9:       omp_offload.cont17:
-// CHECK9-NEXT:    [[TMP63:%.*]] = load i32, i32* [[N]], align 4
+// CHECK9-NEXT:    [[TMP61:%.*]] = load i32, i32* [[N]], align 4
 // CHECK9-NEXT:    [[CONV19:%.*]] = bitcast i64* [[N_CASTED18]] to i32*
-// CHECK9-NEXT:    store i32 [[TMP63]], i32* [[CONV19]], align 4
-// CHECK9-NEXT:    [[TMP64:%.*]] = load i64, i64* [[N_CASTED18]], align 8
-// CHECK9-NEXT:    [[TMP65:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK9-NEXT:    [[TMP66:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP67:%.*]] = bitcast i8** [[TMP66]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP64]], i64* [[TMP67]], align 8
-// CHECK9-NEXT:    [[TMP68:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP69:%.*]] = bitcast i8** [[TMP68]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP64]], i64* [[TMP69]], align 8
-// CHECK9-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES23]], i32 0, i32 0
-// CHECK9-NEXT:    store i64 4, i64* [[TMP70]], align 8
-// CHECK9-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 0
-// CHECK9-NEXT:    store i8* null, i8** [[TMP71]], align 8
-// CHECK9-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 1
+// CHECK9-NEXT:    store i32 [[TMP61]], i32* [[CONV19]], align 4
+// CHECK9-NEXT:    [[TMP62:%.*]] = load i64, i64* [[N_CASTED18]], align 8
+// CHECK9-NEXT:    [[TMP63:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK9-NEXT:    [[TMP64:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES23]] to i8*
+// CHECK9-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP64]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes.5 to i8*), i64 24, i1 false)
+// CHECK9-NEXT:    [[TMP65:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP66:%.*]] = bitcast i8** [[TMP65]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP62]], i64* [[TMP66]], align 8
+// CHECK9-NEXT:    [[TMP67:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP68:%.*]] = bitcast i8** [[TMP67]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP62]], i64* [[TMP68]], align 8
+// CHECK9-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 0
+// CHECK9-NEXT:    store i8* null, i8** [[TMP69]], align 8
+// CHECK9-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 1
+// CHECK9-NEXT:    [[TMP71:%.*]] = bitcast i8** [[TMP70]] to i64*
+// CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP71]], align 8
+// CHECK9-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 1
 // CHECK9-NEXT:    [[TMP73:%.*]] = bitcast i8** [[TMP72]] to i64*
 // CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP73]], align 8
-// CHECK9-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 1
-// CHECK9-NEXT:    [[TMP75:%.*]] = bitcast i8** [[TMP74]] to i64*
-// CHECK9-NEXT:    store i64 [[TMP1]], i64* [[TMP75]], align 8
-// CHECK9-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES23]], i32 0, i32 1
-// CHECK9-NEXT:    store i64 8, i64* [[TMP76]], align 8
-// CHECK9-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 1
-// CHECK9-NEXT:    store i8* null, i8** [[TMP77]], align 8
-// CHECK9-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP79:%.*]] = bitcast i8** [[TMP78]] to i32**
-// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP79]], align 8
-// CHECK9-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 2
-// CHECK9-NEXT:    [[TMP81:%.*]] = bitcast i8** [[TMP80]] to i32**
-// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP81]], align 8
-// CHECK9-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES23]], i32 0, i32 2
-// CHECK9-NEXT:    store i64 [[TMP65]], i64* [[TMP82]], align 8
-// CHECK9-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 2
-// CHECK9-NEXT:    store i8* null, i8** [[TMP83]], align 8
-// CHECK9-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES23]], i32 0, i32 0
-// CHECK9-NEXT:    [[TMP87:%.*]] = load i32, i32* [[N]], align 4
-// CHECK9-NEXT:    store i32 [[TMP87]], i32* [[DOTCAPTURE_EXPR_25]], align 4
-// CHECK9-NEXT:    [[TMP88:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_25]], align 4
-// CHECK9-NEXT:    [[SUB27:%.*]] = sub nsw i32 [[TMP88]], 0
+// CHECK9-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 1
+// CHECK9-NEXT:    store i8* null, i8** [[TMP74]], align 8
+// CHECK9-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP76:%.*]] = bitcast i8** [[TMP75]] to i32**
+// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP76]], align 8
+// CHECK9-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 2
+// CHECK9-NEXT:    [[TMP78:%.*]] = bitcast i8** [[TMP77]] to i32**
+// CHECK9-NEXT:    store i32* [[VLA]], i32** [[TMP78]], align 8
+// CHECK9-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES23]], i32 0, i32 2
+// CHECK9-NEXT:    store i64 [[TMP63]], i64* [[TMP79]], align 8
+// CHECK9-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 2
+// CHECK9-NEXT:    store i8* null, i8** [[TMP80]], align 8
+// CHECK9-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES23]], i32 0, i32 0
+// CHECK9-NEXT:    [[TMP84:%.*]] = load i32, i32* [[N]], align 4
+// CHECK9-NEXT:    store i32 [[TMP84]], i32* [[DOTCAPTURE_EXPR_25]], align 4
+// CHECK9-NEXT:    [[TMP85:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_25]], align 4
+// CHECK9-NEXT:    [[SUB27:%.*]] = sub nsw i32 [[TMP85]], 0
 // CHECK9-NEXT:    [[DIV28:%.*]] = sdiv i32 [[SUB27]], 1
 // CHECK9-NEXT:    [[SUB29:%.*]] = sub nsw i32 [[DIV28]], 1
 // CHECK9-NEXT:    store i32 [[SUB29]], i32* [[DOTCAPTURE_EXPR_26]], align 4
-// CHECK9-NEXT:    [[TMP89:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_26]], align 4
-// CHECK9-NEXT:    [[ADD30:%.*]] = add nsw i32 [[TMP89]], 1
-// CHECK9-NEXT:    [[TMP90:%.*]] = zext i32 [[ADD30]] to i64
-// CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 [[TMP90]])
-// CHECK9-NEXT:    [[TMP91:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l110.region_id, i32 3, i8** [[TMP84]], i8** [[TMP85]], i64* [[TMP86]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
-// CHECK9-NEXT:    [[TMP92:%.*]] = icmp ne i32 [[TMP91]], 0
-// CHECK9-NEXT:    br i1 [[TMP92]], label [[OMP_OFFLOAD_FAILED31:%.*]], label [[OMP_OFFLOAD_CONT32:%.*]]
+// CHECK9-NEXT:    [[TMP86:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_26]], align 4
+// CHECK9-NEXT:    [[ADD30:%.*]] = add nsw i32 [[TMP86]], 1
+// CHECK9-NEXT:    [[TMP87:%.*]] = zext i32 [[ADD30]] to i64
+// CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 [[TMP87]])
+// CHECK9-NEXT:    [[TMP88:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l110.region_id, i32 3, i8** [[TMP81]], i8** [[TMP82]], i64* [[TMP83]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.6, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK9-NEXT:    [[TMP89:%.*]] = icmp ne i32 [[TMP88]], 0
+// CHECK9-NEXT:    br i1 [[TMP89]], label [[OMP_OFFLOAD_FAILED31:%.*]], label [[OMP_OFFLOAD_CONT32:%.*]]
 // CHECK9:       omp_offload.failed31:
-// CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l110(i64 [[TMP64]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK9-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l110(i64 [[TMP62]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK9-NEXT:    br label [[OMP_OFFLOAD_CONT32]]
 // CHECK9:       omp_offload.cont32:
-// CHECK9-NEXT:    [[TMP93:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK9-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10EEiT_(i32 noundef signext [[TMP93]])
+// CHECK9-NEXT:    [[TMP90:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK9-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10EEiT_(i32 noundef signext [[TMP90]])
 // CHECK9-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK9-NEXT:    [[TMP94:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK9-NEXT:    call void @llvm.stackrestore(i8* [[TMP94]])
-// CHECK9-NEXT:    [[TMP95:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK9-NEXT:    ret i32 [[TMP95]]
+// CHECK9-NEXT:    [[TMP91:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK9-NEXT:    call void @llvm.stackrestore(i8* [[TMP91]])
+// CHECK9-NEXT:    [[TMP92:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK9-NEXT:    ret i32 [[TMP92]]
 //
 //
 // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l100
@@ -2577,11 +2571,11 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK9-NEXT:    store i32 [[TMP3]], i32* [[CONV1]], align 4
 // CHECK9-NEXT:    [[TMP4:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32* [[CONV]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP4]])
+// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*, i64)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32* [[CONV]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP4]])
 // CHECK9-NEXT:    ret void
 //
 //
-// CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3
+// CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..4
 // CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK9-NEXT:  entry:
 // CHECK9-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -2710,7 +2704,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK9-LABEL: define {{[^@]+}}@_Z5tmainIiLi10EEiT_
-// CHECK9-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK9-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK9-NEXT:  entry:
 // CHECK9-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[A:%.*]] = alloca [10 x i32], align 4
@@ -2738,7 +2732,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK9-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l79.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.6, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK9-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l79.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.8, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
 // CHECK9-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK9-NEXT:    br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK9:       omp_offload.failed:
@@ -2756,7 +2750,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
 // CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK9-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l84.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.8, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK9-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l84.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.11, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.12, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
 // CHECK9-NEXT:    [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
 // CHECK9-NEXT:    br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]]
 // CHECK9:       omp_offload.failed5:
@@ -2774,7 +2768,7 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0
 // CHECK9-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS8]], i32 0, i32 0
 // CHECK9-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK9-NEXT:    [[TMP25:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l89.region_id, i32 1, i8** [[TMP23]], i8** [[TMP24]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.11, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.12, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK9-NEXT:    [[TMP25:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l89.region_id, i32 1, i8** [[TMP23]], i8** [[TMP24]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.14, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.15, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
 // CHECK9-NEXT:    [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0
 // CHECK9-NEXT:    br i1 [[TMP26]], label [[OMP_OFFLOAD_FAILED11:%.*]], label [[OMP_OFFLOAD_CONT12:%.*]]
 // CHECK9:       omp_offload.failed11:
@@ -2790,11 +2784,11 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8
 // CHECK9-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8
 // CHECK9-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8
-// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK9-NEXT:    ret void
 //
 //
-// CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..5
+// CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..7
 // CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK9-NEXT:  entry:
 // CHECK9-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -2874,11 +2868,11 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8
 // CHECK9-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8
 // CHECK9-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8
-// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK9-NEXT:    ret void
 //
 //
-// CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..7
+// CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..10
 // CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK9-NEXT:  entry:
 // CHECK9-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -2958,11 +2952,11 @@ int main (int argc, char **argv) {
 // CHECK9-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8
 // CHECK9-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8
 // CHECK9-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8
-// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK9-NEXT:    ret void
 //
 //
-// CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..10
+// CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..13
 // CHECK9-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK9-NEXT:  entry:
 // CHECK9-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -3054,7 +3048,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK9-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK9-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK9-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK9-NEXT:  entry:
 // CHECK9-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK9-NEXT:    ret void
@@ -3108,14 +3102,14 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    store i32 [[TMP3]], i32* [[CONV]], align 4
 // CHECK10-NEXT:    [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8
 // CHECK10-NEXT:    [[TMP5:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK10-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP4]], i64* [[TMP7]], align 8
-// CHECK10-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP4]], i64* [[TMP9]], align 8
-// CHECK10-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK10-NEXT:    store i64 4, i64* [[TMP10]], align 8
+// CHECK10-NEXT:    [[TMP6:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK10-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP6]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes to i8*), i64 24, i1 false)
+// CHECK10-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP4]], i64* [[TMP8]], align 8
+// CHECK10-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP4]], i64* [[TMP10]], align 8
 // CHECK10-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
 // CHECK10-NEXT:    store i8* null, i8** [[TMP11]], align 8
 // CHECK10-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -3124,160 +3118,154 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK10-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i64*
 // CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP15]], align 8
-// CHECK10-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK10-NEXT:    store i64 8, i64* [[TMP16]], align 8
-// CHECK10-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK10-NEXT:    store i8* null, i8** [[TMP17]], align 8
-// CHECK10-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK10-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
-// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP19]], align 8
-// CHECK10-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK10-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32**
-// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP21]], align 8
-// CHECK10-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK10-NEXT:    store i64 [[TMP5]], i64* [[TMP22]], align 8
-// CHECK10-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK10-NEXT:    store i8* null, i8** [[TMP23]], align 8
-// CHECK10-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP27:%.*]] = load i32, i32* [[N]], align 4
-// CHECK10-NEXT:    store i32 [[TMP27]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK10-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK10-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP28]], 0
+// CHECK10-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK10-NEXT:    store i8* null, i8** [[TMP16]], align 8
+// CHECK10-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK10-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32**
+// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP18]], align 8
+// CHECK10-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK10-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32**
+// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP20]], align 8
+// CHECK10-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
+// CHECK10-NEXT:    store i64 [[TMP5]], i64* [[TMP21]], align 8
+// CHECK10-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK10-NEXT:    store i8* null, i8** [[TMP22]], align 8
+// CHECK10-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP26:%.*]] = load i32, i32* [[N]], align 4
+// CHECK10-NEXT:    store i32 [[TMP26]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK10-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK10-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP27]], 0
 // CHECK10-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK10-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK10-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK10-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK10-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP29]], 1
-// CHECK10-NEXT:    [[TMP30:%.*]] = zext i32 [[ADD]] to i64
-// CHECK10-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 [[TMP30]])
-// CHECK10-NEXT:    [[TMP31:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l100.region_id, i32 3, i8** [[TMP24]], i8** [[TMP25]], i64* [[TMP26]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
-// CHECK10-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
-// CHECK10-NEXT:    br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK10-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK10-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP28]], 1
+// CHECK10-NEXT:    [[TMP29:%.*]] = zext i32 [[ADD]] to i64
+// CHECK10-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 [[TMP29]])
+// CHECK10-NEXT:    [[TMP30:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l100.region_id, i32 3, i8** [[TMP23]], i8** [[TMP24]], i64* [[TMP25]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK10-NEXT:    [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
+// CHECK10-NEXT:    br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK10:       omp_offload.failed:
 // CHECK10-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l100(i64 [[TMP4]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK10-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK10:       omp_offload.cont:
-// CHECK10-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N]], align 4
+// CHECK10-NEXT:    [[TMP32:%.*]] = load i32, i32* [[N]], align 4
 // CHECK10-NEXT:    [[CONV4:%.*]] = bitcast i64* [[N_CASTED3]] to i32*
-// CHECK10-NEXT:    store i32 [[TMP33]], i32* [[CONV4]], align 4
-// CHECK10-NEXT:    [[TMP34:%.*]] = load i64, i64* [[N_CASTED3]], align 8
-// CHECK10-NEXT:    [[TMP35:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK10-NEXT:    store i32 [[TMP32]], i32* [[CONV4]], align 4
+// CHECK10-NEXT:    [[TMP33:%.*]] = load i64, i64* [[N_CASTED3]], align 8
+// CHECK10-NEXT:    [[TMP34:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK10-NEXT:    [[TMP35:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES8]] to i8*
+// CHECK10-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP35]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes.2 to i8*), i64 24, i1 false)
 // CHECK10-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
 // CHECK10-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP34]], i64* [[TMP37]], align 8
+// CHECK10-NEXT:    store i64 [[TMP33]], i64* [[TMP37]], align 8
 // CHECK10-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
 // CHECK10-NEXT:    [[TMP39:%.*]] = bitcast i8** [[TMP38]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP34]], i64* [[TMP39]], align 8
-// CHECK10-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 0
-// CHECK10-NEXT:    store i64 4, i64* [[TMP40]], align 8
-// CHECK10-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 0
-// CHECK10-NEXT:    store i8* null, i8** [[TMP41]], align 8
-// CHECK10-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 1
-// CHECK10-NEXT:    [[TMP43:%.*]] = bitcast i8** [[TMP42]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP43]], align 8
-// CHECK10-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 1
-// CHECK10-NEXT:    [[TMP45:%.*]] = bitcast i8** [[TMP44]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP45]], align 8
-// CHECK10-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 1
-// CHECK10-NEXT:    store i64 8, i64* [[TMP46]], align 8
-// CHECK10-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 1
-// CHECK10-NEXT:    store i8* null, i8** [[TMP47]], align 8
-// CHECK10-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 2
+// CHECK10-NEXT:    store i64 [[TMP33]], i64* [[TMP39]], align 8
+// CHECK10-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 0
+// CHECK10-NEXT:    store i8* null, i8** [[TMP40]], align 8
+// CHECK10-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 1
+// CHECK10-NEXT:    [[TMP42:%.*]] = bitcast i8** [[TMP41]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP42]], align 8
+// CHECK10-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 1
+// CHECK10-NEXT:    [[TMP44:%.*]] = bitcast i8** [[TMP43]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP44]], align 8
+// CHECK10-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 1
+// CHECK10-NEXT:    store i8* null, i8** [[TMP45]], align 8
+// CHECK10-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 2
+// CHECK10-NEXT:    [[TMP47:%.*]] = bitcast i8** [[TMP46]] to i32**
+// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP47]], align 8
+// CHECK10-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 2
 // CHECK10-NEXT:    [[TMP49:%.*]] = bitcast i8** [[TMP48]] to i32**
 // CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP49]], align 8
-// CHECK10-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 2
-// CHECK10-NEXT:    [[TMP51:%.*]] = bitcast i8** [[TMP50]] to i32**
-// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP51]], align 8
-// CHECK10-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 2
-// CHECK10-NEXT:    store i64 [[TMP35]], i64* [[TMP52]], align 8
-// CHECK10-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 2
-// CHECK10-NEXT:    store i8* null, i8** [[TMP53]], align 8
-// CHECK10-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP57:%.*]] = load i32, i32* [[N]], align 4
-// CHECK10-NEXT:    store i32 [[TMP57]], i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK10-NEXT:    [[TMP58:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK10-NEXT:    [[SUB12:%.*]] = sub nsw i32 [[TMP58]], 0
+// CHECK10-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 2
+// CHECK10-NEXT:    store i64 [[TMP34]], i64* [[TMP50]], align 8
+// CHECK10-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 2
+// CHECK10-NEXT:    store i8* null, i8** [[TMP51]], align 8
+// CHECK10-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS6]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES8]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP55:%.*]] = load i32, i32* [[N]], align 4
+// CHECK10-NEXT:    store i32 [[TMP55]], i32* [[DOTCAPTURE_EXPR_10]], align 4
+// CHECK10-NEXT:    [[TMP56:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
+// CHECK10-NEXT:    [[SUB12:%.*]] = sub nsw i32 [[TMP56]], 0
 // CHECK10-NEXT:    [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1
 // CHECK10-NEXT:    [[SUB14:%.*]] = sub nsw i32 [[DIV13]], 1
 // CHECK10-NEXT:    store i32 [[SUB14]], i32* [[DOTCAPTURE_EXPR_11]], align 4
-// CHECK10-NEXT:    [[TMP59:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_11]], align 4
-// CHECK10-NEXT:    [[ADD15:%.*]] = add nsw i32 [[TMP59]], 1
-// CHECK10-NEXT:    [[TMP60:%.*]] = zext i32 [[ADD15]] to i64
-// CHECK10-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 [[TMP60]])
-// CHECK10-NEXT:    [[TMP61:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l105.region_id, i32 3, i8** [[TMP54]], i8** [[TMP55]], i64* [[TMP56]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.2, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
-// CHECK10-NEXT:    [[TMP62:%.*]] = icmp ne i32 [[TMP61]], 0
-// CHECK10-NEXT:    br i1 [[TMP62]], label [[OMP_OFFLOAD_FAILED16:%.*]], label [[OMP_OFFLOAD_CONT17:%.*]]
+// CHECK10-NEXT:    [[TMP57:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_11]], align 4
+// CHECK10-NEXT:    [[ADD15:%.*]] = add nsw i32 [[TMP57]], 1
+// CHECK10-NEXT:    [[TMP58:%.*]] = zext i32 [[ADD15]] to i64
+// CHECK10-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 [[TMP58]])
+// CHECK10-NEXT:    [[TMP59:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l105.region_id, i32 3, i8** [[TMP52]], i8** [[TMP53]], i64* [[TMP54]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.3, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK10-NEXT:    [[TMP60:%.*]] = icmp ne i32 [[TMP59]], 0
+// CHECK10-NEXT:    br i1 [[TMP60]], label [[OMP_OFFLOAD_FAILED16:%.*]], label [[OMP_OFFLOAD_CONT17:%.*]]
 // CHECK10:       omp_offload.failed16:
-// CHECK10-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l105(i64 [[TMP34]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK10-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l105(i64 [[TMP33]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK10-NEXT:    br label [[OMP_OFFLOAD_CONT17]]
 // CHECK10:       omp_offload.cont17:
-// CHECK10-NEXT:    [[TMP63:%.*]] = load i32, i32* [[N]], align 4
+// CHECK10-NEXT:    [[TMP61:%.*]] = load i32, i32* [[N]], align 4
 // CHECK10-NEXT:    [[CONV19:%.*]] = bitcast i64* [[N_CASTED18]] to i32*
-// CHECK10-NEXT:    store i32 [[TMP63]], i32* [[CONV19]], align 4
-// CHECK10-NEXT:    [[TMP64:%.*]] = load i64, i64* [[N_CASTED18]], align 8
-// CHECK10-NEXT:    [[TMP65:%.*]] = mul nuw i64 [[TMP1]], 4
-// CHECK10-NEXT:    [[TMP66:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP67:%.*]] = bitcast i8** [[TMP66]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP64]], i64* [[TMP67]], align 8
-// CHECK10-NEXT:    [[TMP68:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP69:%.*]] = bitcast i8** [[TMP68]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP64]], i64* [[TMP69]], align 8
-// CHECK10-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES23]], i32 0, i32 0
-// CHECK10-NEXT:    store i64 4, i64* [[TMP70]], align 8
-// CHECK10-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 0
-// CHECK10-NEXT:    store i8* null, i8** [[TMP71]], align 8
-// CHECK10-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 1
+// CHECK10-NEXT:    store i32 [[TMP61]], i32* [[CONV19]], align 4
+// CHECK10-NEXT:    [[TMP62:%.*]] = load i64, i64* [[N_CASTED18]], align 8
+// CHECK10-NEXT:    [[TMP63:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK10-NEXT:    [[TMP64:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES23]] to i8*
+// CHECK10-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP64]], i8* align 8 bitcast ([3 x i64]* @.offload_sizes.5 to i8*), i64 24, i1 false)
+// CHECK10-NEXT:    [[TMP65:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP66:%.*]] = bitcast i8** [[TMP65]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP62]], i64* [[TMP66]], align 8
+// CHECK10-NEXT:    [[TMP67:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP68:%.*]] = bitcast i8** [[TMP67]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP62]], i64* [[TMP68]], align 8
+// CHECK10-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 0
+// CHECK10-NEXT:    store i8* null, i8** [[TMP69]], align 8
+// CHECK10-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 1
+// CHECK10-NEXT:    [[TMP71:%.*]] = bitcast i8** [[TMP70]] to i64*
+// CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP71]], align 8
+// CHECK10-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 1
 // CHECK10-NEXT:    [[TMP73:%.*]] = bitcast i8** [[TMP72]] to i64*
 // CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP73]], align 8
-// CHECK10-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 1
-// CHECK10-NEXT:    [[TMP75:%.*]] = bitcast i8** [[TMP74]] to i64*
-// CHECK10-NEXT:    store i64 [[TMP1]], i64* [[TMP75]], align 8
-// CHECK10-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES23]], i32 0, i32 1
-// CHECK10-NEXT:    store i64 8, i64* [[TMP76]], align 8
-// CHECK10-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 1
-// CHECK10-NEXT:    store i8* null, i8** [[TMP77]], align 8
-// CHECK10-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 2
-// CHECK10-NEXT:    [[TMP79:%.*]] = bitcast i8** [[TMP78]] to i32**
-// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP79]], align 8
-// CHECK10-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 2
-// CHECK10-NEXT:    [[TMP81:%.*]] = bitcast i8** [[TMP80]] to i32**
-// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP81]], align 8
-// CHECK10-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES23]], i32 0, i32 2
-// CHECK10-NEXT:    store i64 [[TMP65]], i64* [[TMP82]], align 8
-// CHECK10-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 2
-// CHECK10-NEXT:    store i8* null, i8** [[TMP83]], align 8
-// CHECK10-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES23]], i32 0, i32 0
-// CHECK10-NEXT:    [[TMP87:%.*]] = load i32, i32* [[N]], align 4
-// CHECK10-NEXT:    store i32 [[TMP87]], i32* [[DOTCAPTURE_EXPR_25]], align 4
-// CHECK10-NEXT:    [[TMP88:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_25]], align 4
-// CHECK10-NEXT:    [[SUB27:%.*]] = sub nsw i32 [[TMP88]], 0
+// CHECK10-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 1
+// CHECK10-NEXT:    store i8* null, i8** [[TMP74]], align 8
+// CHECK10-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 2
+// CHECK10-NEXT:    [[TMP76:%.*]] = bitcast i8** [[TMP75]] to i32**
+// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP76]], align 8
+// CHECK10-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 2
+// CHECK10-NEXT:    [[TMP78:%.*]] = bitcast i8** [[TMP77]] to i32**
+// CHECK10-NEXT:    store i32* [[VLA]], i32** [[TMP78]], align 8
+// CHECK10-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES23]], i32 0, i32 2
+// CHECK10-NEXT:    store i64 [[TMP63]], i64* [[TMP79]], align 8
+// CHECK10-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 2
+// CHECK10-NEXT:    store i8* null, i8** [[TMP80]], align 8
+// CHECK10-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS21]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES23]], i32 0, i32 0
+// CHECK10-NEXT:    [[TMP84:%.*]] = load i32, i32* [[N]], align 4
+// CHECK10-NEXT:    store i32 [[TMP84]], i32* [[DOTCAPTURE_EXPR_25]], align 4
+// CHECK10-NEXT:    [[TMP85:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_25]], align 4
+// CHECK10-NEXT:    [[SUB27:%.*]] = sub nsw i32 [[TMP85]], 0
 // CHECK10-NEXT:    [[DIV28:%.*]] = sdiv i32 [[SUB27]], 1
 // CHECK10-NEXT:    [[SUB29:%.*]] = sub nsw i32 [[DIV28]], 1
 // CHECK10-NEXT:    store i32 [[SUB29]], i32* [[DOTCAPTURE_EXPR_26]], align 4
-// CHECK10-NEXT:    [[TMP89:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_26]], align 4
-// CHECK10-NEXT:    [[ADD30:%.*]] = add nsw i32 [[TMP89]], 1
-// CHECK10-NEXT:    [[TMP90:%.*]] = zext i32 [[ADD30]] to i64
-// CHECK10-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 [[TMP90]])
-// CHECK10-NEXT:    [[TMP91:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l110.region_id, i32 3, i8** [[TMP84]], i8** [[TMP85]], i64* [[TMP86]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
-// CHECK10-NEXT:    [[TMP92:%.*]] = icmp ne i32 [[TMP91]], 0
-// CHECK10-NEXT:    br i1 [[TMP92]], label [[OMP_OFFLOAD_FAILED31:%.*]], label [[OMP_OFFLOAD_CONT32:%.*]]
+// CHECK10-NEXT:    [[TMP86:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_26]], align 4
+// CHECK10-NEXT:    [[ADD30:%.*]] = add nsw i32 [[TMP86]], 1
+// CHECK10-NEXT:    [[TMP87:%.*]] = zext i32 [[ADD30]] to i64
+// CHECK10-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 [[TMP87]])
+// CHECK10-NEXT:    [[TMP88:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l110.region_id, i32 3, i8** [[TMP81]], i8** [[TMP82]], i64* [[TMP83]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.6, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK10-NEXT:    [[TMP89:%.*]] = icmp ne i32 [[TMP88]], 0
+// CHECK10-NEXT:    br i1 [[TMP89]], label [[OMP_OFFLOAD_FAILED31:%.*]], label [[OMP_OFFLOAD_CONT32:%.*]]
 // CHECK10:       omp_offload.failed31:
-// CHECK10-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l110(i64 [[TMP64]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK10-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l110(i64 [[TMP62]], i64 [[TMP1]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK10-NEXT:    br label [[OMP_OFFLOAD_CONT32]]
 // CHECK10:       omp_offload.cont32:
-// CHECK10-NEXT:    [[TMP93:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK10-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10EEiT_(i32 noundef signext [[TMP93]])
+// CHECK10-NEXT:    [[TMP90:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK10-NEXT:    [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10EEiT_(i32 noundef signext [[TMP90]])
 // CHECK10-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK10-NEXT:    [[TMP94:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK10-NEXT:    call void @llvm.stackrestore(i8* [[TMP94]])
-// CHECK10-NEXT:    [[TMP95:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK10-NEXT:    ret i32 [[TMP95]]
+// CHECK10-NEXT:    [[TMP91:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK10-NEXT:    call void @llvm.stackrestore(i8* [[TMP91]])
+// CHECK10-NEXT:    [[TMP92:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK10-NEXT:    ret i32 [[TMP92]]
 //
 //
 // CHECK10-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l100
@@ -3546,11 +3534,11 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32*
 // CHECK10-NEXT:    store i32 [[TMP3]], i32* [[CONV1]], align 4
 // CHECK10-NEXT:    [[TMP4:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32* [[CONV]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP4]])
+// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32*, i64)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32* [[CONV]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP4]])
 // CHECK10-NEXT:    ret void
 //
 //
-// CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..3
+// CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..4
 // CHECK10-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK10-NEXT:  entry:
 // CHECK10-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -3679,7 +3667,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK10-LABEL: define {{[^@]+}}@_Z5tmainIiLi10EEiT_
-// CHECK10-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK10-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK10-NEXT:  entry:
 // CHECK10-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK10-NEXT:    [[A:%.*]] = alloca [10 x i32], align 4
@@ -3707,7 +3695,7 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK10-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK10-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK10-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l79.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.6, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK10-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l79.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.8, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
 // CHECK10-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK10-NEXT:    br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK10:       omp_offload.failed:
@@ -3725,7 +3713,7 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK10-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
 // CHECK10-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK10-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l84.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.8, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK10-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l84.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.11, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.12, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
 // CHECK10-NEXT:    [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
 // CHECK10-NEXT:    br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]]
 // CHECK10:       omp_offload.failed5:
@@ -3743,7 +3731,7 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0
 // CHECK10-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS8]], i32 0, i32 0
 // CHECK10-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK10-NEXT:    [[TMP25:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l89.region_id, i32 1, i8** [[TMP23]], i8** [[TMP24]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.11, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.12, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK10-NEXT:    [[TMP25:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l89.region_id, i32 1, i8** [[TMP23]], i8** [[TMP24]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.14, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.15, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
 // CHECK10-NEXT:    [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0
 // CHECK10-NEXT:    br i1 [[TMP26]], label [[OMP_OFFLOAD_FAILED11:%.*]], label [[OMP_OFFLOAD_CONT12:%.*]]
 // CHECK10:       omp_offload.failed11:
@@ -3759,11 +3747,11 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8
 // CHECK10-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8
 // CHECK10-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8
-// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK10-NEXT:    ret void
 //
 //
-// CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..5
+// CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..7
 // CHECK10-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK10-NEXT:  entry:
 // CHECK10-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -3843,11 +3831,11 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8
 // CHECK10-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8
 // CHECK10-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8
-// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK10-NEXT:    ret void
 //
 //
-// CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..7
+// CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..10
 // CHECK10-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK10-NEXT:  entry:
 // CHECK10-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -3927,11 +3915,11 @@ int main (int argc, char **argv) {
 // CHECK10-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 8
 // CHECK10-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8
 // CHECK10-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8
-// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK10-NEXT:    ret void
 //
 //
-// CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..10
+// CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..13
 // CHECK10-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK10-NEXT:  entry:
 // CHECK10-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -4023,7 +4011,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK10-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK10-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK10-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK10-NEXT:  entry:
 // CHECK10-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK10-NEXT:    ret void
@@ -4076,14 +4064,14 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4
 // CHECK11-NEXT:    [[TMP4:%.*]] = mul nuw i32 [[TMP0]], 4
 // CHECK11-NEXT:    [[TMP5:%.*]] = sext i32 [[TMP4]] to i64
-// CHECK11-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP3]], i32* [[TMP7]], align 4
-// CHECK11-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP3]], i32* [[TMP9]], align 4
-// CHECK11-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK11-NEXT:    store i64 4, i64* [[TMP10]], align 4
+// CHECK11-NEXT:    [[TMP6:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK11-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP6]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes to i8*), i32 24, i1 false)
+// CHECK11-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP3]], i32* [[TMP8]], align 4
+// CHECK11-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP3]], i32* [[TMP10]], align 4
 // CHECK11-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
 // CHECK11-NEXT:    store i8* null, i8** [[TMP11]], align 4
 // CHECK11-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -4092,160 +4080,154 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK11-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i32*
 // CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP15]], align 4
-// CHECK11-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK11-NEXT:    store i64 4, i64* [[TMP16]], align 4
-// CHECK11-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK11-NEXT:    store i8* null, i8** [[TMP17]], align 4
-// CHECK11-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
-// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP19]], align 4
-// CHECK11-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32**
-// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP21]], align 4
-// CHECK11-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK11-NEXT:    store i64 [[TMP5]], i64* [[TMP22]], align 4
-// CHECK11-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK11-NEXT:    store i8* null, i8** [[TMP23]], align 4
-// CHECK11-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP27:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP27]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK11-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK11-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP28]], 0
+// CHECK11-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK11-NEXT:    store i8* null, i8** [[TMP16]], align 4
+// CHECK11-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32**
+// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP18]], align 4
+// CHECK11-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32**
+// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP20]], align 4
+// CHECK11-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
+// CHECK11-NEXT:    store i64 [[TMP5]], i64* [[TMP21]], align 4
+// CHECK11-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK11-NEXT:    store i8* null, i8** [[TMP22]], align 4
+// CHECK11-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP26:%.*]] = load i32, i32* [[N]], align 4
+// CHECK11-NEXT:    store i32 [[TMP26]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK11-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK11-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP27]], 0
 // CHECK11-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK11-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK11-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK11-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK11-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP29]], 1
-// CHECK11-NEXT:    [[TMP30:%.*]] = zext i32 [[ADD]] to i64
-// CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 [[TMP30]])
-// CHECK11-NEXT:    [[TMP31:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l100.region_id, i32 3, i8** [[TMP24]], i8** [[TMP25]], i64* [[TMP26]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
-// CHECK11-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
-// CHECK11-NEXT:    br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK11-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK11-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP28]], 1
+// CHECK11-NEXT:    [[TMP29:%.*]] = zext i32 [[ADD]] to i64
+// CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 [[TMP29]])
+// CHECK11-NEXT:    [[TMP30:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l100.region_id, i32 3, i8** [[TMP23]], i8** [[TMP24]], i64* [[TMP25]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK11-NEXT:    [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
+// CHECK11-NEXT:    br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK11:       omp_offload.failed:
 // CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l100(i32 [[TMP3]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK11:       omp_offload.cont:
-// CHECK11-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP33]], i32* [[N_CASTED3]], align 4
-// CHECK11-NEXT:    [[TMP34:%.*]] = load i32, i32* [[N_CASTED3]], align 4
-// CHECK11-NEXT:    [[TMP35:%.*]] = mul nuw i32 [[TMP0]], 4
-// CHECK11-NEXT:    [[TMP36:%.*]] = sext i32 [[TMP35]] to i64
+// CHECK11-NEXT:    [[TMP32:%.*]] = load i32, i32* [[N]], align 4
+// CHECK11-NEXT:    store i32 [[TMP32]], i32* [[N_CASTED3]], align 4
+// CHECK11-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N_CASTED3]], align 4
+// CHECK11-NEXT:    [[TMP34:%.*]] = mul nuw i32 [[TMP0]], 4
+// CHECK11-NEXT:    [[TMP35:%.*]] = sext i32 [[TMP34]] to i64
+// CHECK11-NEXT:    [[TMP36:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES7]] to i8*
+// CHECK11-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP36]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes.2 to i8*), i32 24, i1 false)
 // CHECK11-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP38:%.*]] = bitcast i8** [[TMP37]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP34]], i32* [[TMP38]], align 4
+// CHECK11-NEXT:    store i32 [[TMP33]], i32* [[TMP38]], align 4
 // CHECK11-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP40:%.*]] = bitcast i8** [[TMP39]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP34]], i32* [[TMP40]], align 4
-// CHECK11-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 0
-// CHECK11-NEXT:    store i64 4, i64* [[TMP41]], align 4
-// CHECK11-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 0
-// CHECK11-NEXT:    store i8* null, i8** [[TMP42]], align 4
-// CHECK11-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 1
-// CHECK11-NEXT:    [[TMP44:%.*]] = bitcast i8** [[TMP43]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP44]], align 4
-// CHECK11-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 1
-// CHECK11-NEXT:    [[TMP46:%.*]] = bitcast i8** [[TMP45]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP46]], align 4
-// CHECK11-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 1
-// CHECK11-NEXT:    store i64 4, i64* [[TMP47]], align 4
-// CHECK11-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 1
-// CHECK11-NEXT:    store i8* null, i8** [[TMP48]], align 4
-// CHECK11-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 2
+// CHECK11-NEXT:    store i32 [[TMP33]], i32* [[TMP40]], align 4
+// CHECK11-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 0
+// CHECK11-NEXT:    store i8* null, i8** [[TMP41]], align 4
+// CHECK11-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 1
+// CHECK11-NEXT:    [[TMP43:%.*]] = bitcast i8** [[TMP42]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP43]], align 4
+// CHECK11-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 1
+// CHECK11-NEXT:    [[TMP45:%.*]] = bitcast i8** [[TMP44]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP45]], align 4
+// CHECK11-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 1
+// CHECK11-NEXT:    store i8* null, i8** [[TMP46]], align 4
+// CHECK11-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP48:%.*]] = bitcast i8** [[TMP47]] to i32**
+// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP48]], align 4
+// CHECK11-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 2
 // CHECK11-NEXT:    [[TMP50:%.*]] = bitcast i8** [[TMP49]] to i32**
 // CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP50]], align 4
-// CHECK11-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP52:%.*]] = bitcast i8** [[TMP51]] to i32**
-// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP52]], align 4
-// CHECK11-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 2
-// CHECK11-NEXT:    store i64 [[TMP36]], i64* [[TMP53]], align 4
-// CHECK11-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 2
-// CHECK11-NEXT:    store i8* null, i8** [[TMP54]], align 4
-// CHECK11-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP58:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP58]], i32* [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK11-NEXT:    [[TMP59:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK11-NEXT:    [[SUB11:%.*]] = sub nsw i32 [[TMP59]], 0
+// CHECK11-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 2
+// CHECK11-NEXT:    store i64 [[TMP35]], i64* [[TMP51]], align 4
+// CHECK11-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 2
+// CHECK11-NEXT:    store i8* null, i8** [[TMP52]], align 4
+// CHECK11-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP56:%.*]] = load i32, i32* [[N]], align 4
+// CHECK11-NEXT:    store i32 [[TMP56]], i32* [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK11-NEXT:    [[TMP57:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK11-NEXT:    [[SUB11:%.*]] = sub nsw i32 [[TMP57]], 0
 // CHECK11-NEXT:    [[DIV12:%.*]] = sdiv i32 [[SUB11]], 1
 // CHECK11-NEXT:    [[SUB13:%.*]] = sub nsw i32 [[DIV12]], 1
 // CHECK11-NEXT:    store i32 [[SUB13]], i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK11-NEXT:    [[TMP60:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK11-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP60]], 1
-// CHECK11-NEXT:    [[TMP61:%.*]] = zext i32 [[ADD14]] to i64
-// CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 [[TMP61]])
-// CHECK11-NEXT:    [[TMP62:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l105.region_id, i32 3, i8** [[TMP55]], i8** [[TMP56]], i64* [[TMP57]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.2, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
-// CHECK11-NEXT:    [[TMP63:%.*]] = icmp ne i32 [[TMP62]], 0
-// CHECK11-NEXT:    br i1 [[TMP63]], label [[OMP_OFFLOAD_FAILED15:%.*]], label [[OMP_OFFLOAD_CONT16:%.*]]
+// CHECK11-NEXT:    [[TMP58:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
+// CHECK11-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP58]], 1
+// CHECK11-NEXT:    [[TMP59:%.*]] = zext i32 [[ADD14]] to i64
+// CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 [[TMP59]])
+// CHECK11-NEXT:    [[TMP60:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l105.region_id, i32 3, i8** [[TMP53]], i8** [[TMP54]], i64* [[TMP55]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.3, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK11-NEXT:    [[TMP61:%.*]] = icmp ne i32 [[TMP60]], 0
+// CHECK11-NEXT:    br i1 [[TMP61]], label [[OMP_OFFLOAD_FAILED15:%.*]], label [[OMP_OFFLOAD_CONT16:%.*]]
 // CHECK11:       omp_offload.failed15:
-// CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l105(i32 [[TMP34]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l105(i32 [[TMP33]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT16]]
 // CHECK11:       omp_offload.cont16:
-// CHECK11-NEXT:    [[TMP64:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP64]], i32* [[N_CASTED17]], align 4
-// CHECK11-NEXT:    [[TMP65:%.*]] = load i32, i32* [[N_CASTED17]], align 4
-// CHECK11-NEXT:    [[TMP66:%.*]] = mul nuw i32 [[TMP0]], 4
-// CHECK11-NEXT:    [[TMP67:%.*]] = sext i32 [[TMP66]] to i64
-// CHECK11-NEXT:    [[TMP68:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP69:%.*]] = bitcast i8** [[TMP68]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP65]], i32* [[TMP69]], align 4
-// CHECK11-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP71:%.*]] = bitcast i8** [[TMP70]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP65]], i32* [[TMP71]], align 4
-// CHECK11-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 0
-// CHECK11-NEXT:    store i64 4, i64* [[TMP72]], align 4
-// CHECK11-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 0
-// CHECK11-NEXT:    store i8* null, i8** [[TMP73]], align 4
-// CHECK11-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 1
+// CHECK11-NEXT:    [[TMP62:%.*]] = load i32, i32* [[N]], align 4
+// CHECK11-NEXT:    store i32 [[TMP62]], i32* [[N_CASTED17]], align 4
+// CHECK11-NEXT:    [[TMP63:%.*]] = load i32, i32* [[N_CASTED17]], align 4
+// CHECK11-NEXT:    [[TMP64:%.*]] = mul nuw i32 [[TMP0]], 4
+// CHECK11-NEXT:    [[TMP65:%.*]] = sext i32 [[TMP64]] to i64
+// CHECK11-NEXT:    [[TMP66:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES21]] to i8*
+// CHECK11-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP66]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes.5 to i8*), i32 24, i1 false)
+// CHECK11-NEXT:    [[TMP67:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP68:%.*]] = bitcast i8** [[TMP67]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP63]], i32* [[TMP68]], align 4
+// CHECK11-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP70:%.*]] = bitcast i8** [[TMP69]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP63]], i32* [[TMP70]], align 4
+// CHECK11-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 0
+// CHECK11-NEXT:    store i8* null, i8** [[TMP71]], align 4
+// CHECK11-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 1
+// CHECK11-NEXT:    [[TMP73:%.*]] = bitcast i8** [[TMP72]] to i32*
+// CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP73]], align 4
+// CHECK11-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 1
 // CHECK11-NEXT:    [[TMP75:%.*]] = bitcast i8** [[TMP74]] to i32*
 // CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP75]], align 4
-// CHECK11-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 1
-// CHECK11-NEXT:    [[TMP77:%.*]] = bitcast i8** [[TMP76]] to i32*
-// CHECK11-NEXT:    store i32 [[TMP0]], i32* [[TMP77]], align 4
-// CHECK11-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 1
-// CHECK11-NEXT:    store i64 4, i64* [[TMP78]], align 4
-// CHECK11-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 1
-// CHECK11-NEXT:    store i8* null, i8** [[TMP79]], align 4
-// CHECK11-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP81:%.*]] = bitcast i8** [[TMP80]] to i32**
-// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP81]], align 4
-// CHECK11-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 2
-// CHECK11-NEXT:    [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i32**
-// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP83]], align 4
-// CHECK11-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 2
-// CHECK11-NEXT:    store i64 [[TMP67]], i64* [[TMP84]], align 4
-// CHECK11-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 2
-// CHECK11-NEXT:    store i8* null, i8** [[TMP85]], align 4
-// CHECK11-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 0
-// CHECK11-NEXT:    [[TMP89:%.*]] = load i32, i32* [[N]], align 4
-// CHECK11-NEXT:    store i32 [[TMP89]], i32* [[DOTCAPTURE_EXPR_23]], align 4
-// CHECK11-NEXT:    [[TMP90:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_23]], align 4
-// CHECK11-NEXT:    [[SUB25:%.*]] = sub nsw i32 [[TMP90]], 0
+// CHECK11-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 1
+// CHECK11-NEXT:    store i8* null, i8** [[TMP76]], align 4
+// CHECK11-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP78:%.*]] = bitcast i8** [[TMP77]] to i32**
+// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP78]], align 4
+// CHECK11-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 2
+// CHECK11-NEXT:    [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i32**
+// CHECK11-NEXT:    store i32* [[VLA]], i32** [[TMP80]], align 4
+// CHECK11-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 2
+// CHECK11-NEXT:    store i64 [[TMP65]], i64* [[TMP81]], align 4
+// CHECK11-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 2
+// CHECK11-NEXT:    store i8* null, i8** [[TMP82]], align 4
+// CHECK11-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 0
+// CHECK11-NEXT:    [[TMP86:%.*]] = load i32, i32* [[N]], align 4
+// CHECK11-NEXT:    store i32 [[TMP86]], i32* [[DOTCAPTURE_EXPR_23]], align 4
+// CHECK11-NEXT:    [[TMP87:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_23]], align 4
+// CHECK11-NEXT:    [[SUB25:%.*]] = sub nsw i32 [[TMP87]], 0
 // CHECK11-NEXT:    [[DIV26:%.*]] = sdiv i32 [[SUB25]], 1
 // CHECK11-NEXT:    [[SUB27:%.*]] = sub nsw i32 [[DIV26]], 1
 // CHECK11-NEXT:    store i32 [[SUB27]], i32* [[DOTCAPTURE_EXPR_24]], align 4
-// CHECK11-NEXT:    [[TMP91:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_24]], align 4
-// CHECK11-NEXT:    [[ADD28:%.*]] = add nsw i32 [[TMP91]], 1
-// CHECK11-NEXT:    [[TMP92:%.*]] = zext i32 [[ADD28]] to i64
-// CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 [[TMP92]])
-// CHECK11-NEXT:    [[TMP93:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l110.region_id, i32 3, i8** [[TMP86]], i8** [[TMP87]], i64* [[TMP88]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
-// CHECK11-NEXT:    [[TMP94:%.*]] = icmp ne i32 [[TMP93]], 0
-// CHECK11-NEXT:    br i1 [[TMP94]], label [[OMP_OFFLOAD_FAILED29:%.*]], label [[OMP_OFFLOAD_CONT30:%.*]]
+// CHECK11-NEXT:    [[TMP88:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_24]], align 4
+// CHECK11-NEXT:    [[ADD28:%.*]] = add nsw i32 [[TMP88]], 1
+// CHECK11-NEXT:    [[TMP89:%.*]] = zext i32 [[ADD28]] to i64
+// CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 [[TMP89]])
+// CHECK11-NEXT:    [[TMP90:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l110.region_id, i32 3, i8** [[TMP83]], i8** [[TMP84]], i64* [[TMP85]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.6, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK11-NEXT:    [[TMP91:%.*]] = icmp ne i32 [[TMP90]], 0
+// CHECK11-NEXT:    br i1 [[TMP91]], label [[OMP_OFFLOAD_FAILED29:%.*]], label [[OMP_OFFLOAD_CONT30:%.*]]
 // CHECK11:       omp_offload.failed29:
-// CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l110(i32 [[TMP65]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK11-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l110(i32 [[TMP63]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK11-NEXT:    br label [[OMP_OFFLOAD_CONT30]]
 // CHECK11:       omp_offload.cont30:
-// CHECK11-NEXT:    [[TMP95:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK11-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10EEiT_(i32 noundef [[TMP95]])
+// CHECK11-NEXT:    [[TMP92:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK11-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10EEiT_(i32 noundef [[TMP92]])
 // CHECK11-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK11-NEXT:    [[TMP96:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK11-NEXT:    call void @llvm.stackrestore(i8* [[TMP96]])
-// CHECK11-NEXT:    [[TMP97:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK11-NEXT:    ret i32 [[TMP97]]
+// CHECK11-NEXT:    [[TMP93:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK11-NEXT:    call void @llvm.stackrestore(i8* [[TMP93]])
+// CHECK11-NEXT:    [[TMP94:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK11-NEXT:    ret i32 [[TMP94]]
 //
 //
 // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l100
@@ -4508,11 +4490,11 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
 // CHECK11-NEXT:    store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK11-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK11-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*, i32)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP4]])
+// CHECK11-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*, i32)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP4]])
 // CHECK11-NEXT:    ret void
 //
 //
-// CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..3
+// CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..4
 // CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK11-NEXT:  entry:
 // CHECK11-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -4639,7 +4621,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK11-LABEL: define {{[^@]+}}@_Z5tmainIiLi10EEiT_
-// CHECK11-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK11-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK11-NEXT:  entry:
 // CHECK11-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[A:%.*]] = alloca [10 x i32], align 4
@@ -4667,7 +4649,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK11-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l79.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.6, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK11-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l79.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.8, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
 // CHECK11-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK11-NEXT:    br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK11:       omp_offload.failed:
@@ -4685,7 +4667,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
 // CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK11-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l84.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.8, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK11-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l84.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.11, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.12, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
 // CHECK11-NEXT:    [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
 // CHECK11-NEXT:    br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]]
 // CHECK11:       omp_offload.failed5:
@@ -4703,7 +4685,7 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0
 // CHECK11-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS8]], i32 0, i32 0
 // CHECK11-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK11-NEXT:    [[TMP25:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l89.region_id, i32 1, i8** [[TMP23]], i8** [[TMP24]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.11, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.12, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK11-NEXT:    [[TMP25:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l89.region_id, i32 1, i8** [[TMP23]], i8** [[TMP24]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.14, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.15, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
 // CHECK11-NEXT:    [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0
 // CHECK11-NEXT:    br i1 [[TMP26]], label [[OMP_OFFLOAD_FAILED11:%.*]], label [[OMP_OFFLOAD_CONT12:%.*]]
 // CHECK11:       omp_offload.failed11:
@@ -4719,11 +4701,11 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4
 // CHECK11-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4
 // CHECK11-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4
-// CHECK11-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK11-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK11-NEXT:    ret void
 //
 //
-// CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..5
+// CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..7
 // CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK11-NEXT:  entry:
 // CHECK11-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -4802,11 +4784,11 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4
 // CHECK11-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4
 // CHECK11-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4
-// CHECK11-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK11-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK11-NEXT:    ret void
 //
 //
-// CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..7
+// CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..10
 // CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK11-NEXT:  entry:
 // CHECK11-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -4885,11 +4867,11 @@ int main (int argc, char **argv) {
 // CHECK11-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4
 // CHECK11-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4
 // CHECK11-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4
-// CHECK11-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK11-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK11-NEXT:    ret void
 //
 //
-// CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..10
+// CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..13
 // CHECK11-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK11-NEXT:  entry:
 // CHECK11-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -4980,7 +4962,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK11-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK11-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK11-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK11-NEXT:  entry:
 // CHECK11-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK11-NEXT:    ret void
@@ -5033,14 +5015,14 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4
 // CHECK12-NEXT:    [[TMP4:%.*]] = mul nuw i32 [[TMP0]], 4
 // CHECK12-NEXT:    [[TMP5:%.*]] = sext i32 [[TMP4]] to i64
-// CHECK12-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP3]], i32* [[TMP7]], align 4
-// CHECK12-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP3]], i32* [[TMP9]], align 4
-// CHECK12-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK12-NEXT:    store i64 4, i64* [[TMP10]], align 4
+// CHECK12-NEXT:    [[TMP6:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK12-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP6]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes to i8*), i32 24, i1 false)
+// CHECK12-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP3]], i32* [[TMP8]], align 4
+// CHECK12-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP3]], i32* [[TMP10]], align 4
 // CHECK12-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
 // CHECK12-NEXT:    store i8* null, i8** [[TMP11]], align 4
 // CHECK12-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -5049,160 +5031,154 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK12-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to i32*
 // CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP15]], align 4
-// CHECK12-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK12-NEXT:    store i64 4, i64* [[TMP16]], align 4
-// CHECK12-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK12-NEXT:    store i8* null, i8** [[TMP17]], align 4
-// CHECK12-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK12-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
-// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP19]], align 4
-// CHECK12-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK12-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32**
-// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP21]], align 4
-// CHECK12-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK12-NEXT:    store i64 [[TMP5]], i64* [[TMP22]], align 4
-// CHECK12-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK12-NEXT:    store i8* null, i8** [[TMP23]], align 4
-// CHECK12-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP27:%.*]] = load i32, i32* [[N]], align 4
-// CHECK12-NEXT:    store i32 [[TMP27]], i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK12-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
-// CHECK12-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP28]], 0
+// CHECK12-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK12-NEXT:    store i8* null, i8** [[TMP16]], align 4
+// CHECK12-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK12-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32**
+// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP18]], align 4
+// CHECK12-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK12-NEXT:    [[TMP20:%.*]] = bitcast i8** [[TMP19]] to i32**
+// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP20]], align 4
+// CHECK12-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
+// CHECK12-NEXT:    store i64 [[TMP5]], i64* [[TMP21]], align 4
+// CHECK12-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK12-NEXT:    store i8* null, i8** [[TMP22]], align 4
+// CHECK12-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP26:%.*]] = load i32, i32* [[N]], align 4
+// CHECK12-NEXT:    store i32 [[TMP26]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK12-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// CHECK12-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP27]], 0
 // CHECK12-NEXT:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
 // CHECK12-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
 // CHECK12-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK12-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK12-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP29]], 1
-// CHECK12-NEXT:    [[TMP30:%.*]] = zext i32 [[ADD]] to i64
-// CHECK12-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 [[TMP30]])
-// CHECK12-NEXT:    [[TMP31:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l100.region_id, i32 3, i8** [[TMP24]], i8** [[TMP25]], i64* [[TMP26]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
-// CHECK12-NEXT:    [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
-// CHECK12-NEXT:    br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK12-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK12-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP28]], 1
+// CHECK12-NEXT:    [[TMP29:%.*]] = zext i32 [[ADD]] to i64
+// CHECK12-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2:[0-9]+]], i64 -1, i64 [[TMP29]])
+// CHECK12-NEXT:    [[TMP30:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l100.region_id, i32 3, i8** [[TMP23]], i8** [[TMP24]], i64* [[TMP25]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK12-NEXT:    [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
+// CHECK12-NEXT:    br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK12:       omp_offload.failed:
 // CHECK12-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l100(i32 [[TMP3]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3:[0-9]+]]
 // CHECK12-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK12:       omp_offload.cont:
-// CHECK12-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N]], align 4
-// CHECK12-NEXT:    store i32 [[TMP33]], i32* [[N_CASTED3]], align 4
-// CHECK12-NEXT:    [[TMP34:%.*]] = load i32, i32* [[N_CASTED3]], align 4
-// CHECK12-NEXT:    [[TMP35:%.*]] = mul nuw i32 [[TMP0]], 4
-// CHECK12-NEXT:    [[TMP36:%.*]] = sext i32 [[TMP35]] to i64
+// CHECK12-NEXT:    [[TMP32:%.*]] = load i32, i32* [[N]], align 4
+// CHECK12-NEXT:    store i32 [[TMP32]], i32* [[N_CASTED3]], align 4
+// CHECK12-NEXT:    [[TMP33:%.*]] = load i32, i32* [[N_CASTED3]], align 4
+// CHECK12-NEXT:    [[TMP34:%.*]] = mul nuw i32 [[TMP0]], 4
+// CHECK12-NEXT:    [[TMP35:%.*]] = sext i32 [[TMP34]] to i64
+// CHECK12-NEXT:    [[TMP36:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES7]] to i8*
+// CHECK12-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP36]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes.2 to i8*), i32 24, i1 false)
 // CHECK12-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
 // CHECK12-NEXT:    [[TMP38:%.*]] = bitcast i8** [[TMP37]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP34]], i32* [[TMP38]], align 4
+// CHECK12-NEXT:    store i32 [[TMP33]], i32* [[TMP38]], align 4
 // CHECK12-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
 // CHECK12-NEXT:    [[TMP40:%.*]] = bitcast i8** [[TMP39]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP34]], i32* [[TMP40]], align 4
-// CHECK12-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 0
-// CHECK12-NEXT:    store i64 4, i64* [[TMP41]], align 4
-// CHECK12-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 0
-// CHECK12-NEXT:    store i8* null, i8** [[TMP42]], align 4
-// CHECK12-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 1
-// CHECK12-NEXT:    [[TMP44:%.*]] = bitcast i8** [[TMP43]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP44]], align 4
-// CHECK12-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 1
-// CHECK12-NEXT:    [[TMP46:%.*]] = bitcast i8** [[TMP45]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP46]], align 4
-// CHECK12-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 1
-// CHECK12-NEXT:    store i64 4, i64* [[TMP47]], align 4
-// CHECK12-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 1
-// CHECK12-NEXT:    store i8* null, i8** [[TMP48]], align 4
-// CHECK12-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 2
+// CHECK12-NEXT:    store i32 [[TMP33]], i32* [[TMP40]], align 4
+// CHECK12-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 0
+// CHECK12-NEXT:    store i8* null, i8** [[TMP41]], align 4
+// CHECK12-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 1
+// CHECK12-NEXT:    [[TMP43:%.*]] = bitcast i8** [[TMP42]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP43]], align 4
+// CHECK12-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 1
+// CHECK12-NEXT:    [[TMP45:%.*]] = bitcast i8** [[TMP44]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP45]], align 4
+// CHECK12-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 1
+// CHECK12-NEXT:    store i8* null, i8** [[TMP46]], align 4
+// CHECK12-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 2
+// CHECK12-NEXT:    [[TMP48:%.*]] = bitcast i8** [[TMP47]] to i32**
+// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP48]], align 4
+// CHECK12-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 2
 // CHECK12-NEXT:    [[TMP50:%.*]] = bitcast i8** [[TMP49]] to i32**
 // CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP50]], align 4
-// CHECK12-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 2
-// CHECK12-NEXT:    [[TMP52:%.*]] = bitcast i8** [[TMP51]] to i32**
-// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP52]], align 4
-// CHECK12-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 2
-// CHECK12-NEXT:    store i64 [[TMP36]], i64* [[TMP53]], align 4
-// CHECK12-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 2
-// CHECK12-NEXT:    store i8* null, i8** [[TMP54]], align 4
-// CHECK12-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP58:%.*]] = load i32, i32* [[N]], align 4
-// CHECK12-NEXT:    store i32 [[TMP58]], i32* [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK12-NEXT:    [[TMP59:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_9]], align 4
-// CHECK12-NEXT:    [[SUB11:%.*]] = sub nsw i32 [[TMP59]], 0
+// CHECK12-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 2
+// CHECK12-NEXT:    store i64 [[TMP35]], i64* [[TMP51]], align 4
+// CHECK12-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i32 0, i32 2
+// CHECK12-NEXT:    store i8* null, i8** [[TMP52]], align 4
+// CHECK12-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES7]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP56:%.*]] = load i32, i32* [[N]], align 4
+// CHECK12-NEXT:    store i32 [[TMP56]], i32* [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK12-NEXT:    [[TMP57:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_9]], align 4
+// CHECK12-NEXT:    [[SUB11:%.*]] = sub nsw i32 [[TMP57]], 0
 // CHECK12-NEXT:    [[DIV12:%.*]] = sdiv i32 [[SUB11]], 1
 // CHECK12-NEXT:    [[SUB13:%.*]] = sub nsw i32 [[DIV12]], 1
 // CHECK12-NEXT:    store i32 [[SUB13]], i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK12-NEXT:    [[TMP60:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
-// CHECK12-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP60]], 1
-// CHECK12-NEXT:    [[TMP61:%.*]] = zext i32 [[ADD14]] to i64
-// CHECK12-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 [[TMP61]])
-// CHECK12-NEXT:    [[TMP62:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l105.region_id, i32 3, i8** [[TMP55]], i8** [[TMP56]], i64* [[TMP57]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.2, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
-// CHECK12-NEXT:    [[TMP63:%.*]] = icmp ne i32 [[TMP62]], 0
-// CHECK12-NEXT:    br i1 [[TMP63]], label [[OMP_OFFLOAD_FAILED15:%.*]], label [[OMP_OFFLOAD_CONT16:%.*]]
+// CHECK12-NEXT:    [[TMP58:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_10]], align 4
+// CHECK12-NEXT:    [[ADD14:%.*]] = add nsw i32 [[TMP58]], 1
+// CHECK12-NEXT:    [[TMP59:%.*]] = zext i32 [[ADD14]] to i64
+// CHECK12-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 [[TMP59]])
+// CHECK12-NEXT:    [[TMP60:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l105.region_id, i32 3, i8** [[TMP53]], i8** [[TMP54]], i64* [[TMP55]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.3, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK12-NEXT:    [[TMP61:%.*]] = icmp ne i32 [[TMP60]], 0
+// CHECK12-NEXT:    br i1 [[TMP61]], label [[OMP_OFFLOAD_FAILED15:%.*]], label [[OMP_OFFLOAD_CONT16:%.*]]
 // CHECK12:       omp_offload.failed15:
-// CHECK12-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l105(i32 [[TMP34]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK12-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l105(i32 [[TMP33]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK12-NEXT:    br label [[OMP_OFFLOAD_CONT16]]
 // CHECK12:       omp_offload.cont16:
-// CHECK12-NEXT:    [[TMP64:%.*]] = load i32, i32* [[N]], align 4
-// CHECK12-NEXT:    store i32 [[TMP64]], i32* [[N_CASTED17]], align 4
-// CHECK12-NEXT:    [[TMP65:%.*]] = load i32, i32* [[N_CASTED17]], align 4
-// CHECK12-NEXT:    [[TMP66:%.*]] = mul nuw i32 [[TMP0]], 4
-// CHECK12-NEXT:    [[TMP67:%.*]] = sext i32 [[TMP66]] to i64
-// CHECK12-NEXT:    [[TMP68:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP69:%.*]] = bitcast i8** [[TMP68]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP65]], i32* [[TMP69]], align 4
-// CHECK12-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP71:%.*]] = bitcast i8** [[TMP70]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP65]], i32* [[TMP71]], align 4
-// CHECK12-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 0
-// CHECK12-NEXT:    store i64 4, i64* [[TMP72]], align 4
-// CHECK12-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 0
-// CHECK12-NEXT:    store i8* null, i8** [[TMP73]], align 4
-// CHECK12-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 1
+// CHECK12-NEXT:    [[TMP62:%.*]] = load i32, i32* [[N]], align 4
+// CHECK12-NEXT:    store i32 [[TMP62]], i32* [[N_CASTED17]], align 4
+// CHECK12-NEXT:    [[TMP63:%.*]] = load i32, i32* [[N_CASTED17]], align 4
+// CHECK12-NEXT:    [[TMP64:%.*]] = mul nuw i32 [[TMP0]], 4
+// CHECK12-NEXT:    [[TMP65:%.*]] = sext i32 [[TMP64]] to i64
+// CHECK12-NEXT:    [[TMP66:%.*]] = bitcast [3 x i64]* [[DOTOFFLOAD_SIZES21]] to i8*
+// CHECK12-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP66]], i8* align 4 bitcast ([3 x i64]* @.offload_sizes.5 to i8*), i32 24, i1 false)
+// CHECK12-NEXT:    [[TMP67:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP68:%.*]] = bitcast i8** [[TMP67]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP63]], i32* [[TMP68]], align 4
+// CHECK12-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP70:%.*]] = bitcast i8** [[TMP69]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP63]], i32* [[TMP70]], align 4
+// CHECK12-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 0
+// CHECK12-NEXT:    store i8* null, i8** [[TMP71]], align 4
+// CHECK12-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 1
+// CHECK12-NEXT:    [[TMP73:%.*]] = bitcast i8** [[TMP72]] to i32*
+// CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP73]], align 4
+// CHECK12-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 1
 // CHECK12-NEXT:    [[TMP75:%.*]] = bitcast i8** [[TMP74]] to i32*
 // CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP75]], align 4
-// CHECK12-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 1
-// CHECK12-NEXT:    [[TMP77:%.*]] = bitcast i8** [[TMP76]] to i32*
-// CHECK12-NEXT:    store i32 [[TMP0]], i32* [[TMP77]], align 4
-// CHECK12-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 1
-// CHECK12-NEXT:    store i64 4, i64* [[TMP78]], align 4
-// CHECK12-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 1
-// CHECK12-NEXT:    store i8* null, i8** [[TMP79]], align 4
-// CHECK12-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 2
-// CHECK12-NEXT:    [[TMP81:%.*]] = bitcast i8** [[TMP80]] to i32**
-// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP81]], align 4
-// CHECK12-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 2
-// CHECK12-NEXT:    [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i32**
-// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP83]], align 4
-// CHECK12-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 2
-// CHECK12-NEXT:    store i64 [[TMP67]], i64* [[TMP84]], align 4
-// CHECK12-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 2
-// CHECK12-NEXT:    store i8* null, i8** [[TMP85]], align 4
-// CHECK12-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP88:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 0
-// CHECK12-NEXT:    [[TMP89:%.*]] = load i32, i32* [[N]], align 4
-// CHECK12-NEXT:    store i32 [[TMP89]], i32* [[DOTCAPTURE_EXPR_23]], align 4
-// CHECK12-NEXT:    [[TMP90:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_23]], align 4
-// CHECK12-NEXT:    [[SUB25:%.*]] = sub nsw i32 [[TMP90]], 0
+// CHECK12-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 1
+// CHECK12-NEXT:    store i8* null, i8** [[TMP76]], align 4
+// CHECK12-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 2
+// CHECK12-NEXT:    [[TMP78:%.*]] = bitcast i8** [[TMP77]] to i32**
+// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP78]], align 4
+// CHECK12-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 2
+// CHECK12-NEXT:    [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i32**
+// CHECK12-NEXT:    store i32* [[VLA]], i32** [[TMP80]], align 4
+// CHECK12-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 2
+// CHECK12-NEXT:    store i64 [[TMP65]], i64* [[TMP81]], align 4
+// CHECK12-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS20]], i32 0, i32 2
+// CHECK12-NEXT:    store i8* null, i8** [[TMP82]], align 4
+// CHECK12-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS19]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[DOTOFFLOAD_SIZES21]], i32 0, i32 0
+// CHECK12-NEXT:    [[TMP86:%.*]] = load i32, i32* [[N]], align 4
+// CHECK12-NEXT:    store i32 [[TMP86]], i32* [[DOTCAPTURE_EXPR_23]], align 4
+// CHECK12-NEXT:    [[TMP87:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_23]], align 4
+// CHECK12-NEXT:    [[SUB25:%.*]] = sub nsw i32 [[TMP87]], 0
 // CHECK12-NEXT:    [[DIV26:%.*]] = sdiv i32 [[SUB25]], 1
 // CHECK12-NEXT:    [[SUB27:%.*]] = sub nsw i32 [[DIV26]], 1
 // CHECK12-NEXT:    store i32 [[SUB27]], i32* [[DOTCAPTURE_EXPR_24]], align 4
-// CHECK12-NEXT:    [[TMP91:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_24]], align 4
-// CHECK12-NEXT:    [[ADD28:%.*]] = add nsw i32 [[TMP91]], 1
-// CHECK12-NEXT:    [[TMP92:%.*]] = zext i32 [[ADD28]] to i64
-// CHECK12-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 [[TMP92]])
-// CHECK12-NEXT:    [[TMP93:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l110.region_id, i32 3, i8** [[TMP86]], i8** [[TMP87]], i64* [[TMP88]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
-// CHECK12-NEXT:    [[TMP94:%.*]] = icmp ne i32 [[TMP93]], 0
-// CHECK12-NEXT:    br i1 [[TMP94]], label [[OMP_OFFLOAD_FAILED29:%.*]], label [[OMP_OFFLOAD_CONT30:%.*]]
+// CHECK12-NEXT:    [[TMP88:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_24]], align 4
+// CHECK12-NEXT:    [[ADD28:%.*]] = add nsw i32 [[TMP88]], 1
+// CHECK12-NEXT:    [[TMP89:%.*]] = zext i32 [[ADD28]] to i64
+// CHECK12-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 [[TMP89]])
+// CHECK12-NEXT:    [[TMP90:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l110.region_id, i32 3, i8** [[TMP83]], i8** [[TMP84]], i64* [[TMP85]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.6, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK12-NEXT:    [[TMP91:%.*]] = icmp ne i32 [[TMP90]], 0
+// CHECK12-NEXT:    br i1 [[TMP91]], label [[OMP_OFFLOAD_FAILED29:%.*]], label [[OMP_OFFLOAD_CONT30:%.*]]
 // CHECK12:       omp_offload.failed29:
-// CHECK12-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l110(i32 [[TMP65]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
+// CHECK12-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l110(i32 [[TMP63]], i32 [[TMP0]], i32* [[VLA]]) #[[ATTR3]]
 // CHECK12-NEXT:    br label [[OMP_OFFLOAD_CONT30]]
 // CHECK12:       omp_offload.cont30:
-// CHECK12-NEXT:    [[TMP95:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK12-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10EEiT_(i32 noundef [[TMP95]])
+// CHECK12-NEXT:    [[TMP92:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
+// CHECK12-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10EEiT_(i32 noundef [[TMP92]])
 // CHECK12-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4
-// CHECK12-NEXT:    [[TMP96:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK12-NEXT:    call void @llvm.stackrestore(i8* [[TMP96]])
-// CHECK12-NEXT:    [[TMP97:%.*]] = load i32, i32* [[RETVAL]], align 4
-// CHECK12-NEXT:    ret i32 [[TMP97]]
+// CHECK12-NEXT:    [[TMP93:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK12-NEXT:    call void @llvm.stackrestore(i8* [[TMP93]])
+// CHECK12-NEXT:    [[TMP94:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK12-NEXT:    ret i32 [[TMP94]]
 //
 //
 // CHECK12-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l100
@@ -5465,11 +5441,11 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
 // CHECK12-NEXT:    store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
 // CHECK12-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK12-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*, i32)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP4]])
+// CHECK12-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32, i32*, i32)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP4]])
 // CHECK12-NEXT:    ret void
 //
 //
-// CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..3
+// CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..4
 // CHECK12-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] {
 // CHECK12-NEXT:  entry:
 // CHECK12-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -5596,7 +5572,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK12-LABEL: define {{[^@]+}}@_Z5tmainIiLi10EEiT_
-// CHECK12-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat {
+// CHECK12-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK12-NEXT:  entry:
 // CHECK12-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK12-NEXT:    [[A:%.*]] = alloca [10 x i32], align 4
@@ -5624,7 +5600,7 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
 // CHECK12-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
 // CHECK12-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK12-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l79.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.6, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK12-NEXT:    [[TMP7:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l79.region_id, i32 1, i8** [[TMP5]], i8** [[TMP6]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.8, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
 // CHECK12-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK12-NEXT:    br i1 [[TMP8]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK12:       omp_offload.failed:
@@ -5642,7 +5618,7 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
 // CHECK12-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
 // CHECK12-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK12-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l84.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.8, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.9, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK12-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l84.region_id, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.11, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.12, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
 // CHECK12-NEXT:    [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
 // CHECK12-NEXT:    br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]]
 // CHECK12:       omp_offload.failed5:
@@ -5660,7 +5636,7 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0
 // CHECK12-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS8]], i32 0, i32 0
 // CHECK12-NEXT:    call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i64 10)
-// CHECK12-NEXT:    [[TMP25:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l89.region_id, i32 1, i8** [[TMP23]], i8** [[TMP24]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.11, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.12, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
+// CHECK12-NEXT:    [[TMP25:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l89.region_id, i32 1, i8** [[TMP23]], i8** [[TMP24]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.14, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.15, i32 0, i32 0), i8** null, i8** null, i32 0, i32 1)
 // CHECK12-NEXT:    [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0
 // CHECK12-NEXT:    br i1 [[TMP26]], label [[OMP_OFFLOAD_FAILED11:%.*]], label [[OMP_OFFLOAD_CONT12:%.*]]
 // CHECK12:       omp_offload.failed11:
@@ -5676,11 +5652,11 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4
 // CHECK12-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4
 // CHECK12-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4
-// CHECK12-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK12-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK12-NEXT:    ret void
 //
 //
-// CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..5
+// CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..7
 // CHECK12-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK12-NEXT:  entry:
 // CHECK12-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -5759,11 +5735,11 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4
 // CHECK12-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4
 // CHECK12-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4
-// CHECK12-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK12-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK12-NEXT:    ret void
 //
 //
-// CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..7
+// CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..10
 // CHECK12-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK12-NEXT:  entry:
 // CHECK12-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -5842,11 +5818,11 @@ int main (int argc, char **argv) {
 // CHECK12-NEXT:    [[A_ADDR:%.*]] = alloca [10 x i32]*, align 4
 // CHECK12-NEXT:    store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 4
 // CHECK12-NEXT:    [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 4
-// CHECK12-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..10 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
+// CHECK12-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..13 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP0]])
 // CHECK12-NEXT:    ret void
 //
 //
-// CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..10
+// CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..13
 // CHECK12-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] {
 // CHECK12-NEXT:  entry:
 // CHECK12-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4
@@ -5937,7 +5913,7 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK12-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
-// CHECK12-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK12-SAME: () #[[ATTR6:[0-9]+]] {
 // CHECK12-NEXT:  entry:
 // CHECK12-NEXT:    call void @__tgt_register_requires(i64 1)
 // CHECK12-NEXT:    ret void

diff  --git a/clang/test/OpenMP/teams_firstprivate_codegen.cpp b/clang/test/OpenMP/teams_firstprivate_codegen.cpp
index e5c43fb91ae55..9052c9f6d71eb 100644
--- a/clang/test/OpenMP/teams_firstprivate_codegen.cpp
+++ b/clang/test/OpenMP/teams_firstprivate_codegen.cpp
@@ -3351,14 +3351,14 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) {
 // CHECK17-NEXT:    [[TMP12:%.*]] = load i64, i64* [[N_CASTED]], align 8
 // CHECK17-NEXT:    [[TMP13:%.*]] = mul nuw i64 [[TMP3]], [[TMP5]]
 // CHECK17-NEXT:    [[TMP14:%.*]] = mul nuw i64 [[TMP13]], 8
-// CHECK17-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP16:%.*]] = bitcast i8** [[TMP15]] to float**
-// CHECK17-NEXT:    store float* [[TMP8]], float** [[TMP16]], align 8
-// CHECK17-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to float**
-// CHECK17-NEXT:    store float* [[TMP8]], float** [[TMP18]], align 8
-// CHECK17-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [8 x i64], [8 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK17-NEXT:    store i64 0, i64* [[TMP19]], align 8
+// CHECK17-NEXT:    [[TMP15:%.*]] = bitcast [8 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK17-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP15]], i8* align 8 bitcast ([8 x i64]* @.offload_sizes to i8*), i64 64, i1 false)
+// CHECK17-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP17:%.*]] = bitcast i8** [[TMP16]] to float**
+// CHECK17-NEXT:    store float* [[TMP8]], float** [[TMP17]], align 8
+// CHECK17-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to float**
+// CHECK17-NEXT:    store float* [[TMP8]], float** [[TMP19]], align 8
 // CHECK17-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
 // CHECK17-NEXT:    store i8* null, i8** [[TMP20]], align 8
 // CHECK17-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -3367,82 +3367,70 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) {
 // CHECK17-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK17-NEXT:    [[TMP24:%.*]] = bitcast i8** [[TMP23]] to %struct.St**
 // CHECK17-NEXT:    store %struct.St* [[TMP9]], %struct.St** [[TMP24]], align 8
-// CHECK17-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [8 x i64], [8 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK17-NEXT:    store i64 0, i64* [[TMP25]], align 8
-// CHECK17-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK17-NEXT:    store i8* null, i8** [[TMP26]], align 8
-// CHECK17-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK17-NEXT:    [[TMP28:%.*]] = bitcast i8** [[TMP27]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP1]], i64* [[TMP28]], align 8
-// CHECK17-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK17-NEXT:    [[TMP30:%.*]] = bitcast i8** [[TMP29]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP1]], i64* [[TMP30]], align 8
-// CHECK17-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [8 x i64], [8 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK17-NEXT:    store i64 8, i64* [[TMP31]], align 8
-// CHECK17-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK17-NEXT:    store i8* null, i8** [[TMP32]], align 8
-// CHECK17-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK17-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK17-NEXT:    store i8* null, i8** [[TMP25]], align 8
+// CHECK17-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK17-NEXT:    [[TMP27:%.*]] = bitcast i8** [[TMP26]] to i64*
+// CHECK17-NEXT:    store i64 [[TMP1]], i64* [[TMP27]], align 8
+// CHECK17-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK17-NEXT:    [[TMP29:%.*]] = bitcast i8** [[TMP28]] to i64*
+// CHECK17-NEXT:    store i64 [[TMP1]], i64* [[TMP29]], align 8
+// CHECK17-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK17-NEXT:    store i8* null, i8** [[TMP30]], align 8
+// CHECK17-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK17-NEXT:    [[TMP32:%.*]] = bitcast i8** [[TMP31]] to ppc_fp128**
+// CHECK17-NEXT:    store ppc_fp128* [[TMP10]], ppc_fp128** [[TMP32]], align 8
+// CHECK17-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
 // CHECK17-NEXT:    [[TMP34:%.*]] = bitcast i8** [[TMP33]] to ppc_fp128**
 // CHECK17-NEXT:    store ppc_fp128* [[TMP10]], ppc_fp128** [[TMP34]], align 8
-// CHECK17-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK17-NEXT:    [[TMP36:%.*]] = bitcast i8** [[TMP35]] to ppc_fp128**
-// CHECK17-NEXT:    store ppc_fp128* [[TMP10]], ppc_fp128** [[TMP36]], align 8
-// CHECK17-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [8 x i64], [8 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK17-NEXT:    store i64 0, i64* [[TMP37]], align 8
-// CHECK17-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
-// CHECK17-NEXT:    store i8* null, i8** [[TMP38]], align 8
-// CHECK17-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK17-NEXT:    [[TMP40:%.*]] = bitcast i8** [[TMP39]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP3]], i64* [[TMP40]], align 8
-// CHECK17-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK17-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
+// CHECK17-NEXT:    store i8* null, i8** [[TMP35]], align 8
+// CHECK17-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK17-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i64*
+// CHECK17-NEXT:    store i64 [[TMP3]], i64* [[TMP37]], align 8
+// CHECK17-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK17-NEXT:    [[TMP39:%.*]] = bitcast i8** [[TMP38]] to i64*
+// CHECK17-NEXT:    store i64 [[TMP3]], i64* [[TMP39]], align 8
+// CHECK17-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
+// CHECK17-NEXT:    store i8* null, i8** [[TMP40]], align 8
+// CHECK17-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 5
 // CHECK17-NEXT:    [[TMP42:%.*]] = bitcast i8** [[TMP41]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP3]], i64* [[TMP42]], align 8
-// CHECK17-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [8 x i64], [8 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK17-NEXT:    store i64 8, i64* [[TMP43]], align 8
-// CHECK17-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
-// CHECK17-NEXT:    store i8* null, i8** [[TMP44]], align 8
-// CHECK17-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 5
-// CHECK17-NEXT:    [[TMP46:%.*]] = bitcast i8** [[TMP45]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP5]], i64* [[TMP46]], align 8
-// CHECK17-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 5
-// CHECK17-NEXT:    [[TMP48:%.*]] = bitcast i8** [[TMP47]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP5]], i64* [[TMP48]], align 8
-// CHECK17-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [8 x i64], [8 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 5
-// CHECK17-NEXT:    store i64 8, i64* [[TMP49]], align 8
-// CHECK17-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 5
-// CHECK17-NEXT:    store i8* null, i8** [[TMP50]], align 8
-// CHECK17-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 6
-// CHECK17-NEXT:    [[TMP52:%.*]] = bitcast i8** [[TMP51]] to double**
-// CHECK17-NEXT:    store double* [[VLA]], double** [[TMP52]], align 8
-// CHECK17-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 6
-// CHECK17-NEXT:    [[TMP54:%.*]] = bitcast i8** [[TMP53]] to double**
-// CHECK17-NEXT:    store double* [[VLA]], double** [[TMP54]], align 8
-// CHECK17-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [8 x i64], [8 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 6
-// CHECK17-NEXT:    store i64 [[TMP14]], i64* [[TMP55]], align 8
-// CHECK17-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 6
+// CHECK17-NEXT:    store i64 [[TMP5]], i64* [[TMP42]], align 8
+// CHECK17-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 5
+// CHECK17-NEXT:    [[TMP44:%.*]] = bitcast i8** [[TMP43]] to i64*
+// CHECK17-NEXT:    store i64 [[TMP5]], i64* [[TMP44]], align 8
+// CHECK17-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 5
+// CHECK17-NEXT:    store i8* null, i8** [[TMP45]], align 8
+// CHECK17-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 6
+// CHECK17-NEXT:    [[TMP47:%.*]] = bitcast i8** [[TMP46]] to double**
+// CHECK17-NEXT:    store double* [[VLA]], double** [[TMP47]], align 8
+// CHECK17-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 6
+// CHECK17-NEXT:    [[TMP49:%.*]] = bitcast i8** [[TMP48]] to double**
+// CHECK17-NEXT:    store double* [[VLA]], double** [[TMP49]], align 8
+// CHECK17-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [8 x i64], [8 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 6
+// CHECK17-NEXT:    store i64 [[TMP14]], i64* [[TMP50]], align 8
+// CHECK17-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 6
+// CHECK17-NEXT:    store i8* null, i8** [[TMP51]], align 8
+// CHECK17-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 7
+// CHECK17-NEXT:    [[TMP53:%.*]] = bitcast i8** [[TMP52]] to i64*
+// CHECK17-NEXT:    store i64 [[TMP12]], i64* [[TMP53]], align 8
+// CHECK17-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 7
+// CHECK17-NEXT:    [[TMP55:%.*]] = bitcast i8** [[TMP54]] to i64*
+// CHECK17-NEXT:    store i64 [[TMP12]], i64* [[TMP55]], align 8
+// CHECK17-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 7
 // CHECK17-NEXT:    store i8* null, i8** [[TMP56]], align 8
-// CHECK17-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 7
-// CHECK17-NEXT:    [[TMP58:%.*]] = bitcast i8** [[TMP57]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP12]], i64* [[TMP58]], align 8
-// CHECK17-NEXT:    [[TMP59:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 7
-// CHECK17-NEXT:    [[TMP60:%.*]] = bitcast i8** [[TMP59]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP12]], i64* [[TMP60]], align 8
-// CHECK17-NEXT:    [[TMP61:%.*]] = getelementptr inbounds [8 x i64], [8 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
-// CHECK17-NEXT:    store i64 4, i64* [[TMP61]], align 8
-// CHECK17-NEXT:    [[TMP62:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 7
-// CHECK17-NEXT:    store i8* null, i8** [[TMP62]], align 8
-// CHECK17-NEXT:    [[TMP63:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP64:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP65:%.*]] = getelementptr inbounds [8 x i64], [8 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP66:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1:[0-9]+]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z10array_funcPfP2StiPg_l152.region_id, i32 8, i8** [[TMP63]], i8** [[TMP64]], i64* [[TMP65]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK17-NEXT:    [[TMP67:%.*]] = icmp ne i32 [[TMP66]], 0
-// CHECK17-NEXT:    br i1 [[TMP67]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK17-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP59:%.*]] = getelementptr inbounds [8 x i64], [8 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP60:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1:[0-9]+]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z10array_funcPfP2StiPg_l152.region_id, i32 8, i8** [[TMP57]], i8** [[TMP58]], i64* [[TMP59]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK17-NEXT:    [[TMP61:%.*]] = icmp ne i32 [[TMP60]], 0
+// CHECK17-NEXT:    br i1 [[TMP61]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK17:       omp_offload.failed:
 // CHECK17-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z10array_funcPfP2StiPg_l152(float* [[TMP8]], %struct.St* [[TMP9]], i64 [[TMP1]], ppc_fp128* [[TMP10]], i64 [[TMP3]], i64 [[TMP5]], double* [[VLA]], i64 [[TMP12]]) #[[ATTR4:[0-9]+]]
 // CHECK17-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK17:       omp_offload.cont:
-// CHECK17-NEXT:    [[TMP68:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK17-NEXT:    call void @llvm.stackrestore(i8* [[TMP68]])
+// CHECK17-NEXT:    [[TMP62:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK17-NEXT:    call void @llvm.stackrestore(i8* [[TMP62]])
 // CHECK17-NEXT:    ret void
 //
 //
@@ -3583,14 +3571,14 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) {
 // CHECK17-NEXT:    [[TMP19:%.*]] = ptrtoint i8* [[TMP16]] to i64
 // CHECK17-NEXT:    [[TMP20:%.*]] = sub i64 [[TMP18]], [[TMP19]]
 // CHECK17-NEXT:    [[TMP21:%.*]] = sdiv exact i64 [[TMP20]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64)
-// CHECK17-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP23:%.*]] = bitcast i8** [[TMP22]] to %struct.St**
-// CHECK17-NEXT:    store %struct.St* [[TMP9]], %struct.St** [[TMP23]], align 8
-// CHECK17-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP25:%.*]] = bitcast i8** [[TMP24]] to %struct.St**
-// CHECK17-NEXT:    store %struct.St* [[TMP9]], %struct.St** [[TMP25]], align 8
-// CHECK17-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK17-NEXT:    store i64 0, i64* [[TMP26]], align 8
+// CHECK17-NEXT:    [[TMP22:%.*]] = bitcast [10 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK17-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP22]], i8* align 8 bitcast ([10 x i64]* @.offload_sizes.2 to i8*), i64 80, i1 false)
+// CHECK17-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP24:%.*]] = bitcast i8** [[TMP23]] to %struct.St**
+// CHECK17-NEXT:    store %struct.St* [[TMP9]], %struct.St** [[TMP24]], align 8
+// CHECK17-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP26:%.*]] = bitcast i8** [[TMP25]] to %struct.St**
+// CHECK17-NEXT:    store %struct.St* [[TMP9]], %struct.St** [[TMP26]], align 8
 // CHECK17-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
 // CHECK17-NEXT:    store i8* null, i8** [[TMP27]], align 8
 // CHECK17-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -3599,102 +3587,88 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) {
 // CHECK17-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK17-NEXT:    [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i64*
 // CHECK17-NEXT:    store i64 [[TMP1]], i64* [[TMP31]], align 8
-// CHECK17-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK17-NEXT:    store i64 8, i64* [[TMP32]], align 8
-// CHECK17-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK17-NEXT:    store i8* null, i8** [[TMP33]], align 8
-// CHECK17-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK17-NEXT:    [[TMP35:%.*]] = bitcast i8** [[TMP34]] to ppc_fp128**
-// CHECK17-NEXT:    store ppc_fp128* [[TMP10]], ppc_fp128** [[TMP35]], align 8
-// CHECK17-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK17-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to ppc_fp128**
-// CHECK17-NEXT:    store ppc_fp128* [[TMP10]], ppc_fp128** [[TMP37]], align 8
-// CHECK17-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK17-NEXT:    store i64 0, i64* [[TMP38]], align 8
-// CHECK17-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK17-NEXT:    store i8* null, i8** [[TMP39]], align 8
-// CHECK17-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK17-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK17-NEXT:    store i8* null, i8** [[TMP32]], align 8
+// CHECK17-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK17-NEXT:    [[TMP34:%.*]] = bitcast i8** [[TMP33]] to ppc_fp128**
+// CHECK17-NEXT:    store ppc_fp128* [[TMP10]], ppc_fp128** [[TMP34]], align 8
+// CHECK17-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK17-NEXT:    [[TMP36:%.*]] = bitcast i8** [[TMP35]] to ppc_fp128**
+// CHECK17-NEXT:    store ppc_fp128* [[TMP10]], ppc_fp128** [[TMP36]], align 8
+// CHECK17-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK17-NEXT:    store i8* null, i8** [[TMP37]], align 8
+// CHECK17-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK17-NEXT:    [[TMP39:%.*]] = bitcast i8** [[TMP38]] to i64*
+// CHECK17-NEXT:    store i64 [[TMP3]], i64* [[TMP39]], align 8
+// CHECK17-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
 // CHECK17-NEXT:    [[TMP41:%.*]] = bitcast i8** [[TMP40]] to i64*
 // CHECK17-NEXT:    store i64 [[TMP3]], i64* [[TMP41]], align 8
-// CHECK17-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK17-NEXT:    [[TMP43:%.*]] = bitcast i8** [[TMP42]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP3]], i64* [[TMP43]], align 8
-// CHECK17-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK17-NEXT:    store i64 8, i64* [[TMP44]], align 8
-// CHECK17-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
-// CHECK17-NEXT:    store i8* null, i8** [[TMP45]], align 8
-// CHECK17-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK17-NEXT:    [[TMP47:%.*]] = bitcast i8** [[TMP46]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP5]], i64* [[TMP47]], align 8
-// CHECK17-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
-// CHECK17-NEXT:    [[TMP49:%.*]] = bitcast i8** [[TMP48]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP5]], i64* [[TMP49]], align 8
-// CHECK17-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK17-NEXT:    store i64 8, i64* [[TMP50]], align 8
-// CHECK17-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
-// CHECK17-NEXT:    store i8* null, i8** [[TMP51]], align 8
-// CHECK17-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 5
-// CHECK17-NEXT:    [[TMP53:%.*]] = bitcast i8** [[TMP52]] to double**
-// CHECK17-NEXT:    store double* [[VLA]], double** [[TMP53]], align 8
-// CHECK17-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 5
-// CHECK17-NEXT:    [[TMP55:%.*]] = bitcast i8** [[TMP54]] to double**
-// CHECK17-NEXT:    store double* [[VLA]], double** [[TMP55]], align 8
-// CHECK17-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 5
-// CHECK17-NEXT:    store i64 [[TMP14]], i64* [[TMP56]], align 8
-// CHECK17-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 5
-// CHECK17-NEXT:    store i8* null, i8** [[TMP57]], align 8
-// CHECK17-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 6
-// CHECK17-NEXT:    [[TMP59:%.*]] = bitcast i8** [[TMP58]] to %struct.St**
-// CHECK17-NEXT:    store %struct.St* [[THIS1]], %struct.St** [[TMP59]], align 8
-// CHECK17-NEXT:    [[TMP60:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 6
-// CHECK17-NEXT:    [[TMP61:%.*]] = bitcast i8** [[TMP60]] to i32**
-// CHECK17-NEXT:    store i32* [[A3]], i32** [[TMP61]], align 8
-// CHECK17-NEXT:    [[TMP62:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 6
-// CHECK17-NEXT:    store i64 [[TMP21]], i64* [[TMP62]], align 8
-// CHECK17-NEXT:    [[TMP63:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 6
-// CHECK17-NEXT:    store i8* null, i8** [[TMP63]], align 8
-// CHECK17-NEXT:    [[TMP64:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 7
-// CHECK17-NEXT:    [[TMP65:%.*]] = bitcast i8** [[TMP64]] to %struct.St**
-// CHECK17-NEXT:    store %struct.St* [[THIS1]], %struct.St** [[TMP65]], align 8
-// CHECK17-NEXT:    [[TMP66:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 7
-// CHECK17-NEXT:    [[TMP67:%.*]] = bitcast i8** [[TMP66]] to i32**
-// CHECK17-NEXT:    store i32* [[B2]], i32** [[TMP67]], align 8
-// CHECK17-NEXT:    [[TMP68:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
-// CHECK17-NEXT:    store i64 4, i64* [[TMP68]], align 8
-// CHECK17-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 7
+// CHECK17-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
+// CHECK17-NEXT:    store i8* null, i8** [[TMP42]], align 8
+// CHECK17-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK17-NEXT:    [[TMP44:%.*]] = bitcast i8** [[TMP43]] to i64*
+// CHECK17-NEXT:    store i64 [[TMP5]], i64* [[TMP44]], align 8
+// CHECK17-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK17-NEXT:    [[TMP46:%.*]] = bitcast i8** [[TMP45]] to i64*
+// CHECK17-NEXT:    store i64 [[TMP5]], i64* [[TMP46]], align 8
+// CHECK17-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
+// CHECK17-NEXT:    store i8* null, i8** [[TMP47]], align 8
+// CHECK17-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 5
+// CHECK17-NEXT:    [[TMP49:%.*]] = bitcast i8** [[TMP48]] to double**
+// CHECK17-NEXT:    store double* [[VLA]], double** [[TMP49]], align 8
+// CHECK17-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 5
+// CHECK17-NEXT:    [[TMP51:%.*]] = bitcast i8** [[TMP50]] to double**
+// CHECK17-NEXT:    store double* [[VLA]], double** [[TMP51]], align 8
+// CHECK17-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 5
+// CHECK17-NEXT:    store i64 [[TMP14]], i64* [[TMP52]], align 8
+// CHECK17-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 5
+// CHECK17-NEXT:    store i8* null, i8** [[TMP53]], align 8
+// CHECK17-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 6
+// CHECK17-NEXT:    [[TMP55:%.*]] = bitcast i8** [[TMP54]] to %struct.St**
+// CHECK17-NEXT:    store %struct.St* [[THIS1]], %struct.St** [[TMP55]], align 8
+// CHECK17-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 6
+// CHECK17-NEXT:    [[TMP57:%.*]] = bitcast i8** [[TMP56]] to i32**
+// CHECK17-NEXT:    store i32* [[A3]], i32** [[TMP57]], align 8
+// CHECK17-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 6
+// CHECK17-NEXT:    store i64 [[TMP21]], i64* [[TMP58]], align 8
+// CHECK17-NEXT:    [[TMP59:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 6
+// CHECK17-NEXT:    store i8* null, i8** [[TMP59]], align 8
+// CHECK17-NEXT:    [[TMP60:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 7
+// CHECK17-NEXT:    [[TMP61:%.*]] = bitcast i8** [[TMP60]] to %struct.St**
+// CHECK17-NEXT:    store %struct.St* [[THIS1]], %struct.St** [[TMP61]], align 8
+// CHECK17-NEXT:    [[TMP62:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 7
+// CHECK17-NEXT:    [[TMP63:%.*]] = bitcast i8** [[TMP62]] to i32**
+// CHECK17-NEXT:    store i32* [[B2]], i32** [[TMP63]], align 8
+// CHECK17-NEXT:    [[TMP64:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 7
+// CHECK17-NEXT:    store i8* null, i8** [[TMP64]], align 8
+// CHECK17-NEXT:    [[TMP65:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 8
+// CHECK17-NEXT:    [[TMP66:%.*]] = bitcast i8** [[TMP65]] to %struct.St**
+// CHECK17-NEXT:    store %struct.St* [[THIS1]], %struct.St** [[TMP66]], align 8
+// CHECK17-NEXT:    [[TMP67:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 8
+// CHECK17-NEXT:    [[TMP68:%.*]] = bitcast i8** [[TMP67]] to i32**
+// CHECK17-NEXT:    store i32* [[A3]], i32** [[TMP68]], align 8
+// CHECK17-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 8
 // CHECK17-NEXT:    store i8* null, i8** [[TMP69]], align 8
-// CHECK17-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 8
-// CHECK17-NEXT:    [[TMP71:%.*]] = bitcast i8** [[TMP70]] to %struct.St**
-// CHECK17-NEXT:    store %struct.St* [[THIS1]], %struct.St** [[TMP71]], align 8
-// CHECK17-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 8
-// CHECK17-NEXT:    [[TMP73:%.*]] = bitcast i8** [[TMP72]] to i32**
-// CHECK17-NEXT:    store i32* [[A3]], i32** [[TMP73]], align 8
-// CHECK17-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 8
-// CHECK17-NEXT:    store i64 4, i64* [[TMP74]], align 8
-// CHECK17-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 8
-// CHECK17-NEXT:    store i8* null, i8** [[TMP75]], align 8
-// CHECK17-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 9
-// CHECK17-NEXT:    [[TMP77:%.*]] = bitcast i8** [[TMP76]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP12]], i64* [[TMP77]], align 8
-// CHECK17-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 9
-// CHECK17-NEXT:    [[TMP79:%.*]] = bitcast i8** [[TMP78]] to i64*
-// CHECK17-NEXT:    store i64 [[TMP12]], i64* [[TMP79]], align 8
-// CHECK17-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 9
-// CHECK17-NEXT:    store i64 4, i64* [[TMP80]], align 8
-// CHECK17-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 9
-// CHECK17-NEXT:    store i8* null, i8** [[TMP81]], align 8
-// CHECK17-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK17-NEXT:    [[TMP85:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2St7St_funcEPS_iPg_l144.region_id, i32 10, i8** [[TMP82]], i8** [[TMP83]], i64* [[TMP84]], i64* getelementptr inbounds ([10 x i64], [10 x i64]* @.offload_maptypes.2, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK17-NEXT:    [[TMP86:%.*]] = icmp ne i32 [[TMP85]], 0
-// CHECK17-NEXT:    br i1 [[TMP86]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK17-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 9
+// CHECK17-NEXT:    [[TMP71:%.*]] = bitcast i8** [[TMP70]] to i64*
+// CHECK17-NEXT:    store i64 [[TMP12]], i64* [[TMP71]], align 8
+// CHECK17-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 9
+// CHECK17-NEXT:    [[TMP73:%.*]] = bitcast i8** [[TMP72]] to i64*
+// CHECK17-NEXT:    store i64 [[TMP12]], i64* [[TMP73]], align 8
+// CHECK17-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 9
+// CHECK17-NEXT:    store i8* null, i8** [[TMP74]], align 8
+// CHECK17-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP78:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2St7St_funcEPS_iPg_l144.region_id, i32 10, i8** [[TMP75]], i8** [[TMP76]], i64* [[TMP77]], i64* getelementptr inbounds ([10 x i64], [10 x i64]* @.offload_maptypes.3, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK17-NEXT:    [[TMP79:%.*]] = icmp ne i32 [[TMP78]], 0
+// CHECK17-NEXT:    br i1 [[TMP79]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK17:       omp_offload.failed:
 // CHECK17-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2St7St_funcEPS_iPg_l144(%struct.St* [[TMP9]], i64 [[TMP1]], ppc_fp128* [[TMP10]], i64 [[TMP3]], i64 [[TMP5]], double* [[VLA]], %struct.St* [[THIS1]], i64 [[TMP12]]) #[[ATTR4]]
 // CHECK17-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK17:       omp_offload.cont:
-// CHECK17-NEXT:    [[TMP87:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK17-NEXT:    call void @llvm.stackrestore(i8* [[TMP87]])
+// CHECK17-NEXT:    [[TMP80:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK17-NEXT:    call void @llvm.stackrestore(i8* [[TMP80]])
 // CHECK17-NEXT:    ret void
 //
 //
@@ -3843,14 +3817,14 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) {
 // CHECK18-NEXT:    [[TMP12:%.*]] = load i64, i64* [[N_CASTED]], align 8
 // CHECK18-NEXT:    [[TMP13:%.*]] = mul nuw i64 [[TMP3]], [[TMP5]]
 // CHECK18-NEXT:    [[TMP14:%.*]] = mul nuw i64 [[TMP13]], 8
-// CHECK18-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP16:%.*]] = bitcast i8** [[TMP15]] to float**
-// CHECK18-NEXT:    store float* [[TMP8]], float** [[TMP16]], align 8
-// CHECK18-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to float**
-// CHECK18-NEXT:    store float* [[TMP8]], float** [[TMP18]], align 8
-// CHECK18-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [8 x i64], [8 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK18-NEXT:    store i64 0, i64* [[TMP19]], align 8
+// CHECK18-NEXT:    [[TMP15:%.*]] = bitcast [8 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK18-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP15]], i8* align 8 bitcast ([8 x i64]* @.offload_sizes to i8*), i64 64, i1 false)
+// CHECK18-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP17:%.*]] = bitcast i8** [[TMP16]] to float**
+// CHECK18-NEXT:    store float* [[TMP8]], float** [[TMP17]], align 8
+// CHECK18-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to float**
+// CHECK18-NEXT:    store float* [[TMP8]], float** [[TMP19]], align 8
 // CHECK18-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
 // CHECK18-NEXT:    store i8* null, i8** [[TMP20]], align 8
 // CHECK18-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -3859,82 +3833,70 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) {
 // CHECK18-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK18-NEXT:    [[TMP24:%.*]] = bitcast i8** [[TMP23]] to %struct.St**
 // CHECK18-NEXT:    store %struct.St* [[TMP9]], %struct.St** [[TMP24]], align 8
-// CHECK18-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [8 x i64], [8 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK18-NEXT:    store i64 0, i64* [[TMP25]], align 8
-// CHECK18-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK18-NEXT:    store i8* null, i8** [[TMP26]], align 8
-// CHECK18-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK18-NEXT:    [[TMP28:%.*]] = bitcast i8** [[TMP27]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP1]], i64* [[TMP28]], align 8
-// CHECK18-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK18-NEXT:    [[TMP30:%.*]] = bitcast i8** [[TMP29]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP1]], i64* [[TMP30]], align 8
-// CHECK18-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [8 x i64], [8 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK18-NEXT:    store i64 8, i64* [[TMP31]], align 8
-// CHECK18-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK18-NEXT:    store i8* null, i8** [[TMP32]], align 8
-// CHECK18-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK18-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK18-NEXT:    store i8* null, i8** [[TMP25]], align 8
+// CHECK18-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK18-NEXT:    [[TMP27:%.*]] = bitcast i8** [[TMP26]] to i64*
+// CHECK18-NEXT:    store i64 [[TMP1]], i64* [[TMP27]], align 8
+// CHECK18-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK18-NEXT:    [[TMP29:%.*]] = bitcast i8** [[TMP28]] to i64*
+// CHECK18-NEXT:    store i64 [[TMP1]], i64* [[TMP29]], align 8
+// CHECK18-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK18-NEXT:    store i8* null, i8** [[TMP30]], align 8
+// CHECK18-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK18-NEXT:    [[TMP32:%.*]] = bitcast i8** [[TMP31]] to ppc_fp128**
+// CHECK18-NEXT:    store ppc_fp128* [[TMP10]], ppc_fp128** [[TMP32]], align 8
+// CHECK18-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
 // CHECK18-NEXT:    [[TMP34:%.*]] = bitcast i8** [[TMP33]] to ppc_fp128**
 // CHECK18-NEXT:    store ppc_fp128* [[TMP10]], ppc_fp128** [[TMP34]], align 8
-// CHECK18-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK18-NEXT:    [[TMP36:%.*]] = bitcast i8** [[TMP35]] to ppc_fp128**
-// CHECK18-NEXT:    store ppc_fp128* [[TMP10]], ppc_fp128** [[TMP36]], align 8
-// CHECK18-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [8 x i64], [8 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK18-NEXT:    store i64 0, i64* [[TMP37]], align 8
-// CHECK18-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
-// CHECK18-NEXT:    store i8* null, i8** [[TMP38]], align 8
-// CHECK18-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK18-NEXT:    [[TMP40:%.*]] = bitcast i8** [[TMP39]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP3]], i64* [[TMP40]], align 8
-// CHECK18-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK18-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
+// CHECK18-NEXT:    store i8* null, i8** [[TMP35]], align 8
+// CHECK18-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK18-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i64*
+// CHECK18-NEXT:    store i64 [[TMP3]], i64* [[TMP37]], align 8
+// CHECK18-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK18-NEXT:    [[TMP39:%.*]] = bitcast i8** [[TMP38]] to i64*
+// CHECK18-NEXT:    store i64 [[TMP3]], i64* [[TMP39]], align 8
+// CHECK18-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
+// CHECK18-NEXT:    store i8* null, i8** [[TMP40]], align 8
+// CHECK18-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 5
 // CHECK18-NEXT:    [[TMP42:%.*]] = bitcast i8** [[TMP41]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP3]], i64* [[TMP42]], align 8
-// CHECK18-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [8 x i64], [8 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK18-NEXT:    store i64 8, i64* [[TMP43]], align 8
-// CHECK18-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
-// CHECK18-NEXT:    store i8* null, i8** [[TMP44]], align 8
-// CHECK18-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 5
-// CHECK18-NEXT:    [[TMP46:%.*]] = bitcast i8** [[TMP45]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP5]], i64* [[TMP46]], align 8
-// CHECK18-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 5
-// CHECK18-NEXT:    [[TMP48:%.*]] = bitcast i8** [[TMP47]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP5]], i64* [[TMP48]], align 8
-// CHECK18-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [8 x i64], [8 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 5
-// CHECK18-NEXT:    store i64 8, i64* [[TMP49]], align 8
-// CHECK18-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 5
-// CHECK18-NEXT:    store i8* null, i8** [[TMP50]], align 8
-// CHECK18-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 6
-// CHECK18-NEXT:    [[TMP52:%.*]] = bitcast i8** [[TMP51]] to double**
-// CHECK18-NEXT:    store double* [[VLA]], double** [[TMP52]], align 8
-// CHECK18-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 6
-// CHECK18-NEXT:    [[TMP54:%.*]] = bitcast i8** [[TMP53]] to double**
-// CHECK18-NEXT:    store double* [[VLA]], double** [[TMP54]], align 8
-// CHECK18-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [8 x i64], [8 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 6
-// CHECK18-NEXT:    store i64 [[TMP14]], i64* [[TMP55]], align 8
-// CHECK18-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 6
+// CHECK18-NEXT:    store i64 [[TMP5]], i64* [[TMP42]], align 8
+// CHECK18-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 5
+// CHECK18-NEXT:    [[TMP44:%.*]] = bitcast i8** [[TMP43]] to i64*
+// CHECK18-NEXT:    store i64 [[TMP5]], i64* [[TMP44]], align 8
+// CHECK18-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 5
+// CHECK18-NEXT:    store i8* null, i8** [[TMP45]], align 8
+// CHECK18-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 6
+// CHECK18-NEXT:    [[TMP47:%.*]] = bitcast i8** [[TMP46]] to double**
+// CHECK18-NEXT:    store double* [[VLA]], double** [[TMP47]], align 8
+// CHECK18-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 6
+// CHECK18-NEXT:    [[TMP49:%.*]] = bitcast i8** [[TMP48]] to double**
+// CHECK18-NEXT:    store double* [[VLA]], double** [[TMP49]], align 8
+// CHECK18-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [8 x i64], [8 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 6
+// CHECK18-NEXT:    store i64 [[TMP14]], i64* [[TMP50]], align 8
+// CHECK18-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 6
+// CHECK18-NEXT:    store i8* null, i8** [[TMP51]], align 8
+// CHECK18-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 7
+// CHECK18-NEXT:    [[TMP53:%.*]] = bitcast i8** [[TMP52]] to i64*
+// CHECK18-NEXT:    store i64 [[TMP12]], i64* [[TMP53]], align 8
+// CHECK18-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 7
+// CHECK18-NEXT:    [[TMP55:%.*]] = bitcast i8** [[TMP54]] to i64*
+// CHECK18-NEXT:    store i64 [[TMP12]], i64* [[TMP55]], align 8
+// CHECK18-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 7
 // CHECK18-NEXT:    store i8* null, i8** [[TMP56]], align 8
-// CHECK18-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 7
-// CHECK18-NEXT:    [[TMP58:%.*]] = bitcast i8** [[TMP57]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP12]], i64* [[TMP58]], align 8
-// CHECK18-NEXT:    [[TMP59:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 7
-// CHECK18-NEXT:    [[TMP60:%.*]] = bitcast i8** [[TMP59]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP12]], i64* [[TMP60]], align 8
-// CHECK18-NEXT:    [[TMP61:%.*]] = getelementptr inbounds [8 x i64], [8 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
-// CHECK18-NEXT:    store i64 4, i64* [[TMP61]], align 8
-// CHECK18-NEXT:    [[TMP62:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 7
-// CHECK18-NEXT:    store i8* null, i8** [[TMP62]], align 8
-// CHECK18-NEXT:    [[TMP63:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP64:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP65:%.*]] = getelementptr inbounds [8 x i64], [8 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP66:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1:[0-9]+]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z10array_funcPfP2StiPg_l152.region_id, i32 8, i8** [[TMP63]], i8** [[TMP64]], i64* [[TMP65]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK18-NEXT:    [[TMP67:%.*]] = icmp ne i32 [[TMP66]], 0
-// CHECK18-NEXT:    br i1 [[TMP67]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK18-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP59:%.*]] = getelementptr inbounds [8 x i64], [8 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP60:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1:[0-9]+]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z10array_funcPfP2StiPg_l152.region_id, i32 8, i8** [[TMP57]], i8** [[TMP58]], i64* [[TMP59]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK18-NEXT:    [[TMP61:%.*]] = icmp ne i32 [[TMP60]], 0
+// CHECK18-NEXT:    br i1 [[TMP61]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK18:       omp_offload.failed:
 // CHECK18-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z10array_funcPfP2StiPg_l152(float* [[TMP8]], %struct.St* [[TMP9]], i64 [[TMP1]], ppc_fp128* [[TMP10]], i64 [[TMP3]], i64 [[TMP5]], double* [[VLA]], i64 [[TMP12]]) #[[ATTR4:[0-9]+]]
 // CHECK18-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK18:       omp_offload.cont:
-// CHECK18-NEXT:    [[TMP68:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK18-NEXT:    call void @llvm.stackrestore(i8* [[TMP68]])
+// CHECK18-NEXT:    [[TMP62:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK18-NEXT:    call void @llvm.stackrestore(i8* [[TMP62]])
 // CHECK18-NEXT:    ret void
 //
 //
@@ -4075,14 +4037,14 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) {
 // CHECK18-NEXT:    [[TMP19:%.*]] = ptrtoint i8* [[TMP16]] to i64
 // CHECK18-NEXT:    [[TMP20:%.*]] = sub i64 [[TMP18]], [[TMP19]]
 // CHECK18-NEXT:    [[TMP21:%.*]] = sdiv exact i64 [[TMP20]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64)
-// CHECK18-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP23:%.*]] = bitcast i8** [[TMP22]] to %struct.St**
-// CHECK18-NEXT:    store %struct.St* [[TMP9]], %struct.St** [[TMP23]], align 8
-// CHECK18-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP25:%.*]] = bitcast i8** [[TMP24]] to %struct.St**
-// CHECK18-NEXT:    store %struct.St* [[TMP9]], %struct.St** [[TMP25]], align 8
-// CHECK18-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK18-NEXT:    store i64 0, i64* [[TMP26]], align 8
+// CHECK18-NEXT:    [[TMP22:%.*]] = bitcast [10 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK18-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP22]], i8* align 8 bitcast ([10 x i64]* @.offload_sizes.2 to i8*), i64 80, i1 false)
+// CHECK18-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP24:%.*]] = bitcast i8** [[TMP23]] to %struct.St**
+// CHECK18-NEXT:    store %struct.St* [[TMP9]], %struct.St** [[TMP24]], align 8
+// CHECK18-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP26:%.*]] = bitcast i8** [[TMP25]] to %struct.St**
+// CHECK18-NEXT:    store %struct.St* [[TMP9]], %struct.St** [[TMP26]], align 8
 // CHECK18-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
 // CHECK18-NEXT:    store i8* null, i8** [[TMP27]], align 8
 // CHECK18-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -4091,102 +4053,88 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) {
 // CHECK18-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK18-NEXT:    [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i64*
 // CHECK18-NEXT:    store i64 [[TMP1]], i64* [[TMP31]], align 8
-// CHECK18-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK18-NEXT:    store i64 8, i64* [[TMP32]], align 8
-// CHECK18-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK18-NEXT:    store i8* null, i8** [[TMP33]], align 8
-// CHECK18-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK18-NEXT:    [[TMP35:%.*]] = bitcast i8** [[TMP34]] to ppc_fp128**
-// CHECK18-NEXT:    store ppc_fp128* [[TMP10]], ppc_fp128** [[TMP35]], align 8
-// CHECK18-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK18-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to ppc_fp128**
-// CHECK18-NEXT:    store ppc_fp128* [[TMP10]], ppc_fp128** [[TMP37]], align 8
-// CHECK18-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK18-NEXT:    store i64 0, i64* [[TMP38]], align 8
-// CHECK18-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
-// CHECK18-NEXT:    store i8* null, i8** [[TMP39]], align 8
-// CHECK18-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK18-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK18-NEXT:    store i8* null, i8** [[TMP32]], align 8
+// CHECK18-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK18-NEXT:    [[TMP34:%.*]] = bitcast i8** [[TMP33]] to ppc_fp128**
+// CHECK18-NEXT:    store ppc_fp128* [[TMP10]], ppc_fp128** [[TMP34]], align 8
+// CHECK18-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK18-NEXT:    [[TMP36:%.*]] = bitcast i8** [[TMP35]] to ppc_fp128**
+// CHECK18-NEXT:    store ppc_fp128* [[TMP10]], ppc_fp128** [[TMP36]], align 8
+// CHECK18-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK18-NEXT:    store i8* null, i8** [[TMP37]], align 8
+// CHECK18-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK18-NEXT:    [[TMP39:%.*]] = bitcast i8** [[TMP38]] to i64*
+// CHECK18-NEXT:    store i64 [[TMP3]], i64* [[TMP39]], align 8
+// CHECK18-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
 // CHECK18-NEXT:    [[TMP41:%.*]] = bitcast i8** [[TMP40]] to i64*
 // CHECK18-NEXT:    store i64 [[TMP3]], i64* [[TMP41]], align 8
-// CHECK18-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK18-NEXT:    [[TMP43:%.*]] = bitcast i8** [[TMP42]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP3]], i64* [[TMP43]], align 8
-// CHECK18-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK18-NEXT:    store i64 8, i64* [[TMP44]], align 8
-// CHECK18-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
-// CHECK18-NEXT:    store i8* null, i8** [[TMP45]], align 8
-// CHECK18-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK18-NEXT:    [[TMP47:%.*]] = bitcast i8** [[TMP46]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP5]], i64* [[TMP47]], align 8
-// CHECK18-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
-// CHECK18-NEXT:    [[TMP49:%.*]] = bitcast i8** [[TMP48]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP5]], i64* [[TMP49]], align 8
-// CHECK18-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK18-NEXT:    store i64 8, i64* [[TMP50]], align 8
-// CHECK18-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
-// CHECK18-NEXT:    store i8* null, i8** [[TMP51]], align 8
-// CHECK18-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 5
-// CHECK18-NEXT:    [[TMP53:%.*]] = bitcast i8** [[TMP52]] to double**
-// CHECK18-NEXT:    store double* [[VLA]], double** [[TMP53]], align 8
-// CHECK18-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 5
-// CHECK18-NEXT:    [[TMP55:%.*]] = bitcast i8** [[TMP54]] to double**
-// CHECK18-NEXT:    store double* [[VLA]], double** [[TMP55]], align 8
-// CHECK18-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 5
-// CHECK18-NEXT:    store i64 [[TMP14]], i64* [[TMP56]], align 8
-// CHECK18-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 5
-// CHECK18-NEXT:    store i8* null, i8** [[TMP57]], align 8
-// CHECK18-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 6
-// CHECK18-NEXT:    [[TMP59:%.*]] = bitcast i8** [[TMP58]] to %struct.St**
-// CHECK18-NEXT:    store %struct.St* [[THIS1]], %struct.St** [[TMP59]], align 8
-// CHECK18-NEXT:    [[TMP60:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 6
-// CHECK18-NEXT:    [[TMP61:%.*]] = bitcast i8** [[TMP60]] to i32**
-// CHECK18-NEXT:    store i32* [[A3]], i32** [[TMP61]], align 8
-// CHECK18-NEXT:    [[TMP62:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 6
-// CHECK18-NEXT:    store i64 [[TMP21]], i64* [[TMP62]], align 8
-// CHECK18-NEXT:    [[TMP63:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 6
-// CHECK18-NEXT:    store i8* null, i8** [[TMP63]], align 8
-// CHECK18-NEXT:    [[TMP64:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 7
-// CHECK18-NEXT:    [[TMP65:%.*]] = bitcast i8** [[TMP64]] to %struct.St**
-// CHECK18-NEXT:    store %struct.St* [[THIS1]], %struct.St** [[TMP65]], align 8
-// CHECK18-NEXT:    [[TMP66:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 7
-// CHECK18-NEXT:    [[TMP67:%.*]] = bitcast i8** [[TMP66]] to i32**
-// CHECK18-NEXT:    store i32* [[B2]], i32** [[TMP67]], align 8
-// CHECK18-NEXT:    [[TMP68:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
-// CHECK18-NEXT:    store i64 4, i64* [[TMP68]], align 8
-// CHECK18-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 7
+// CHECK18-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
+// CHECK18-NEXT:    store i8* null, i8** [[TMP42]], align 8
+// CHECK18-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK18-NEXT:    [[TMP44:%.*]] = bitcast i8** [[TMP43]] to i64*
+// CHECK18-NEXT:    store i64 [[TMP5]], i64* [[TMP44]], align 8
+// CHECK18-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK18-NEXT:    [[TMP46:%.*]] = bitcast i8** [[TMP45]] to i64*
+// CHECK18-NEXT:    store i64 [[TMP5]], i64* [[TMP46]], align 8
+// CHECK18-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
+// CHECK18-NEXT:    store i8* null, i8** [[TMP47]], align 8
+// CHECK18-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 5
+// CHECK18-NEXT:    [[TMP49:%.*]] = bitcast i8** [[TMP48]] to double**
+// CHECK18-NEXT:    store double* [[VLA]], double** [[TMP49]], align 8
+// CHECK18-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 5
+// CHECK18-NEXT:    [[TMP51:%.*]] = bitcast i8** [[TMP50]] to double**
+// CHECK18-NEXT:    store double* [[VLA]], double** [[TMP51]], align 8
+// CHECK18-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 5
+// CHECK18-NEXT:    store i64 [[TMP14]], i64* [[TMP52]], align 8
+// CHECK18-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 5
+// CHECK18-NEXT:    store i8* null, i8** [[TMP53]], align 8
+// CHECK18-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 6
+// CHECK18-NEXT:    [[TMP55:%.*]] = bitcast i8** [[TMP54]] to %struct.St**
+// CHECK18-NEXT:    store %struct.St* [[THIS1]], %struct.St** [[TMP55]], align 8
+// CHECK18-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 6
+// CHECK18-NEXT:    [[TMP57:%.*]] = bitcast i8** [[TMP56]] to i32**
+// CHECK18-NEXT:    store i32* [[A3]], i32** [[TMP57]], align 8
+// CHECK18-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 6
+// CHECK18-NEXT:    store i64 [[TMP21]], i64* [[TMP58]], align 8
+// CHECK18-NEXT:    [[TMP59:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 6
+// CHECK18-NEXT:    store i8* null, i8** [[TMP59]], align 8
+// CHECK18-NEXT:    [[TMP60:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 7
+// CHECK18-NEXT:    [[TMP61:%.*]] = bitcast i8** [[TMP60]] to %struct.St**
+// CHECK18-NEXT:    store %struct.St* [[THIS1]], %struct.St** [[TMP61]], align 8
+// CHECK18-NEXT:    [[TMP62:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 7
+// CHECK18-NEXT:    [[TMP63:%.*]] = bitcast i8** [[TMP62]] to i32**
+// CHECK18-NEXT:    store i32* [[B2]], i32** [[TMP63]], align 8
+// CHECK18-NEXT:    [[TMP64:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 7
+// CHECK18-NEXT:    store i8* null, i8** [[TMP64]], align 8
+// CHECK18-NEXT:    [[TMP65:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 8
+// CHECK18-NEXT:    [[TMP66:%.*]] = bitcast i8** [[TMP65]] to %struct.St**
+// CHECK18-NEXT:    store %struct.St* [[THIS1]], %struct.St** [[TMP66]], align 8
+// CHECK18-NEXT:    [[TMP67:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 8
+// CHECK18-NEXT:    [[TMP68:%.*]] = bitcast i8** [[TMP67]] to i32**
+// CHECK18-NEXT:    store i32* [[A3]], i32** [[TMP68]], align 8
+// CHECK18-NEXT:    [[TMP69:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 8
 // CHECK18-NEXT:    store i8* null, i8** [[TMP69]], align 8
-// CHECK18-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 8
-// CHECK18-NEXT:    [[TMP71:%.*]] = bitcast i8** [[TMP70]] to %struct.St**
-// CHECK18-NEXT:    store %struct.St* [[THIS1]], %struct.St** [[TMP71]], align 8
-// CHECK18-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 8
-// CHECK18-NEXT:    [[TMP73:%.*]] = bitcast i8** [[TMP72]] to i32**
-// CHECK18-NEXT:    store i32* [[A3]], i32** [[TMP73]], align 8
-// CHECK18-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 8
-// CHECK18-NEXT:    store i64 4, i64* [[TMP74]], align 8
-// CHECK18-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 8
-// CHECK18-NEXT:    store i8* null, i8** [[TMP75]], align 8
-// CHECK18-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 9
-// CHECK18-NEXT:    [[TMP77:%.*]] = bitcast i8** [[TMP76]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP12]], i64* [[TMP77]], align 8
-// CHECK18-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 9
-// CHECK18-NEXT:    [[TMP79:%.*]] = bitcast i8** [[TMP78]] to i64*
-// CHECK18-NEXT:    store i64 [[TMP12]], i64* [[TMP79]], align 8
-// CHECK18-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 9
-// CHECK18-NEXT:    store i64 4, i64* [[TMP80]], align 8
-// CHECK18-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 9
-// CHECK18-NEXT:    store i8* null, i8** [[TMP81]], align 8
-// CHECK18-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP84:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK18-NEXT:    [[TMP85:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2St7St_funcEPS_iPg_l144.region_id, i32 10, i8** [[TMP82]], i8** [[TMP83]], i64* [[TMP84]], i64* getelementptr inbounds ([10 x i64], [10 x i64]* @.offload_maptypes.2, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK18-NEXT:    [[TMP86:%.*]] = icmp ne i32 [[TMP85]], 0
-// CHECK18-NEXT:    br i1 [[TMP86]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK18-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 9
+// CHECK18-NEXT:    [[TMP71:%.*]] = bitcast i8** [[TMP70]] to i64*
+// CHECK18-NEXT:    store i64 [[TMP12]], i64* [[TMP71]], align 8
+// CHECK18-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 9
+// CHECK18-NEXT:    [[TMP73:%.*]] = bitcast i8** [[TMP72]] to i64*
+// CHECK18-NEXT:    store i64 [[TMP12]], i64* [[TMP73]], align 8
+// CHECK18-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 9
+// CHECK18-NEXT:    store i8* null, i8** [[TMP74]], align 8
+// CHECK18-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP77:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK18-NEXT:    [[TMP78:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2St7St_funcEPS_iPg_l144.region_id, i32 10, i8** [[TMP75]], i8** [[TMP76]], i64* [[TMP77]], i64* getelementptr inbounds ([10 x i64], [10 x i64]* @.offload_maptypes.3, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK18-NEXT:    [[TMP79:%.*]] = icmp ne i32 [[TMP78]], 0
+// CHECK18-NEXT:    br i1 [[TMP79]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK18:       omp_offload.failed:
 // CHECK18-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2St7St_funcEPS_iPg_l144(%struct.St* [[TMP9]], i64 [[TMP1]], ppc_fp128* [[TMP10]], i64 [[TMP3]], i64 [[TMP5]], double* [[VLA]], %struct.St* [[THIS1]], i64 [[TMP12]]) #[[ATTR4]]
 // CHECK18-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK18:       omp_offload.cont:
-// CHECK18-NEXT:    [[TMP87:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
-// CHECK18-NEXT:    call void @llvm.stackrestore(i8* [[TMP87]])
+// CHECK18-NEXT:    [[TMP80:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK18-NEXT:    call void @llvm.stackrestore(i8* [[TMP80]])
 // CHECK18-NEXT:    ret void
 //
 //
@@ -4332,14 +4280,14 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) {
 // CHECK19-NEXT:    [[TMP10:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]]
 // CHECK19-NEXT:    [[TMP11:%.*]] = mul nuw i32 [[TMP10]], 8
 // CHECK19-NEXT:    [[TMP12:%.*]] = sext i32 [[TMP11]] to i64
-// CHECK19-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to float**
-// CHECK19-NEXT:    store float* [[TMP5]], float** [[TMP14]], align 4
-// CHECK19-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP16:%.*]] = bitcast i8** [[TMP15]] to float**
-// CHECK19-NEXT:    store float* [[TMP5]], float** [[TMP16]], align 4
-// CHECK19-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [8 x i64], [8 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK19-NEXT:    store i64 0, i64* [[TMP17]], align 4
+// CHECK19-NEXT:    [[TMP13:%.*]] = bitcast [8 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK19-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP13]], i8* align 4 bitcast ([8 x i64]* @.offload_sizes to i8*), i32 64, i1 false)
+// CHECK19-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to float**
+// CHECK19-NEXT:    store float* [[TMP5]], float** [[TMP15]], align 4
+// CHECK19-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP17:%.*]] = bitcast i8** [[TMP16]] to float**
+// CHECK19-NEXT:    store float* [[TMP5]], float** [[TMP17]], align 4
 // CHECK19-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
 // CHECK19-NEXT:    store i8* null, i8** [[TMP18]], align 4
 // CHECK19-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -4348,82 +4296,70 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) {
 // CHECK19-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK19-NEXT:    [[TMP22:%.*]] = bitcast i8** [[TMP21]] to %struct.St**
 // CHECK19-NEXT:    store %struct.St* [[TMP6]], %struct.St** [[TMP22]], align 4
-// CHECK19-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [8 x i64], [8 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK19-NEXT:    store i64 0, i64* [[TMP23]], align 4
-// CHECK19-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK19-NEXT:    store i8* null, i8** [[TMP24]], align 4
-// CHECK19-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK19-NEXT:    [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP0]], i32* [[TMP26]], align 4
-// CHECK19-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK19-NEXT:    [[TMP28:%.*]] = bitcast i8** [[TMP27]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP0]], i32* [[TMP28]], align 4
-// CHECK19-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [8 x i64], [8 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK19-NEXT:    store i64 4, i64* [[TMP29]], align 4
-// CHECK19-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK19-NEXT:    store i8* null, i8** [[TMP30]], align 4
-// CHECK19-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK19-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK19-NEXT:    store i8* null, i8** [[TMP23]], align 4
+// CHECK19-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK19-NEXT:    [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP0]], i32* [[TMP25]], align 4
+// CHECK19-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK19-NEXT:    [[TMP27:%.*]] = bitcast i8** [[TMP26]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP0]], i32* [[TMP27]], align 4
+// CHECK19-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK19-NEXT:    store i8* null, i8** [[TMP28]], align 4
+// CHECK19-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK19-NEXT:    [[TMP30:%.*]] = bitcast i8** [[TMP29]] to x86_fp80**
+// CHECK19-NEXT:    store x86_fp80* [[TMP7]], x86_fp80** [[TMP30]], align 4
+// CHECK19-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
 // CHECK19-NEXT:    [[TMP32:%.*]] = bitcast i8** [[TMP31]] to x86_fp80**
 // CHECK19-NEXT:    store x86_fp80* [[TMP7]], x86_fp80** [[TMP32]], align 4
-// CHECK19-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK19-NEXT:    [[TMP34:%.*]] = bitcast i8** [[TMP33]] to x86_fp80**
-// CHECK19-NEXT:    store x86_fp80* [[TMP7]], x86_fp80** [[TMP34]], align 4
-// CHECK19-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [8 x i64], [8 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK19-NEXT:    store i64 0, i64* [[TMP35]], align 4
-// CHECK19-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
-// CHECK19-NEXT:    store i8* null, i8** [[TMP36]], align 4
-// CHECK19-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK19-NEXT:    [[TMP38:%.*]] = bitcast i8** [[TMP37]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP1]], i32* [[TMP38]], align 4
-// CHECK19-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK19-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
+// CHECK19-NEXT:    store i8* null, i8** [[TMP33]], align 4
+// CHECK19-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK19-NEXT:    [[TMP35:%.*]] = bitcast i8** [[TMP34]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP1]], i32* [[TMP35]], align 4
+// CHECK19-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK19-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP1]], i32* [[TMP37]], align 4
+// CHECK19-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
+// CHECK19-NEXT:    store i8* null, i8** [[TMP38]], align 4
+// CHECK19-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 5
 // CHECK19-NEXT:    [[TMP40:%.*]] = bitcast i8** [[TMP39]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP1]], i32* [[TMP40]], align 4
-// CHECK19-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [8 x i64], [8 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK19-NEXT:    store i64 4, i64* [[TMP41]], align 4
-// CHECK19-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
-// CHECK19-NEXT:    store i8* null, i8** [[TMP42]], align 4
-// CHECK19-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 5
-// CHECK19-NEXT:    [[TMP44:%.*]] = bitcast i8** [[TMP43]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP2]], i32* [[TMP44]], align 4
-// CHECK19-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 5
-// CHECK19-NEXT:    [[TMP46:%.*]] = bitcast i8** [[TMP45]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP2]], i32* [[TMP46]], align 4
-// CHECK19-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [8 x i64], [8 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 5
-// CHECK19-NEXT:    store i64 4, i64* [[TMP47]], align 4
-// CHECK19-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 5
-// CHECK19-NEXT:    store i8* null, i8** [[TMP48]], align 4
-// CHECK19-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 6
-// CHECK19-NEXT:    [[TMP50:%.*]] = bitcast i8** [[TMP49]] to double**
-// CHECK19-NEXT:    store double* [[VLA]], double** [[TMP50]], align 4
-// CHECK19-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 6
-// CHECK19-NEXT:    [[TMP52:%.*]] = bitcast i8** [[TMP51]] to double**
-// CHECK19-NEXT:    store double* [[VLA]], double** [[TMP52]], align 4
-// CHECK19-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [8 x i64], [8 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 6
-// CHECK19-NEXT:    store i64 [[TMP12]], i64* [[TMP53]], align 4
-// CHECK19-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 6
+// CHECK19-NEXT:    store i32 [[TMP2]], i32* [[TMP40]], align 4
+// CHECK19-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 5
+// CHECK19-NEXT:    [[TMP42:%.*]] = bitcast i8** [[TMP41]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP2]], i32* [[TMP42]], align 4
+// CHECK19-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 5
+// CHECK19-NEXT:    store i8* null, i8** [[TMP43]], align 4
+// CHECK19-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 6
+// CHECK19-NEXT:    [[TMP45:%.*]] = bitcast i8** [[TMP44]] to double**
+// CHECK19-NEXT:    store double* [[VLA]], double** [[TMP45]], align 4
+// CHECK19-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 6
+// CHECK19-NEXT:    [[TMP47:%.*]] = bitcast i8** [[TMP46]] to double**
+// CHECK19-NEXT:    store double* [[VLA]], double** [[TMP47]], align 4
+// CHECK19-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [8 x i64], [8 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 6
+// CHECK19-NEXT:    store i64 [[TMP12]], i64* [[TMP48]], align 4
+// CHECK19-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 6
+// CHECK19-NEXT:    store i8* null, i8** [[TMP49]], align 4
+// CHECK19-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 7
+// CHECK19-NEXT:    [[TMP51:%.*]] = bitcast i8** [[TMP50]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP9]], i32* [[TMP51]], align 4
+// CHECK19-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 7
+// CHECK19-NEXT:    [[TMP53:%.*]] = bitcast i8** [[TMP52]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP9]], i32* [[TMP53]], align 4
+// CHECK19-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 7
 // CHECK19-NEXT:    store i8* null, i8** [[TMP54]], align 4
-// CHECK19-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 7
-// CHECK19-NEXT:    [[TMP56:%.*]] = bitcast i8** [[TMP55]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP9]], i32* [[TMP56]], align 4
-// CHECK19-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 7
-// CHECK19-NEXT:    [[TMP58:%.*]] = bitcast i8** [[TMP57]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP9]], i32* [[TMP58]], align 4
-// CHECK19-NEXT:    [[TMP59:%.*]] = getelementptr inbounds [8 x i64], [8 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
-// CHECK19-NEXT:    store i64 4, i64* [[TMP59]], align 4
-// CHECK19-NEXT:    [[TMP60:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 7
-// CHECK19-NEXT:    store i8* null, i8** [[TMP60]], align 4
-// CHECK19-NEXT:    [[TMP61:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP62:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP63:%.*]] = getelementptr inbounds [8 x i64], [8 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP64:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1:[0-9]+]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z10array_funcPfP2StiPe_l152.region_id, i32 8, i8** [[TMP61]], i8** [[TMP62]], i64* [[TMP63]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK19-NEXT:    [[TMP65:%.*]] = icmp ne i32 [[TMP64]], 0
-// CHECK19-NEXT:    br i1 [[TMP65]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK19-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [8 x i64], [8 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP58:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1:[0-9]+]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z10array_funcPfP2StiPe_l152.region_id, i32 8, i8** [[TMP55]], i8** [[TMP56]], i64* [[TMP57]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK19-NEXT:    [[TMP59:%.*]] = icmp ne i32 [[TMP58]], 0
+// CHECK19-NEXT:    br i1 [[TMP59]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK19:       omp_offload.failed:
 // CHECK19-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z10array_funcPfP2StiPe_l152(float* [[TMP5]], %struct.St* [[TMP6]], i32 [[TMP0]], x86_fp80* [[TMP7]], i32 [[TMP1]], i32 [[TMP2]], double* [[VLA]], i32 [[TMP9]]) #[[ATTR4:[0-9]+]]
 // CHECK19-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK19:       omp_offload.cont:
-// CHECK19-NEXT:    [[TMP66:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK19-NEXT:    call void @llvm.stackrestore(i8* [[TMP66]])
+// CHECK19-NEXT:    [[TMP60:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK19-NEXT:    call void @llvm.stackrestore(i8* [[TMP60]])
 // CHECK19-NEXT:    ret void
 //
 //
@@ -4560,14 +4496,14 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) {
 // CHECK19-NEXT:    [[TMP17:%.*]] = ptrtoint i8* [[TMP14]] to i64
 // CHECK19-NEXT:    [[TMP18:%.*]] = sub i64 [[TMP16]], [[TMP17]]
 // CHECK19-NEXT:    [[TMP19:%.*]] = sdiv exact i64 [[TMP18]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64)
-// CHECK19-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to %struct.St**
-// CHECK19-NEXT:    store %struct.St* [[TMP6]], %struct.St** [[TMP21]], align 4
-// CHECK19-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP23:%.*]] = bitcast i8** [[TMP22]] to %struct.St**
-// CHECK19-NEXT:    store %struct.St* [[TMP6]], %struct.St** [[TMP23]], align 4
-// CHECK19-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK19-NEXT:    store i64 0, i64* [[TMP24]], align 4
+// CHECK19-NEXT:    [[TMP20:%.*]] = bitcast [10 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK19-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP20]], i8* align 4 bitcast ([10 x i64]* @.offload_sizes.2 to i8*), i32 80, i1 false)
+// CHECK19-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP22:%.*]] = bitcast i8** [[TMP21]] to %struct.St**
+// CHECK19-NEXT:    store %struct.St* [[TMP6]], %struct.St** [[TMP22]], align 4
+// CHECK19-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP24:%.*]] = bitcast i8** [[TMP23]] to %struct.St**
+// CHECK19-NEXT:    store %struct.St* [[TMP6]], %struct.St** [[TMP24]], align 4
 // CHECK19-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
 // CHECK19-NEXT:    store i8* null, i8** [[TMP25]], align 4
 // CHECK19-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -4576,102 +4512,88 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) {
 // CHECK19-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK19-NEXT:    [[TMP29:%.*]] = bitcast i8** [[TMP28]] to i32*
 // CHECK19-NEXT:    store i32 [[TMP0]], i32* [[TMP29]], align 4
-// CHECK19-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK19-NEXT:    store i64 4, i64* [[TMP30]], align 4
-// CHECK19-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK19-NEXT:    store i8* null, i8** [[TMP31]], align 4
-// CHECK19-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK19-NEXT:    [[TMP33:%.*]] = bitcast i8** [[TMP32]] to x86_fp80**
-// CHECK19-NEXT:    store x86_fp80* [[TMP7]], x86_fp80** [[TMP33]], align 4
-// CHECK19-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK19-NEXT:    [[TMP35:%.*]] = bitcast i8** [[TMP34]] to x86_fp80**
-// CHECK19-NEXT:    store x86_fp80* [[TMP7]], x86_fp80** [[TMP35]], align 4
-// CHECK19-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK19-NEXT:    store i64 0, i64* [[TMP36]], align 4
-// CHECK19-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK19-NEXT:    store i8* null, i8** [[TMP37]], align 4
-// CHECK19-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK19-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK19-NEXT:    store i8* null, i8** [[TMP30]], align 4
+// CHECK19-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK19-NEXT:    [[TMP32:%.*]] = bitcast i8** [[TMP31]] to x86_fp80**
+// CHECK19-NEXT:    store x86_fp80* [[TMP7]], x86_fp80** [[TMP32]], align 4
+// CHECK19-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK19-NEXT:    [[TMP34:%.*]] = bitcast i8** [[TMP33]] to x86_fp80**
+// CHECK19-NEXT:    store x86_fp80* [[TMP7]], x86_fp80** [[TMP34]], align 4
+// CHECK19-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK19-NEXT:    store i8* null, i8** [[TMP35]], align 4
+// CHECK19-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK19-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP1]], i32* [[TMP37]], align 4
+// CHECK19-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
 // CHECK19-NEXT:    [[TMP39:%.*]] = bitcast i8** [[TMP38]] to i32*
 // CHECK19-NEXT:    store i32 [[TMP1]], i32* [[TMP39]], align 4
-// CHECK19-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK19-NEXT:    [[TMP41:%.*]] = bitcast i8** [[TMP40]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP1]], i32* [[TMP41]], align 4
-// CHECK19-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK19-NEXT:    store i64 4, i64* [[TMP42]], align 4
-// CHECK19-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
-// CHECK19-NEXT:    store i8* null, i8** [[TMP43]], align 4
-// CHECK19-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK19-NEXT:    [[TMP45:%.*]] = bitcast i8** [[TMP44]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP2]], i32* [[TMP45]], align 4
-// CHECK19-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
-// CHECK19-NEXT:    [[TMP47:%.*]] = bitcast i8** [[TMP46]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP2]], i32* [[TMP47]], align 4
-// CHECK19-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK19-NEXT:    store i64 4, i64* [[TMP48]], align 4
-// CHECK19-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
-// CHECK19-NEXT:    store i8* null, i8** [[TMP49]], align 4
-// CHECK19-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 5
-// CHECK19-NEXT:    [[TMP51:%.*]] = bitcast i8** [[TMP50]] to double**
-// CHECK19-NEXT:    store double* [[VLA]], double** [[TMP51]], align 4
-// CHECK19-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 5
-// CHECK19-NEXT:    [[TMP53:%.*]] = bitcast i8** [[TMP52]] to double**
-// CHECK19-NEXT:    store double* [[VLA]], double** [[TMP53]], align 4
-// CHECK19-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 5
-// CHECK19-NEXT:    store i64 [[TMP12]], i64* [[TMP54]], align 4
-// CHECK19-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 5
-// CHECK19-NEXT:    store i8* null, i8** [[TMP55]], align 4
-// CHECK19-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 6
-// CHECK19-NEXT:    [[TMP57:%.*]] = bitcast i8** [[TMP56]] to %struct.St**
-// CHECK19-NEXT:    store %struct.St* [[THIS1]], %struct.St** [[TMP57]], align 4
-// CHECK19-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 6
-// CHECK19-NEXT:    [[TMP59:%.*]] = bitcast i8** [[TMP58]] to i32**
-// CHECK19-NEXT:    store i32* [[A3]], i32** [[TMP59]], align 4
-// CHECK19-NEXT:    [[TMP60:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 6
-// CHECK19-NEXT:    store i64 [[TMP19]], i64* [[TMP60]], align 4
-// CHECK19-NEXT:    [[TMP61:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 6
-// CHECK19-NEXT:    store i8* null, i8** [[TMP61]], align 4
-// CHECK19-NEXT:    [[TMP62:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 7
-// CHECK19-NEXT:    [[TMP63:%.*]] = bitcast i8** [[TMP62]] to %struct.St**
-// CHECK19-NEXT:    store %struct.St* [[THIS1]], %struct.St** [[TMP63]], align 4
-// CHECK19-NEXT:    [[TMP64:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 7
-// CHECK19-NEXT:    [[TMP65:%.*]] = bitcast i8** [[TMP64]] to i32**
-// CHECK19-NEXT:    store i32* [[B2]], i32** [[TMP65]], align 4
-// CHECK19-NEXT:    [[TMP66:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
-// CHECK19-NEXT:    store i64 4, i64* [[TMP66]], align 4
-// CHECK19-NEXT:    [[TMP67:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 7
+// CHECK19-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
+// CHECK19-NEXT:    store i8* null, i8** [[TMP40]], align 4
+// CHECK19-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK19-NEXT:    [[TMP42:%.*]] = bitcast i8** [[TMP41]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP2]], i32* [[TMP42]], align 4
+// CHECK19-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK19-NEXT:    [[TMP44:%.*]] = bitcast i8** [[TMP43]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP2]], i32* [[TMP44]], align 4
+// CHECK19-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
+// CHECK19-NEXT:    store i8* null, i8** [[TMP45]], align 4
+// CHECK19-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 5
+// CHECK19-NEXT:    [[TMP47:%.*]] = bitcast i8** [[TMP46]] to double**
+// CHECK19-NEXT:    store double* [[VLA]], double** [[TMP47]], align 4
+// CHECK19-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 5
+// CHECK19-NEXT:    [[TMP49:%.*]] = bitcast i8** [[TMP48]] to double**
+// CHECK19-NEXT:    store double* [[VLA]], double** [[TMP49]], align 4
+// CHECK19-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 5
+// CHECK19-NEXT:    store i64 [[TMP12]], i64* [[TMP50]], align 4
+// CHECK19-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 5
+// CHECK19-NEXT:    store i8* null, i8** [[TMP51]], align 4
+// CHECK19-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 6
+// CHECK19-NEXT:    [[TMP53:%.*]] = bitcast i8** [[TMP52]] to %struct.St**
+// CHECK19-NEXT:    store %struct.St* [[THIS1]], %struct.St** [[TMP53]], align 4
+// CHECK19-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 6
+// CHECK19-NEXT:    [[TMP55:%.*]] = bitcast i8** [[TMP54]] to i32**
+// CHECK19-NEXT:    store i32* [[A3]], i32** [[TMP55]], align 4
+// CHECK19-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 6
+// CHECK19-NEXT:    store i64 [[TMP19]], i64* [[TMP56]], align 4
+// CHECK19-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 6
+// CHECK19-NEXT:    store i8* null, i8** [[TMP57]], align 4
+// CHECK19-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 7
+// CHECK19-NEXT:    [[TMP59:%.*]] = bitcast i8** [[TMP58]] to %struct.St**
+// CHECK19-NEXT:    store %struct.St* [[THIS1]], %struct.St** [[TMP59]], align 4
+// CHECK19-NEXT:    [[TMP60:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 7
+// CHECK19-NEXT:    [[TMP61:%.*]] = bitcast i8** [[TMP60]] to i32**
+// CHECK19-NEXT:    store i32* [[B2]], i32** [[TMP61]], align 4
+// CHECK19-NEXT:    [[TMP62:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 7
+// CHECK19-NEXT:    store i8* null, i8** [[TMP62]], align 4
+// CHECK19-NEXT:    [[TMP63:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 8
+// CHECK19-NEXT:    [[TMP64:%.*]] = bitcast i8** [[TMP63]] to %struct.St**
+// CHECK19-NEXT:    store %struct.St* [[THIS1]], %struct.St** [[TMP64]], align 4
+// CHECK19-NEXT:    [[TMP65:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 8
+// CHECK19-NEXT:    [[TMP66:%.*]] = bitcast i8** [[TMP65]] to i32**
+// CHECK19-NEXT:    store i32* [[A3]], i32** [[TMP66]], align 4
+// CHECK19-NEXT:    [[TMP67:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 8
 // CHECK19-NEXT:    store i8* null, i8** [[TMP67]], align 4
-// CHECK19-NEXT:    [[TMP68:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 8
-// CHECK19-NEXT:    [[TMP69:%.*]] = bitcast i8** [[TMP68]] to %struct.St**
-// CHECK19-NEXT:    store %struct.St* [[THIS1]], %struct.St** [[TMP69]], align 4
-// CHECK19-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 8
-// CHECK19-NEXT:    [[TMP71:%.*]] = bitcast i8** [[TMP70]] to i32**
-// CHECK19-NEXT:    store i32* [[A3]], i32** [[TMP71]], align 4
-// CHECK19-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 8
-// CHECK19-NEXT:    store i64 4, i64* [[TMP72]], align 4
-// CHECK19-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 8
-// CHECK19-NEXT:    store i8* null, i8** [[TMP73]], align 4
-// CHECK19-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 9
-// CHECK19-NEXT:    [[TMP75:%.*]] = bitcast i8** [[TMP74]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP9]], i32* [[TMP75]], align 4
-// CHECK19-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 9
-// CHECK19-NEXT:    [[TMP77:%.*]] = bitcast i8** [[TMP76]] to i32*
-// CHECK19-NEXT:    store i32 [[TMP9]], i32* [[TMP77]], align 4
-// CHECK19-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 9
-// CHECK19-NEXT:    store i64 4, i64* [[TMP78]], align 4
-// CHECK19-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 9
-// CHECK19-NEXT:    store i8* null, i8** [[TMP79]], align 4
-// CHECK19-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK19-NEXT:    [[TMP83:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2St7St_funcEPS_iPe_l144.region_id, i32 10, i8** [[TMP80]], i8** [[TMP81]], i64* [[TMP82]], i64* getelementptr inbounds ([10 x i64], [10 x i64]* @.offload_maptypes.2, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK19-NEXT:    [[TMP84:%.*]] = icmp ne i32 [[TMP83]], 0
-// CHECK19-NEXT:    br i1 [[TMP84]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK19-NEXT:    [[TMP68:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 9
+// CHECK19-NEXT:    [[TMP69:%.*]] = bitcast i8** [[TMP68]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP9]], i32* [[TMP69]], align 4
+// CHECK19-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 9
+// CHECK19-NEXT:    [[TMP71:%.*]] = bitcast i8** [[TMP70]] to i32*
+// CHECK19-NEXT:    store i32 [[TMP9]], i32* [[TMP71]], align 4
+// CHECK19-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 9
+// CHECK19-NEXT:    store i8* null, i8** [[TMP72]], align 4
+// CHECK19-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK19-NEXT:    [[TMP76:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2St7St_funcEPS_iPe_l144.region_id, i32 10, i8** [[TMP73]], i8** [[TMP74]], i64* [[TMP75]], i64* getelementptr inbounds ([10 x i64], [10 x i64]* @.offload_maptypes.3, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK19-NEXT:    [[TMP77:%.*]] = icmp ne i32 [[TMP76]], 0
+// CHECK19-NEXT:    br i1 [[TMP77]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK19:       omp_offload.failed:
 // CHECK19-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2St7St_funcEPS_iPe_l144(%struct.St* [[TMP6]], i32 [[TMP0]], x86_fp80* [[TMP7]], i32 [[TMP1]], i32 [[TMP2]], double* [[VLA]], %struct.St* [[THIS1]], i32 [[TMP9]]) #[[ATTR4]]
 // CHECK19-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK19:       omp_offload.cont:
-// CHECK19-NEXT:    [[TMP85:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK19-NEXT:    call void @llvm.stackrestore(i8* [[TMP85]])
+// CHECK19-NEXT:    [[TMP78:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK19-NEXT:    call void @llvm.stackrestore(i8* [[TMP78]])
 // CHECK19-NEXT:    ret void
 //
 //
@@ -4814,14 +4736,14 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) {
 // CHECK20-NEXT:    [[TMP10:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]]
 // CHECK20-NEXT:    [[TMP11:%.*]] = mul nuw i32 [[TMP10]], 8
 // CHECK20-NEXT:    [[TMP12:%.*]] = sext i32 [[TMP11]] to i64
-// CHECK20-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP14:%.*]] = bitcast i8** [[TMP13]] to float**
-// CHECK20-NEXT:    store float* [[TMP5]], float** [[TMP14]], align 4
-// CHECK20-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP16:%.*]] = bitcast i8** [[TMP15]] to float**
-// CHECK20-NEXT:    store float* [[TMP5]], float** [[TMP16]], align 4
-// CHECK20-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [8 x i64], [8 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK20-NEXT:    store i64 0, i64* [[TMP17]], align 4
+// CHECK20-NEXT:    [[TMP13:%.*]] = bitcast [8 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK20-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP13]], i8* align 4 bitcast ([8 x i64]* @.offload_sizes to i8*), i32 64, i1 false)
+// CHECK20-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP15:%.*]] = bitcast i8** [[TMP14]] to float**
+// CHECK20-NEXT:    store float* [[TMP5]], float** [[TMP15]], align 4
+// CHECK20-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP17:%.*]] = bitcast i8** [[TMP16]] to float**
+// CHECK20-NEXT:    store float* [[TMP5]], float** [[TMP17]], align 4
 // CHECK20-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
 // CHECK20-NEXT:    store i8* null, i8** [[TMP18]], align 4
 // CHECK20-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -4830,82 +4752,70 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) {
 // CHECK20-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK20-NEXT:    [[TMP22:%.*]] = bitcast i8** [[TMP21]] to %struct.St**
 // CHECK20-NEXT:    store %struct.St* [[TMP6]], %struct.St** [[TMP22]], align 4
-// CHECK20-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [8 x i64], [8 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK20-NEXT:    store i64 0, i64* [[TMP23]], align 4
-// CHECK20-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK20-NEXT:    store i8* null, i8** [[TMP24]], align 4
-// CHECK20-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK20-NEXT:    [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP0]], i32* [[TMP26]], align 4
-// CHECK20-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK20-NEXT:    [[TMP28:%.*]] = bitcast i8** [[TMP27]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP0]], i32* [[TMP28]], align 4
-// CHECK20-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [8 x i64], [8 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK20-NEXT:    store i64 4, i64* [[TMP29]], align 4
-// CHECK20-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK20-NEXT:    store i8* null, i8** [[TMP30]], align 4
-// CHECK20-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK20-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK20-NEXT:    store i8* null, i8** [[TMP23]], align 4
+// CHECK20-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK20-NEXT:    [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP0]], i32* [[TMP25]], align 4
+// CHECK20-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK20-NEXT:    [[TMP27:%.*]] = bitcast i8** [[TMP26]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP0]], i32* [[TMP27]], align 4
+// CHECK20-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK20-NEXT:    store i8* null, i8** [[TMP28]], align 4
+// CHECK20-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK20-NEXT:    [[TMP30:%.*]] = bitcast i8** [[TMP29]] to x86_fp80**
+// CHECK20-NEXT:    store x86_fp80* [[TMP7]], x86_fp80** [[TMP30]], align 4
+// CHECK20-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
 // CHECK20-NEXT:    [[TMP32:%.*]] = bitcast i8** [[TMP31]] to x86_fp80**
 // CHECK20-NEXT:    store x86_fp80* [[TMP7]], x86_fp80** [[TMP32]], align 4
-// CHECK20-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK20-NEXT:    [[TMP34:%.*]] = bitcast i8** [[TMP33]] to x86_fp80**
-// CHECK20-NEXT:    store x86_fp80* [[TMP7]], x86_fp80** [[TMP34]], align 4
-// CHECK20-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [8 x i64], [8 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK20-NEXT:    store i64 0, i64* [[TMP35]], align 4
-// CHECK20-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
-// CHECK20-NEXT:    store i8* null, i8** [[TMP36]], align 4
-// CHECK20-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK20-NEXT:    [[TMP38:%.*]] = bitcast i8** [[TMP37]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP1]], i32* [[TMP38]], align 4
-// CHECK20-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK20-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
+// CHECK20-NEXT:    store i8* null, i8** [[TMP33]], align 4
+// CHECK20-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK20-NEXT:    [[TMP35:%.*]] = bitcast i8** [[TMP34]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP1]], i32* [[TMP35]], align 4
+// CHECK20-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK20-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP1]], i32* [[TMP37]], align 4
+// CHECK20-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
+// CHECK20-NEXT:    store i8* null, i8** [[TMP38]], align 4
+// CHECK20-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 5
 // CHECK20-NEXT:    [[TMP40:%.*]] = bitcast i8** [[TMP39]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP1]], i32* [[TMP40]], align 4
-// CHECK20-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [8 x i64], [8 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK20-NEXT:    store i64 4, i64* [[TMP41]], align 4
-// CHECK20-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
-// CHECK20-NEXT:    store i8* null, i8** [[TMP42]], align 4
-// CHECK20-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 5
-// CHECK20-NEXT:    [[TMP44:%.*]] = bitcast i8** [[TMP43]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP2]], i32* [[TMP44]], align 4
-// CHECK20-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 5
-// CHECK20-NEXT:    [[TMP46:%.*]] = bitcast i8** [[TMP45]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP2]], i32* [[TMP46]], align 4
-// CHECK20-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [8 x i64], [8 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 5
-// CHECK20-NEXT:    store i64 4, i64* [[TMP47]], align 4
-// CHECK20-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 5
-// CHECK20-NEXT:    store i8* null, i8** [[TMP48]], align 4
-// CHECK20-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 6
-// CHECK20-NEXT:    [[TMP50:%.*]] = bitcast i8** [[TMP49]] to double**
-// CHECK20-NEXT:    store double* [[VLA]], double** [[TMP50]], align 4
-// CHECK20-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 6
-// CHECK20-NEXT:    [[TMP52:%.*]] = bitcast i8** [[TMP51]] to double**
-// CHECK20-NEXT:    store double* [[VLA]], double** [[TMP52]], align 4
-// CHECK20-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [8 x i64], [8 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 6
-// CHECK20-NEXT:    store i64 [[TMP12]], i64* [[TMP53]], align 4
-// CHECK20-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 6
+// CHECK20-NEXT:    store i32 [[TMP2]], i32* [[TMP40]], align 4
+// CHECK20-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 5
+// CHECK20-NEXT:    [[TMP42:%.*]] = bitcast i8** [[TMP41]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP2]], i32* [[TMP42]], align 4
+// CHECK20-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 5
+// CHECK20-NEXT:    store i8* null, i8** [[TMP43]], align 4
+// CHECK20-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 6
+// CHECK20-NEXT:    [[TMP45:%.*]] = bitcast i8** [[TMP44]] to double**
+// CHECK20-NEXT:    store double* [[VLA]], double** [[TMP45]], align 4
+// CHECK20-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 6
+// CHECK20-NEXT:    [[TMP47:%.*]] = bitcast i8** [[TMP46]] to double**
+// CHECK20-NEXT:    store double* [[VLA]], double** [[TMP47]], align 4
+// CHECK20-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [8 x i64], [8 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 6
+// CHECK20-NEXT:    store i64 [[TMP12]], i64* [[TMP48]], align 4
+// CHECK20-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 6
+// CHECK20-NEXT:    store i8* null, i8** [[TMP49]], align 4
+// CHECK20-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 7
+// CHECK20-NEXT:    [[TMP51:%.*]] = bitcast i8** [[TMP50]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP9]], i32* [[TMP51]], align 4
+// CHECK20-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 7
+// CHECK20-NEXT:    [[TMP53:%.*]] = bitcast i8** [[TMP52]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP9]], i32* [[TMP53]], align 4
+// CHECK20-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 7
 // CHECK20-NEXT:    store i8* null, i8** [[TMP54]], align 4
-// CHECK20-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 7
-// CHECK20-NEXT:    [[TMP56:%.*]] = bitcast i8** [[TMP55]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP9]], i32* [[TMP56]], align 4
-// CHECK20-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 7
-// CHECK20-NEXT:    [[TMP58:%.*]] = bitcast i8** [[TMP57]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP9]], i32* [[TMP58]], align 4
-// CHECK20-NEXT:    [[TMP59:%.*]] = getelementptr inbounds [8 x i64], [8 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
-// CHECK20-NEXT:    store i64 4, i64* [[TMP59]], align 4
-// CHECK20-NEXT:    [[TMP60:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 7
-// CHECK20-NEXT:    store i8* null, i8** [[TMP60]], align 4
-// CHECK20-NEXT:    [[TMP61:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP62:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP63:%.*]] = getelementptr inbounds [8 x i64], [8 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP64:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1:[0-9]+]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z10array_funcPfP2StiPe_l152.region_id, i32 8, i8** [[TMP61]], i8** [[TMP62]], i64* [[TMP63]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK20-NEXT:    [[TMP65:%.*]] = icmp ne i32 [[TMP64]], 0
-// CHECK20-NEXT:    br i1 [[TMP65]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK20-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [8 x i64], [8 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP58:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1:[0-9]+]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z10array_funcPfP2StiPe_l152.region_id, i32 8, i8** [[TMP55]], i8** [[TMP56]], i64* [[TMP57]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK20-NEXT:    [[TMP59:%.*]] = icmp ne i32 [[TMP58]], 0
+// CHECK20-NEXT:    br i1 [[TMP59]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK20:       omp_offload.failed:
 // CHECK20-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z10array_funcPfP2StiPe_l152(float* [[TMP5]], %struct.St* [[TMP6]], i32 [[TMP0]], x86_fp80* [[TMP7]], i32 [[TMP1]], i32 [[TMP2]], double* [[VLA]], i32 [[TMP9]]) #[[ATTR4:[0-9]+]]
 // CHECK20-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK20:       omp_offload.cont:
-// CHECK20-NEXT:    [[TMP66:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK20-NEXT:    call void @llvm.stackrestore(i8* [[TMP66]])
+// CHECK20-NEXT:    [[TMP60:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK20-NEXT:    call void @llvm.stackrestore(i8* [[TMP60]])
 // CHECK20-NEXT:    ret void
 //
 //
@@ -5042,14 +4952,14 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) {
 // CHECK20-NEXT:    [[TMP17:%.*]] = ptrtoint i8* [[TMP14]] to i64
 // CHECK20-NEXT:    [[TMP18:%.*]] = sub i64 [[TMP16]], [[TMP17]]
 // CHECK20-NEXT:    [[TMP19:%.*]] = sdiv exact i64 [[TMP18]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64)
-// CHECK20-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to %struct.St**
-// CHECK20-NEXT:    store %struct.St* [[TMP6]], %struct.St** [[TMP21]], align 4
-// CHECK20-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP23:%.*]] = bitcast i8** [[TMP22]] to %struct.St**
-// CHECK20-NEXT:    store %struct.St* [[TMP6]], %struct.St** [[TMP23]], align 4
-// CHECK20-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK20-NEXT:    store i64 0, i64* [[TMP24]], align 4
+// CHECK20-NEXT:    [[TMP20:%.*]] = bitcast [10 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK20-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP20]], i8* align 4 bitcast ([10 x i64]* @.offload_sizes.2 to i8*), i32 80, i1 false)
+// CHECK20-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP22:%.*]] = bitcast i8** [[TMP21]] to %struct.St**
+// CHECK20-NEXT:    store %struct.St* [[TMP6]], %struct.St** [[TMP22]], align 4
+// CHECK20-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP24:%.*]] = bitcast i8** [[TMP23]] to %struct.St**
+// CHECK20-NEXT:    store %struct.St* [[TMP6]], %struct.St** [[TMP24]], align 4
 // CHECK20-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
 // CHECK20-NEXT:    store i8* null, i8** [[TMP25]], align 4
 // CHECK20-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
@@ -5058,102 +4968,88 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) {
 // CHECK20-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
 // CHECK20-NEXT:    [[TMP29:%.*]] = bitcast i8** [[TMP28]] to i32*
 // CHECK20-NEXT:    store i32 [[TMP0]], i32* [[TMP29]], align 4
-// CHECK20-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
-// CHECK20-NEXT:    store i64 4, i64* [[TMP30]], align 4
-// CHECK20-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK20-NEXT:    store i8* null, i8** [[TMP31]], align 4
-// CHECK20-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
-// CHECK20-NEXT:    [[TMP33:%.*]] = bitcast i8** [[TMP32]] to x86_fp80**
-// CHECK20-NEXT:    store x86_fp80* [[TMP7]], x86_fp80** [[TMP33]], align 4
-// CHECK20-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
-// CHECK20-NEXT:    [[TMP35:%.*]] = bitcast i8** [[TMP34]] to x86_fp80**
-// CHECK20-NEXT:    store x86_fp80* [[TMP7]], x86_fp80** [[TMP35]], align 4
-// CHECK20-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 2
-// CHECK20-NEXT:    store i64 0, i64* [[TMP36]], align 4
-// CHECK20-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
-// CHECK20-NEXT:    store i8* null, i8** [[TMP37]], align 4
-// CHECK20-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK20-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK20-NEXT:    store i8* null, i8** [[TMP30]], align 4
+// CHECK20-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK20-NEXT:    [[TMP32:%.*]] = bitcast i8** [[TMP31]] to x86_fp80**
+// CHECK20-NEXT:    store x86_fp80* [[TMP7]], x86_fp80** [[TMP32]], align 4
+// CHECK20-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK20-NEXT:    [[TMP34:%.*]] = bitcast i8** [[TMP33]] to x86_fp80**
+// CHECK20-NEXT:    store x86_fp80* [[TMP7]], x86_fp80** [[TMP34]], align 4
+// CHECK20-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK20-NEXT:    store i8* null, i8** [[TMP35]], align 4
+// CHECK20-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK20-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP1]], i32* [[TMP37]], align 4
+// CHECK20-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
 // CHECK20-NEXT:    [[TMP39:%.*]] = bitcast i8** [[TMP38]] to i32*
 // CHECK20-NEXT:    store i32 [[TMP1]], i32* [[TMP39]], align 4
-// CHECK20-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
-// CHECK20-NEXT:    [[TMP41:%.*]] = bitcast i8** [[TMP40]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP1]], i32* [[TMP41]], align 4
-// CHECK20-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK20-NEXT:    store i64 4, i64* [[TMP42]], align 4
-// CHECK20-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
-// CHECK20-NEXT:    store i8* null, i8** [[TMP43]], align 4
-// CHECK20-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
-// CHECK20-NEXT:    [[TMP45:%.*]] = bitcast i8** [[TMP44]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP2]], i32* [[TMP45]], align 4
-// CHECK20-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
-// CHECK20-NEXT:    [[TMP47:%.*]] = bitcast i8** [[TMP46]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP2]], i32* [[TMP47]], align 4
-// CHECK20-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
-// CHECK20-NEXT:    store i64 4, i64* [[TMP48]], align 4
-// CHECK20-NEXT:    [[TMP49:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
-// CHECK20-NEXT:    store i8* null, i8** [[TMP49]], align 4
-// CHECK20-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 5
-// CHECK20-NEXT:    [[TMP51:%.*]] = bitcast i8** [[TMP50]] to double**
-// CHECK20-NEXT:    store double* [[VLA]], double** [[TMP51]], align 4
-// CHECK20-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 5
-// CHECK20-NEXT:    [[TMP53:%.*]] = bitcast i8** [[TMP52]] to double**
-// CHECK20-NEXT:    store double* [[VLA]], double** [[TMP53]], align 4
-// CHECK20-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 5
-// CHECK20-NEXT:    store i64 [[TMP12]], i64* [[TMP54]], align 4
-// CHECK20-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 5
-// CHECK20-NEXT:    store i8* null, i8** [[TMP55]], align 4
-// CHECK20-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 6
-// CHECK20-NEXT:    [[TMP57:%.*]] = bitcast i8** [[TMP56]] to %struct.St**
-// CHECK20-NEXT:    store %struct.St* [[THIS1]], %struct.St** [[TMP57]], align 4
-// CHECK20-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 6
-// CHECK20-NEXT:    [[TMP59:%.*]] = bitcast i8** [[TMP58]] to i32**
-// CHECK20-NEXT:    store i32* [[A3]], i32** [[TMP59]], align 4
-// CHECK20-NEXT:    [[TMP60:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 6
-// CHECK20-NEXT:    store i64 [[TMP19]], i64* [[TMP60]], align 4
-// CHECK20-NEXT:    [[TMP61:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 6
-// CHECK20-NEXT:    store i8* null, i8** [[TMP61]], align 4
-// CHECK20-NEXT:    [[TMP62:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 7
-// CHECK20-NEXT:    [[TMP63:%.*]] = bitcast i8** [[TMP62]] to %struct.St**
-// CHECK20-NEXT:    store %struct.St* [[THIS1]], %struct.St** [[TMP63]], align 4
-// CHECK20-NEXT:    [[TMP64:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 7
-// CHECK20-NEXT:    [[TMP65:%.*]] = bitcast i8** [[TMP64]] to i32**
-// CHECK20-NEXT:    store i32* [[B2]], i32** [[TMP65]], align 4
-// CHECK20-NEXT:    [[TMP66:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
-// CHECK20-NEXT:    store i64 4, i64* [[TMP66]], align 4
-// CHECK20-NEXT:    [[TMP67:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 7
+// CHECK20-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
+// CHECK20-NEXT:    store i8* null, i8** [[TMP40]], align 4
+// CHECK20-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK20-NEXT:    [[TMP42:%.*]] = bitcast i8** [[TMP41]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP2]], i32* [[TMP42]], align 4
+// CHECK20-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK20-NEXT:    [[TMP44:%.*]] = bitcast i8** [[TMP43]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP2]], i32* [[TMP44]], align 4
+// CHECK20-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
+// CHECK20-NEXT:    store i8* null, i8** [[TMP45]], align 4
+// CHECK20-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 5
+// CHECK20-NEXT:    [[TMP47:%.*]] = bitcast i8** [[TMP46]] to double**
+// CHECK20-NEXT:    store double* [[VLA]], double** [[TMP47]], align 4
+// CHECK20-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 5
+// CHECK20-NEXT:    [[TMP49:%.*]] = bitcast i8** [[TMP48]] to double**
+// CHECK20-NEXT:    store double* [[VLA]], double** [[TMP49]], align 4
+// CHECK20-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 5
+// CHECK20-NEXT:    store i64 [[TMP12]], i64* [[TMP50]], align 4
+// CHECK20-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 5
+// CHECK20-NEXT:    store i8* null, i8** [[TMP51]], align 4
+// CHECK20-NEXT:    [[TMP52:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 6
+// CHECK20-NEXT:    [[TMP53:%.*]] = bitcast i8** [[TMP52]] to %struct.St**
+// CHECK20-NEXT:    store %struct.St* [[THIS1]], %struct.St** [[TMP53]], align 4
+// CHECK20-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 6
+// CHECK20-NEXT:    [[TMP55:%.*]] = bitcast i8** [[TMP54]] to i32**
+// CHECK20-NEXT:    store i32* [[A3]], i32** [[TMP55]], align 4
+// CHECK20-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 6
+// CHECK20-NEXT:    store i64 [[TMP19]], i64* [[TMP56]], align 4
+// CHECK20-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 6
+// CHECK20-NEXT:    store i8* null, i8** [[TMP57]], align 4
+// CHECK20-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 7
+// CHECK20-NEXT:    [[TMP59:%.*]] = bitcast i8** [[TMP58]] to %struct.St**
+// CHECK20-NEXT:    store %struct.St* [[THIS1]], %struct.St** [[TMP59]], align 4
+// CHECK20-NEXT:    [[TMP60:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 7
+// CHECK20-NEXT:    [[TMP61:%.*]] = bitcast i8** [[TMP60]] to i32**
+// CHECK20-NEXT:    store i32* [[B2]], i32** [[TMP61]], align 4
+// CHECK20-NEXT:    [[TMP62:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 7
+// CHECK20-NEXT:    store i8* null, i8** [[TMP62]], align 4
+// CHECK20-NEXT:    [[TMP63:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 8
+// CHECK20-NEXT:    [[TMP64:%.*]] = bitcast i8** [[TMP63]] to %struct.St**
+// CHECK20-NEXT:    store %struct.St* [[THIS1]], %struct.St** [[TMP64]], align 4
+// CHECK20-NEXT:    [[TMP65:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 8
+// CHECK20-NEXT:    [[TMP66:%.*]] = bitcast i8** [[TMP65]] to i32**
+// CHECK20-NEXT:    store i32* [[A3]], i32** [[TMP66]], align 4
+// CHECK20-NEXT:    [[TMP67:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 8
 // CHECK20-NEXT:    store i8* null, i8** [[TMP67]], align 4
-// CHECK20-NEXT:    [[TMP68:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 8
-// CHECK20-NEXT:    [[TMP69:%.*]] = bitcast i8** [[TMP68]] to %struct.St**
-// CHECK20-NEXT:    store %struct.St* [[THIS1]], %struct.St** [[TMP69]], align 4
-// CHECK20-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 8
-// CHECK20-NEXT:    [[TMP71:%.*]] = bitcast i8** [[TMP70]] to i32**
-// CHECK20-NEXT:    store i32* [[A3]], i32** [[TMP71]], align 4
-// CHECK20-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 8
-// CHECK20-NEXT:    store i64 4, i64* [[TMP72]], align 4
-// CHECK20-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 8
-// CHECK20-NEXT:    store i8* null, i8** [[TMP73]], align 4
-// CHECK20-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 9
-// CHECK20-NEXT:    [[TMP75:%.*]] = bitcast i8** [[TMP74]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP9]], i32* [[TMP75]], align 4
-// CHECK20-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 9
-// CHECK20-NEXT:    [[TMP77:%.*]] = bitcast i8** [[TMP76]] to i32*
-// CHECK20-NEXT:    store i32 [[TMP9]], i32* [[TMP77]], align 4
-// CHECK20-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 9
-// CHECK20-NEXT:    store i64 4, i64* [[TMP78]], align 4
-// CHECK20-NEXT:    [[TMP79:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 9
-// CHECK20-NEXT:    store i8* null, i8** [[TMP79]], align 4
-// CHECK20-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK20-NEXT:    [[TMP83:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2St7St_funcEPS_iPe_l144.region_id, i32 10, i8** [[TMP80]], i8** [[TMP81]], i64* [[TMP82]], i64* getelementptr inbounds ([10 x i64], [10 x i64]* @.offload_maptypes.2, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
-// CHECK20-NEXT:    [[TMP84:%.*]] = icmp ne i32 [[TMP83]], 0
-// CHECK20-NEXT:    br i1 [[TMP84]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK20-NEXT:    [[TMP68:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 9
+// CHECK20-NEXT:    [[TMP69:%.*]] = bitcast i8** [[TMP68]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP9]], i32* [[TMP69]], align 4
+// CHECK20-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 9
+// CHECK20-NEXT:    [[TMP71:%.*]] = bitcast i8** [[TMP70]] to i32*
+// CHECK20-NEXT:    store i32 [[TMP9]], i32* [[TMP71]], align 4
+// CHECK20-NEXT:    [[TMP72:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 9
+// CHECK20-NEXT:    store i8* null, i8** [[TMP72]], align 4
+// CHECK20-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [10 x i64], [10 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK20-NEXT:    [[TMP76:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2St7St_funcEPS_iPe_l144.region_id, i32 10, i8** [[TMP73]], i8** [[TMP74]], i64* [[TMP75]], i64* getelementptr inbounds ([10 x i64], [10 x i64]* @.offload_maptypes.3, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0)
+// CHECK20-NEXT:    [[TMP77:%.*]] = icmp ne i32 [[TMP76]], 0
+// CHECK20-NEXT:    br i1 [[TMP77]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK20:       omp_offload.failed:
 // CHECK20-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2St7St_funcEPS_iPe_l144(%struct.St* [[TMP6]], i32 [[TMP0]], x86_fp80* [[TMP7]], i32 [[TMP1]], i32 [[TMP2]], double* [[VLA]], %struct.St* [[THIS1]], i32 [[TMP9]]) #[[ATTR4]]
 // CHECK20-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK20:       omp_offload.cont:
-// CHECK20-NEXT:    [[TMP85:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
-// CHECK20-NEXT:    call void @llvm.stackrestore(i8* [[TMP85]])
+// CHECK20-NEXT:    [[TMP78:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK20-NEXT:    call void @llvm.stackrestore(i8* [[TMP78]])
 // CHECK20-NEXT:    ret void
 //
 //


        


More information about the cfe-commits mailing list