[clang] [Clang][OpenMP] Clang adding the addrSpace according to DataLayout fix (PR #65483)

Aaron Jarmusch via cfe-commits cfe-commits at lists.llvm.org
Tue Sep 12 12:40:52 PDT 2023


https://github.com/ajarmusch updated https://github.com/llvm/llvm-project/pull/65483:

>From 5a56cffb1e53e8c7415bf468a7edfb464fbc08a0 Mon Sep 17 00:00:00 2001
From: Aaron Jarmusch <jarmusch at udel.edu>
Date: Wed, 6 Sep 2023 13:56:16 +0000
Subject: [PATCH 1/4] [Clang][OpenMP] Clang adding the addrSpace according to
 DataLayout fix

---
 clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
index 1ec9bb3d5234714..927cc5215d9a1e5 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
@@ -3362,6 +3362,8 @@ Address CGOpenMPRuntimeGPU::getAddressOfLocalVariable(CodeGenFunction &CGF,
       break;
     case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
     case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
+      if (VD->hasGlobalStorage()) 
+        AS = getLangASFromTargetAS(CGF.CGM.getModule().getDataLayout().getDefaultGlobalsAddressSpace());
       break;
     }
     llvm::Type *VarTy = CGF.ConvertTypeForMem(VD->getType());

>From a2403c43f6841fed44412fbf178808d3962c12d1 Mon Sep 17 00:00:00 2001
From: Aaron Jarmusch <jarmusch at udel.edu>
Date: Wed, 6 Sep 2023 14:48:05 +0000
Subject: [PATCH 2/4] fixup! [Clang][OpenMP] Clang adding the addrSpace
 according to DataLayout fix

---
 clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
index 927cc5215d9a1e5..2e022b5cdf762f4 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
@@ -3362,8 +3362,7 @@ Address CGOpenMPRuntimeGPU::getAddressOfLocalVariable(CodeGenFunction &CGF,
       break;
     case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
     case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
-      if (VD->hasGlobalStorage()) 
-        AS = getLangASFromTargetAS(CGF.CGM.getModule().getDataLayout().getDefaultGlobalsAddressSpace());
+      AS = getLangASFromTargetAS(CGF.CGM.getModule().getDataLayout().getDefaultGlobalsAddressSpace());
       break;
     }
     llvm::Type *VarTy = CGF.ConvertTypeForMem(VD->getType());

>From 83d92c8a4f471b25c66e934c0da8ba33b3599c1c Mon Sep 17 00:00:00 2001
From: Aaron Jarmusch <jarmusch at udel.edu>
Date: Mon, 11 Sep 2023 19:05:10 +0000
Subject: [PATCH 3/4] fixup! fixup! [Clang][OpenMP] Clang adding the addrSpace
 according to DataLayout fix

---
 ...rget_uses_allocators_large_cap_codegen.cpp | 580 ++++++++++++++++++
 1 file changed, 580 insertions(+)
 create mode 100644 clang/test/OpenMP/test_target_uses_allocators_large_cap_codegen.cpp

diff --git a/clang/test/OpenMP/test_target_uses_allocators_large_cap_codegen.cpp b/clang/test/OpenMP/test_target_uses_allocators_large_cap_codegen.cpp
new file mode 100644
index 000000000000000..414b852e842b4b8
--- /dev/null
+++ b/clang/test/OpenMP/test_target_uses_allocators_large_cap_codegen.cpp
@@ -0,0 +1,580 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ --version 3
+// Test host codegen.
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK1
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK1
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK3
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK3
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
+
+// Test target codegen - host bc file has to be created first.
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK9
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK9
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix=CHECK11
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK11
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
+
+// Test host codegen.
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK1
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK1
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK3
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK3
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
+
+// Test target codegen - host bc file has to be created first.
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK9
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK9
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix=CHECK11
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK11
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
+
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+#define N 1024
+
+enum omp_allocator_handle_t {
+  omp_null_allocator = 0,
+  omp_default_mem_alloc = 1,
+  omp_large_cap_mem_alloc = 2,
+  omp_const_mem_alloc = 3,
+  omp_high_bw_mem_alloc = 4,
+  omp_low_lat_mem_alloc = 5,
+  omp_cgroup_mem_alloc = 6,
+  omp_pteam_mem_alloc = 7,
+  omp_thread_mem_alloc = 8,
+  KMP_ALLOCATOR_MAX_HANDLE = __UINTPTR_MAX__
+};
+
+
+int test_uses_allocators_large_cap() {
+  int errors = 0;
+  int x = 0;
+  int device_result = 0;
+  int result = 0;
+
+  for (int i = 0; i < N; i++) {
+    for (int j = 0; j < N; j++) {
+      result += j + i ;
+    }
+  }
+
+#pragma omp target uses_allocators(omp_large_cap_mem_alloc) allocate(omp_large_cap_mem_alloc: x) firstprivate(x) map(from: device_result)
+{
+  for (int i = 0; i < N; i++) {
+    for (int j = 0; j < N; j++) {
+      x += j + i;
+    }
+  }
+    device_result = x;
+}
+
+  if (result != device_result){
+    errors += 1;
+  }
+
+  return errors;
+}
+
+#endif
+// CHECK1-LABEL: define dso_local noundef signext i32 @_Z30test_uses_allocators_large_capv(
+// CHECK1-SAME: ) #[[ATTR0:[0-9]+]] {
+// CHECK1-NEXT:  entry:
+// CHECK1-NEXT:    [[ERRORS:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[X:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[DEVICE_RESULT:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[RESULT:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[I:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[J:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[X_CASTED:%.*]] = alloca i64, align 8
+// CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x ptr], align 8
+// CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x ptr], align 8
+// CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x ptr], align 8
+// CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK1-NEXT:    store i32 0, ptr [[ERRORS]], align 4
+// CHECK1-NEXT:    store i32 0, ptr [[X]], align 4
+// CHECK1-NEXT:    store i32 0, ptr [[DEVICE_RESULT]], align 4
+// CHECK1-NEXT:    store i32 0, ptr [[RESULT]], align 4
+// CHECK1-NEXT:    store i32 0, ptr [[I]], align 4
+// CHECK1-NEXT:    br label [[FOR_COND:%.*]]
+// CHECK1:       for.cond:
+// CHECK1-NEXT:    [[TMP0:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT:    [[CMP:%.*]] = icmp slt i32 [[TMP0]], 1024
+// CHECK1-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END7:%.*]]
+// CHECK1:       for.body:
+// CHECK1-NEXT:    store i32 0, ptr [[J]], align 4
+// CHECK1-NEXT:    br label [[FOR_COND1:%.*]]
+// CHECK1:       for.cond1:
+// CHECK1-NEXT:    [[TMP1:%.*]] = load i32, ptr [[J]], align 4
+// CHECK1-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[TMP1]], 1024
+// CHECK1-NEXT:    br i1 [[CMP2]], label [[FOR_BODY3:%.*]], label [[FOR_END:%.*]]
+// CHECK1:       for.body3:
+// CHECK1-NEXT:    [[TMP2:%.*]] = load i32, ptr [[J]], align 4
+// CHECK1-NEXT:    [[TMP3:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP2]], [[TMP3]]
+// CHECK1-NEXT:    [[TMP4:%.*]] = load i32, ptr [[RESULT]], align 4
+// CHECK1-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP4]], [[ADD]]
+// CHECK1-NEXT:    store i32 [[ADD4]], ptr [[RESULT]], align 4
+// CHECK1-NEXT:    br label [[FOR_INC:%.*]]
+// CHECK1:       for.inc:
+// CHECK1-NEXT:    [[TMP5:%.*]] = load i32, ptr [[J]], align 4
+// CHECK1-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP5]], 1
+// CHECK1-NEXT:    store i32 [[INC]], ptr [[J]], align 4
+// CHECK1-NEXT:    br label [[FOR_COND1]], !llvm.loop [[LOOP4:![0-9]+]]
+// CHECK1:       for.end:
+// CHECK1-NEXT:    br label [[FOR_INC5:%.*]]
+// CHECK1:       for.inc5:
+// CHECK1-NEXT:    [[TMP6:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT:    [[INC6:%.*]] = add nsw i32 [[TMP6]], 1
+// CHECK1-NEXT:    store i32 [[INC6]], ptr [[I]], align 4
+// CHECK1-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]]
+// CHECK1:       for.end7:
+// CHECK1-NEXT:    [[TMP7:%.*]] = load i32, ptr [[X]], align 4
+// CHECK1-NEXT:    store i32 [[TMP7]], ptr [[X_CASTED]], align 4
+// CHECK1-NEXT:    [[TMP8:%.*]] = load i64, ptr [[X_CASTED]], align 8
+// CHECK1-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK1-NEXT:    store i64 [[TMP8]], ptr [[TMP9]], align 8
+// CHECK1-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK1-NEXT:    store i64 [[TMP8]], ptr [[TMP10]], align 8
+// CHECK1-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
+// CHECK1-NEXT:    store ptr null, ptr [[TMP11]], align 8
+// CHECK1-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
+// CHECK1-NEXT:    store ptr [[DEVICE_RESULT]], ptr [[TMP12]], align 8
+// CHECK1-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
+// CHECK1-NEXT:    store ptr [[DEVICE_RESULT]], ptr [[TMP13]], align 8
+// CHECK1-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK1-NEXT:    store ptr null, ptr [[TMP14]], align 8
+// CHECK1-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK1-NEXT:    store i32 2, ptr [[TMP17]], align 4
+// CHECK1-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK1-NEXT:    store i32 2, ptr [[TMP18]], align 4
+// CHECK1-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK1-NEXT:    store ptr [[TMP15]], ptr [[TMP19]], align 8
+// CHECK1-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK1-NEXT:    store ptr [[TMP16]], ptr [[TMP20]], align 8
+// CHECK1-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK1-NEXT:    store ptr @.offload_sizes, ptr [[TMP21]], align 8
+// CHECK1-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK1-NEXT:    store ptr @.offload_maptypes, ptr [[TMP22]], align 8
+// CHECK1-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK1-NEXT:    store ptr null, ptr [[TMP23]], align 8
+// CHECK1-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK1-NEXT:    store ptr null, ptr [[TMP24]], align 8
+// CHECK1-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
+// CHECK1-NEXT:    store i64 0, ptr [[TMP25]], align 8
+// CHECK1-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
+// CHECK1-NEXT:    store i64 0, ptr [[TMP26]], align 8
+// CHECK1-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
+// CHECK1-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP27]], align 4
+// CHECK1-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
+// CHECK1-NEXT:    store [3 x i32] zeroinitializer, ptr [[TMP28]], align 4
+// CHECK1-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
+// CHECK1-NEXT:    store i32 0, ptr [[TMP29]], align 4
+// CHECK1-NEXT:    [[TMP30:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z30test_uses_allocators_large_capv_l106.region_id, ptr [[KERNEL_ARGS]])
+// CHECK1-NEXT:    [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
+// CHECK1-NEXT:    br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK1:       omp_offload.failed:
+// CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z30test_uses_allocators_large_capv_l106(i64 [[TMP8]], ptr [[DEVICE_RESULT]]) #[[ATTR2:[0-9]+]]
+// CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT]]
+// CHECK1:       omp_offload.cont:
+// CHECK1-NEXT:    [[TMP32:%.*]] = load i32, ptr [[RESULT]], align 4
+// CHECK1-NEXT:    [[TMP33:%.*]] = load i32, ptr [[DEVICE_RESULT]], align 4
+// CHECK1-NEXT:    [[CMP8:%.*]] = icmp ne i32 [[TMP32]], [[TMP33]]
+// CHECK1-NEXT:    br i1 [[CMP8]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
+// CHECK1:       if.then:
+// CHECK1-NEXT:    [[TMP34:%.*]] = load i32, ptr [[ERRORS]], align 4
+// CHECK1-NEXT:    [[ADD9:%.*]] = add nsw i32 [[TMP34]], 1
+// CHECK1-NEXT:    store i32 [[ADD9]], ptr [[ERRORS]], align 4
+// CHECK1-NEXT:    br label [[IF_END]]
+// CHECK1:       if.end:
+// CHECK1-NEXT:    [[TMP35:%.*]] = load i32, ptr [[ERRORS]], align 4
+// CHECK1-NEXT:    ret i32 [[TMP35]]
+//
+//
+// CHECK1-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z30test_uses_allocators_large_capv_l106(
+// CHECK1-SAME: i64 noundef [[X:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[DEVICE_RESULT:%.*]]) #[[ATTR1:[0-9]+]] {
+// CHECK1-NEXT:  entry:
+// CHECK1-NEXT:    [[X_ADDR:%.*]] = alloca i64, align 8
+// CHECK1-NEXT:    [[DEVICE_RESULT_ADDR:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT:    [[I:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[J:%.*]] = alloca i32, align 4
+// CHECK1-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK1-NEXT:    store i64 [[X]], ptr [[X_ADDR]], align 8
+// CHECK1-NEXT:    store ptr [[DEVICE_RESULT]], ptr [[DEVICE_RESULT_ADDR]], align 8
+// CHECK1-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[DEVICE_RESULT_ADDR]], align 8
+// CHECK1-NEXT:    [[DOTX__VOID_ADDR:%.*]] = call ptr @__kmpc_alloc(i32 [[TMP0]], i64 4, ptr inttoptr (i64 2 to ptr))
+// CHECK1-NEXT:    [[TMP2:%.*]] = load i32, ptr [[X_ADDR]], align 4
+// CHECK1-NEXT:    store i32 [[TMP2]], ptr [[DOTX__VOID_ADDR]], align 4
+// CHECK1-NEXT:    store i32 0, ptr [[I]], align 4
+// CHECK1-NEXT:    br label [[FOR_COND:%.*]]
+// CHECK1:       for.cond:
+// CHECK1-NEXT:    [[TMP3:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT:    [[CMP:%.*]] = icmp slt i32 [[TMP3]], 1024
+// CHECK1-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END7:%.*]]
+// CHECK1:       for.body:
+// CHECK1-NEXT:    store i32 0, ptr [[J]], align 4
+// CHECK1-NEXT:    br label [[FOR_COND1:%.*]]
+// CHECK1:       for.cond1:
+// CHECK1-NEXT:    [[TMP4:%.*]] = load i32, ptr [[J]], align 4
+// CHECK1-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[TMP4]], 1024
+// CHECK1-NEXT:    br i1 [[CMP2]], label [[FOR_BODY3:%.*]], label [[FOR_END:%.*]]
+// CHECK1:       for.body3:
+// CHECK1-NEXT:    [[TMP5:%.*]] = load i32, ptr [[J]], align 4
+// CHECK1-NEXT:    [[TMP6:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP5]], [[TMP6]]
+// CHECK1-NEXT:    [[TMP7:%.*]] = load i32, ptr [[DOTX__VOID_ADDR]], align 4
+// CHECK1-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP7]], [[ADD]]
+// CHECK1-NEXT:    store i32 [[ADD4]], ptr [[DOTX__VOID_ADDR]], align 4
+// CHECK1-NEXT:    br label [[FOR_INC:%.*]]
+// CHECK1:       for.inc:
+// CHECK1-NEXT:    [[TMP8:%.*]] = load i32, ptr [[J]], align 4
+// CHECK1-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP8]], 1
+// CHECK1-NEXT:    store i32 [[INC]], ptr [[J]], align 4
+// CHECK1-NEXT:    br label [[FOR_COND1]], !llvm.loop [[LOOP7:![0-9]+]]
+// CHECK1:       for.end:
+// CHECK1-NEXT:    br label [[FOR_INC5:%.*]]
+// CHECK1:       for.inc5:
+// CHECK1-NEXT:    [[TMP9:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT:    [[INC6:%.*]] = add nsw i32 [[TMP9]], 1
+// CHECK1-NEXT:    store i32 [[INC6]], ptr [[I]], align 4
+// CHECK1-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]]
+// CHECK1:       for.end7:
+// CHECK1-NEXT:    [[TMP10:%.*]] = load i32, ptr [[DOTX__VOID_ADDR]], align 4
+// CHECK1-NEXT:    store i32 [[TMP10]], ptr [[TMP1]], align 4
+// CHECK1-NEXT:    call void @__kmpc_free(i32 [[TMP0]], ptr [[DOTX__VOID_ADDR]], ptr inttoptr (i64 2 to ptr))
+// CHECK1-NEXT:    ret void
+//
+//
+// CHECK1-LABEL: define internal void @.omp_offloading.requires_reg(
+// CHECK1-SAME: ) #[[ATTR3:[0-9]+]] {
+// CHECK1-NEXT:  entry:
+// CHECK1-NEXT:    call void @__tgt_register_requires(i64 1)
+// CHECK1-NEXT:    ret void
+//
+//
+// CHECK3-LABEL: define dso_local noundef i32 @_Z30test_uses_allocators_large_capv(
+// CHECK3-SAME: ) #[[ATTR0:[0-9]+]] {
+// CHECK3-NEXT:  entry:
+// CHECK3-NEXT:    [[ERRORS:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[X:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[DEVICE_RESULT:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[RESULT:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[I:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[J:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[X_CASTED:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x ptr], align 4
+// CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x ptr], align 4
+// CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x ptr], align 4
+// CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK3-NEXT:    store i32 0, ptr [[ERRORS]], align 4
+// CHECK3-NEXT:    store i32 0, ptr [[X]], align 4
+// CHECK3-NEXT:    store i32 0, ptr [[DEVICE_RESULT]], align 4
+// CHECK3-NEXT:    store i32 0, ptr [[RESULT]], align 4
+// CHECK3-NEXT:    store i32 0, ptr [[I]], align 4
+// CHECK3-NEXT:    br label [[FOR_COND:%.*]]
+// CHECK3:       for.cond:
+// CHECK3-NEXT:    [[TMP0:%.*]] = load i32, ptr [[I]], align 4
+// CHECK3-NEXT:    [[CMP:%.*]] = icmp slt i32 [[TMP0]], 1024
+// CHECK3-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END7:%.*]]
+// CHECK3:       for.body:
+// CHECK3-NEXT:    store i32 0, ptr [[J]], align 4
+// CHECK3-NEXT:    br label [[FOR_COND1:%.*]]
+// CHECK3:       for.cond1:
+// CHECK3-NEXT:    [[TMP1:%.*]] = load i32, ptr [[J]], align 4
+// CHECK3-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[TMP1]], 1024
+// CHECK3-NEXT:    br i1 [[CMP2]], label [[FOR_BODY3:%.*]], label [[FOR_END:%.*]]
+// CHECK3:       for.body3:
+// CHECK3-NEXT:    [[TMP2:%.*]] = load i32, ptr [[J]], align 4
+// CHECK3-NEXT:    [[TMP3:%.*]] = load i32, ptr [[I]], align 4
+// CHECK3-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP2]], [[TMP3]]
+// CHECK3-NEXT:    [[TMP4:%.*]] = load i32, ptr [[RESULT]], align 4
+// CHECK3-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP4]], [[ADD]]
+// CHECK3-NEXT:    store i32 [[ADD4]], ptr [[RESULT]], align 4
+// CHECK3-NEXT:    br label [[FOR_INC:%.*]]
+// CHECK3:       for.inc:
+// CHECK3-NEXT:    [[TMP5:%.*]] = load i32, ptr [[J]], align 4
+// CHECK3-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP5]], 1
+// CHECK3-NEXT:    store i32 [[INC]], ptr [[J]], align 4
+// CHECK3-NEXT:    br label [[FOR_COND1]], !llvm.loop [[LOOP5:![0-9]+]]
+// CHECK3:       for.end:
+// CHECK3-NEXT:    br label [[FOR_INC5:%.*]]
+// CHECK3:       for.inc5:
+// CHECK3-NEXT:    [[TMP6:%.*]] = load i32, ptr [[I]], align 4
+// CHECK3-NEXT:    [[INC6:%.*]] = add nsw i32 [[TMP6]], 1
+// CHECK3-NEXT:    store i32 [[INC6]], ptr [[I]], align 4
+// CHECK3-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]]
+// CHECK3:       for.end7:
+// CHECK3-NEXT:    [[TMP7:%.*]] = load i32, ptr [[X]], align 4
+// CHECK3-NEXT:    store i32 [[TMP7]], ptr [[X_CASTED]], align 4
+// CHECK3-NEXT:    [[TMP8:%.*]] = load i32, ptr [[X_CASTED]], align 4
+// CHECK3-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK3-NEXT:    store i32 [[TMP8]], ptr [[TMP9]], align 4
+// CHECK3-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK3-NEXT:    store i32 [[TMP8]], ptr [[TMP10]], align 4
+// CHECK3-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
+// CHECK3-NEXT:    store ptr null, ptr [[TMP11]], align 4
+// CHECK3-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
+// CHECK3-NEXT:    store ptr [[DEVICE_RESULT]], ptr [[TMP12]], align 4
+// CHECK3-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
+// CHECK3-NEXT:    store ptr [[DEVICE_RESULT]], ptr [[TMP13]], align 4
+// CHECK3-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK3-NEXT:    store ptr null, ptr [[TMP14]], align 4
+// CHECK3-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK3-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK3-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK3-NEXT:    store i32 2, ptr [[TMP17]], align 4
+// CHECK3-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK3-NEXT:    store i32 2, ptr [[TMP18]], align 4
+// CHECK3-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK3-NEXT:    store ptr [[TMP15]], ptr [[TMP19]], align 4
+// CHECK3-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK3-NEXT:    store ptr [[TMP16]], ptr [[TMP20]], align 4
+// CHECK3-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK3-NEXT:    store ptr @.offload_sizes, ptr [[TMP21]], align 4
+// CHECK3-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK3-NEXT:    store ptr @.offload_maptypes, ptr [[TMP22]], align 4
+// CHECK3-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK3-NEXT:    store ptr null, ptr [[TMP23]], align 4
+// CHECK3-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK3-NEXT:    store ptr null, ptr [[TMP24]], align 4
+// CHECK3-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
+// CHECK3-NEXT:    store i64 0, ptr [[TMP25]], align 8
+// CHECK3-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
+// CHECK3-NEXT:    store i64 0, ptr [[TMP26]], align 8
+// CHECK3-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
+// CHECK3-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP27]], align 4
+// CHECK3-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
+// CHECK3-NEXT:    store [3 x i32] zeroinitializer, ptr [[TMP28]], align 4
+// CHECK3-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
+// CHECK3-NEXT:    store i32 0, ptr [[TMP29]], align 4
+// CHECK3-NEXT:    [[TMP30:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z30test_uses_allocators_large_capv_l106.region_id, ptr [[KERNEL_ARGS]])
+// CHECK3-NEXT:    [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
+// CHECK3-NEXT:    br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK3:       omp_offload.failed:
+// CHECK3-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z30test_uses_allocators_large_capv_l106(i32 [[TMP8]], ptr [[DEVICE_RESULT]]) #[[ATTR2:[0-9]+]]
+// CHECK3-NEXT:    br label [[OMP_OFFLOAD_CONT]]
+// CHECK3:       omp_offload.cont:
+// CHECK3-NEXT:    [[TMP32:%.*]] = load i32, ptr [[RESULT]], align 4
+// CHECK3-NEXT:    [[TMP33:%.*]] = load i32, ptr [[DEVICE_RESULT]], align 4
+// CHECK3-NEXT:    [[CMP8:%.*]] = icmp ne i32 [[TMP32]], [[TMP33]]
+// CHECK3-NEXT:    br i1 [[CMP8]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
+// CHECK3:       if.then:
+// CHECK3-NEXT:    [[TMP34:%.*]] = load i32, ptr [[ERRORS]], align 4
+// CHECK3-NEXT:    [[ADD9:%.*]] = add nsw i32 [[TMP34]], 1
+// CHECK3-NEXT:    store i32 [[ADD9]], ptr [[ERRORS]], align 4
+// CHECK3-NEXT:    br label [[IF_END]]
+// CHECK3:       if.end:
+// CHECK3-NEXT:    [[TMP35:%.*]] = load i32, ptr [[ERRORS]], align 4
+// CHECK3-NEXT:    ret i32 [[TMP35]]
+//
+//
+// CHECK3-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z30test_uses_allocators_large_capv_l106(
+// CHECK3-SAME: i32 noundef [[X:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[DEVICE_RESULT:%.*]]) #[[ATTR1:[0-9]+]] {
+// CHECK3-NEXT:  entry:
+// CHECK3-NEXT:    [[X_ADDR:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[DEVICE_RESULT_ADDR:%.*]] = alloca ptr, align 4
+// CHECK3-NEXT:    [[I:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[J:%.*]] = alloca i32, align 4
+// CHECK3-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK3-NEXT:    store i32 [[X]], ptr [[X_ADDR]], align 4
+// CHECK3-NEXT:    store ptr [[DEVICE_RESULT]], ptr [[DEVICE_RESULT_ADDR]], align 4
+// CHECK3-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[DEVICE_RESULT_ADDR]], align 4
+// CHECK3-NEXT:    [[DOTX__VOID_ADDR:%.*]] = call ptr @__kmpc_alloc(i32 [[TMP0]], i32 4, ptr inttoptr (i32 2 to ptr))
+// CHECK3-NEXT:    [[TMP2:%.*]] = load i32, ptr [[X_ADDR]], align 4
+// CHECK3-NEXT:    store i32 [[TMP2]], ptr [[DOTX__VOID_ADDR]], align 4
+// CHECK3-NEXT:    store i32 0, ptr [[I]], align 4
+// CHECK3-NEXT:    br label [[FOR_COND:%.*]]
+// CHECK3:       for.cond:
+// CHECK3-NEXT:    [[TMP3:%.*]] = load i32, ptr [[I]], align 4
+// CHECK3-NEXT:    [[CMP:%.*]] = icmp slt i32 [[TMP3]], 1024
+// CHECK3-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END7:%.*]]
+// CHECK3:       for.body:
+// CHECK3-NEXT:    store i32 0, ptr [[J]], align 4
+// CHECK3-NEXT:    br label [[FOR_COND1:%.*]]
+// CHECK3:       for.cond1:
+// CHECK3-NEXT:    [[TMP4:%.*]] = load i32, ptr [[J]], align 4
+// CHECK3-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[TMP4]], 1024
+// CHECK3-NEXT:    br i1 [[CMP2]], label [[FOR_BODY3:%.*]], label [[FOR_END:%.*]]
+// CHECK3:       for.body3:
+// CHECK3-NEXT:    [[TMP5:%.*]] = load i32, ptr [[J]], align 4
+// CHECK3-NEXT:    [[TMP6:%.*]] = load i32, ptr [[I]], align 4
+// CHECK3-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP5]], [[TMP6]]
+// CHECK3-NEXT:    [[TMP7:%.*]] = load i32, ptr [[DOTX__VOID_ADDR]], align 4
+// CHECK3-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP7]], [[ADD]]
+// CHECK3-NEXT:    store i32 [[ADD4]], ptr [[DOTX__VOID_ADDR]], align 4
+// CHECK3-NEXT:    br label [[FOR_INC:%.*]]
+// CHECK3:       for.inc:
+// CHECK3-NEXT:    [[TMP8:%.*]] = load i32, ptr [[J]], align 4
+// CHECK3-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP8]], 1
+// CHECK3-NEXT:    store i32 [[INC]], ptr [[J]], align 4
+// CHECK3-NEXT:    br label [[FOR_COND1]], !llvm.loop [[LOOP8:![0-9]+]]
+// CHECK3:       for.end:
+// CHECK3-NEXT:    br label [[FOR_INC5:%.*]]
+// CHECK3:       for.inc5:
+// CHECK3-NEXT:    [[TMP9:%.*]] = load i32, ptr [[I]], align 4
+// CHECK3-NEXT:    [[INC6:%.*]] = add nsw i32 [[TMP9]], 1
+// CHECK3-NEXT:    store i32 [[INC6]], ptr [[I]], align 4
+// CHECK3-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]]
+// CHECK3:       for.end7:
+// CHECK3-NEXT:    [[TMP10:%.*]] = load i32, ptr [[DOTX__VOID_ADDR]], align 4
+// CHECK3-NEXT:    store i32 [[TMP10]], ptr [[TMP1]], align 4
+// CHECK3-NEXT:    call void @__kmpc_free(i32 [[TMP0]], ptr [[DOTX__VOID_ADDR]], ptr inttoptr (i32 2 to ptr))
+// CHECK3-NEXT:    ret void
+//
+//
+// CHECK3-LABEL: define internal void @.omp_offloading.requires_reg(
+// CHECK3-SAME: ) #[[ATTR3:[0-9]+]] {
+// CHECK3-NEXT:  entry:
+// CHECK3-NEXT:    call void @__tgt_register_requires(i64 1)
+// CHECK3-NEXT:    ret void
+//
+//
+// CHECK9-LABEL: define weak_odr protected void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z30test_uses_allocators_large_capv_l106(
+// CHECK9-SAME: i64 noundef [[X:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[DEVICE_RESULT:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK9-NEXT:  entry:
+// CHECK9-NEXT:    [[X_ADDR:%.*]] = alloca i64, align 8
+// CHECK9-NEXT:    [[DEVICE_RESULT_ADDR:%.*]] = alloca ptr, align 8
+// CHECK9-NEXT:    [[I:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[J:%.*]] = alloca i32, align 4
+// CHECK9-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
+// CHECK9-NEXT:    store i64 [[X]], ptr [[X_ADDR]], align 8
+// CHECK9-NEXT:    store ptr [[DEVICE_RESULT]], ptr [[DEVICE_RESULT_ADDR]], align 8
+// CHECK9-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[DEVICE_RESULT_ADDR]], align 8
+// CHECK9-NEXT:    [[DOTX__VOID_ADDR:%.*]] = call ptr @__kmpc_alloc(i32 [[TMP0]], i64 4, ptr inttoptr (i64 2 to ptr))
+// CHECK9-NEXT:    [[TMP2:%.*]] = load i32, ptr [[X_ADDR]], align 4
+// CHECK9-NEXT:    store i32 [[TMP2]], ptr [[DOTX__VOID_ADDR]], align 4
+// CHECK9-NEXT:    store i32 0, ptr [[I]], align 4
+// CHECK9-NEXT:    br label [[FOR_COND:%.*]]
+// CHECK9:       for.cond:
+// CHECK9-NEXT:    [[TMP3:%.*]] = load i32, ptr [[I]], align 4
+// CHECK9-NEXT:    [[CMP:%.*]] = icmp slt i32 [[TMP3]], 1024
+// CHECK9-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END7:%.*]]
+// CHECK9:       for.body:
+// CHECK9-NEXT:    store i32 0, ptr [[J]], align 4
+// CHECK9-NEXT:    br label [[FOR_COND1:%.*]]
+// CHECK9:       for.cond1:
+// CHECK9-NEXT:    [[TMP4:%.*]] = load i32, ptr [[J]], align 4
+// CHECK9-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[TMP4]], 1024
+// CHECK9-NEXT:    br i1 [[CMP2]], label [[FOR_BODY3:%.*]], label [[FOR_END:%.*]]
+// CHECK9:       for.body3:
+// CHECK9-NEXT:    [[TMP5:%.*]] = load i32, ptr [[J]], align 4
+// CHECK9-NEXT:    [[TMP6:%.*]] = load i32, ptr [[I]], align 4
+// CHECK9-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP5]], [[TMP6]]
+// CHECK9-NEXT:    [[TMP7:%.*]] = load i32, ptr [[DOTX__VOID_ADDR]], align 4
+// CHECK9-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP7]], [[ADD]]
+// CHECK9-NEXT:    store i32 [[ADD4]], ptr [[DOTX__VOID_ADDR]], align 4
+// CHECK9-NEXT:    br label [[FOR_INC:%.*]]
+// CHECK9:       for.inc:
+// CHECK9-NEXT:    [[TMP8:%.*]] = load i32, ptr [[J]], align 4
+// CHECK9-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP8]], 1
+// CHECK9-NEXT:    store i32 [[INC]], ptr [[J]], align 4
+// CHECK9-NEXT:    br label [[FOR_COND1]], !llvm.loop [[LOOP5:![0-9]+]]
+// CHECK9:       for.end:
+// CHECK9-NEXT:    br label [[FOR_INC5:%.*]]
+// CHECK9:       for.inc5:
+// CHECK9-NEXT:    [[TMP9:%.*]] = load i32, ptr [[I]], align 4
+// CHECK9-NEXT:    [[INC6:%.*]] = add nsw i32 [[TMP9]], 1
+// CHECK9-NEXT:    store i32 [[INC6]], ptr [[I]], align 4
+// CHECK9-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]]
+// CHECK9:       for.end7:
+// CHECK9-NEXT:    [[TMP10:%.*]] = load i32, ptr [[DOTX__VOID_ADDR]], align 4
+// CHECK9-NEXT:    store i32 [[TMP10]], ptr [[TMP1]], align 4
+// CHECK9-NEXT:    call void @__kmpc_free(i32 [[TMP0]], ptr [[DOTX__VOID_ADDR]], ptr inttoptr (i64 2 to ptr))
+// CHECK9-NEXT:    ret void
+//
+//
+// CHECK11-LABEL: define weak_odr protected void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z30test_uses_allocators_large_capv_l106(
+// CHECK11-SAME: i32 noundef [[X:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[DEVICE_RESULT:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK11-NEXT:  entry:
+// CHECK11-NEXT:    [[X_ADDR:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[DEVICE_RESULT_ADDR:%.*]] = alloca ptr, align 4
+// CHECK11-NEXT:    [[I:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[J:%.*]] = alloca i32, align 4
+// CHECK11-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
+// CHECK11-NEXT:    store i32 [[X]], ptr [[X_ADDR]], align 4
+// CHECK11-NEXT:    store ptr [[DEVICE_RESULT]], ptr [[DEVICE_RESULT_ADDR]], align 4
+// CHECK11-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[DEVICE_RESULT_ADDR]], align 4
+// CHECK11-NEXT:    [[DOTX__VOID_ADDR:%.*]] = call ptr @__kmpc_alloc(i32 [[TMP0]], i32 4, ptr inttoptr (i32 2 to ptr))
+// CHECK11-NEXT:    [[TMP2:%.*]] = load i32, ptr [[X_ADDR]], align 4
+// CHECK11-NEXT:    store i32 [[TMP2]], ptr [[DOTX__VOID_ADDR]], align 4
+// CHECK11-NEXT:    store i32 0, ptr [[I]], align 4
+// CHECK11-NEXT:    br label [[FOR_COND:%.*]]
+// CHECK11:       for.cond:
+// CHECK11-NEXT:    [[TMP3:%.*]] = load i32, ptr [[I]], align 4
+// CHECK11-NEXT:    [[CMP:%.*]] = icmp slt i32 [[TMP3]], 1024
+// CHECK11-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END7:%.*]]
+// CHECK11:       for.body:
+// CHECK11-NEXT:    store i32 0, ptr [[J]], align 4
+// CHECK11-NEXT:    br label [[FOR_COND1:%.*]]
+// CHECK11:       for.cond1:
+// CHECK11-NEXT:    [[TMP4:%.*]] = load i32, ptr [[J]], align 4
+// CHECK11-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[TMP4]], 1024
+// CHECK11-NEXT:    br i1 [[CMP2]], label [[FOR_BODY3:%.*]], label [[FOR_END:%.*]]
+// CHECK11:       for.body3:
+// CHECK11-NEXT:    [[TMP5:%.*]] = load i32, ptr [[J]], align 4
+// CHECK11-NEXT:    [[TMP6:%.*]] = load i32, ptr [[I]], align 4
+// CHECK11-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP5]], [[TMP6]]
+// CHECK11-NEXT:    [[TMP7:%.*]] = load i32, ptr [[DOTX__VOID_ADDR]], align 4
+// CHECK11-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP7]], [[ADD]]
+// CHECK11-NEXT:    store i32 [[ADD4]], ptr [[DOTX__VOID_ADDR]], align 4
+// CHECK11-NEXT:    br label [[FOR_INC:%.*]]
+// CHECK11:       for.inc:
+// CHECK11-NEXT:    [[TMP8:%.*]] = load i32, ptr [[J]], align 4
+// CHECK11-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP8]], 1
+// CHECK11-NEXT:    store i32 [[INC]], ptr [[J]], align 4
+// CHECK11-NEXT:    br label [[FOR_COND1]], !llvm.loop [[LOOP6:![0-9]+]]
+// CHECK11:       for.end:
+// CHECK11-NEXT:    br label [[FOR_INC5:%.*]]
+// CHECK11:       for.inc5:
+// CHECK11-NEXT:    [[TMP9:%.*]] = load i32, ptr [[I]], align 4
+// CHECK11-NEXT:    [[INC6:%.*]] = add nsw i32 [[TMP9]], 1
+// CHECK11-NEXT:    store i32 [[INC6]], ptr [[I]], align 4
+// CHECK11-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]]
+// CHECK11:       for.end7:
+// CHECK11-NEXT:    [[TMP10:%.*]] = load i32, ptr [[DOTX__VOID_ADDR]], align 4
+// CHECK11-NEXT:    store i32 [[TMP10]], ptr [[TMP1]], align 4
+// CHECK11-NEXT:    call void @__kmpc_free(i32 [[TMP0]], ptr [[DOTX__VOID_ADDR]], ptr inttoptr (i32 2 to ptr))
+// CHECK11-NEXT:    ret void
+//

>From 83bdf21ba8258cf70b1d37f16e36c6eb77882319 Mon Sep 17 00:00:00 2001
From: Aaron Jarmusch <jarmusch at udel.edu>
Date: Tue, 12 Sep 2023 14:48:05 +0000
Subject: [PATCH 4/4] fixup! fixup! [Clang][OpenMP] Clang adding the addrSpace
 according to DataLayout fix

---
 ...rget_uses_allocators_large_cap_codegen.cpp | 510 ++++--------------
 1 file changed, 113 insertions(+), 397 deletions(-)

diff --git a/clang/test/OpenMP/test_target_uses_allocators_large_cap_codegen.cpp b/clang/test/OpenMP/test_target_uses_allocators_large_cap_codegen.cpp
index 414b852e842b4b8..3ec4ba4c075a263 100644
--- a/clang/test/OpenMP/test_target_uses_allocators_large_cap_codegen.cpp
+++ b/clang/test/OpenMP/test_target_uses_allocators_large_cap_codegen.cpp
@@ -88,201 +88,91 @@ enum omp_allocator_handle_t {
 
 
 int test_uses_allocators_large_cap() {
-  int errors = 0;
   int x = 0;
   int device_result = 0;
-  int result = 0;
-
-  for (int i = 0; i < N; i++) {
-    for (int j = 0; j < N; j++) {
-      result += j + i ;
-    }
-  }
 
 #pragma omp target uses_allocators(omp_large_cap_mem_alloc) allocate(omp_large_cap_mem_alloc: x) firstprivate(x) map(from: device_result)
 {
-  for (int i = 0; i < N; i++) {
-    for (int j = 0; j < N; j++) {
-      x += j + i;
-    }
-  }
-    device_result = x;
 }
 
-  if (result != device_result){
-    errors += 1;
-  }
-
-  return errors;
+  return 0;
 }
 
 #endif
 // CHECK1-LABEL: define dso_local noundef signext i32 @_Z30test_uses_allocators_large_capv(
 // CHECK1-SAME: ) #[[ATTR0:[0-9]+]] {
 // CHECK1-NEXT:  entry:
-// CHECK1-NEXT:    [[ERRORS:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[X:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DEVICE_RESULT:%.*]] = alloca i32, align 4
-// CHECK1-NEXT:    [[RESULT:%.*]] = alloca i32, align 4
-// CHECK1-NEXT:    [[I:%.*]] = alloca i32, align 4
-// CHECK1-NEXT:    [[J:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[X_CASTED:%.*]] = alloca i64, align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x ptr], align 8
 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x ptr], align 8
 // CHECK1-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
-// CHECK1-NEXT:    store i32 0, ptr [[ERRORS]], align 4
 // CHECK1-NEXT:    store i32 0, ptr [[X]], align 4
 // CHECK1-NEXT:    store i32 0, ptr [[DEVICE_RESULT]], align 4
-// CHECK1-NEXT:    store i32 0, ptr [[RESULT]], align 4
-// CHECK1-NEXT:    store i32 0, ptr [[I]], align 4
-// CHECK1-NEXT:    br label [[FOR_COND:%.*]]
-// CHECK1:       for.cond:
-// CHECK1-NEXT:    [[TMP0:%.*]] = load i32, ptr [[I]], align 4
-// CHECK1-NEXT:    [[CMP:%.*]] = icmp slt i32 [[TMP0]], 1024
-// CHECK1-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END7:%.*]]
-// CHECK1:       for.body:
-// CHECK1-NEXT:    store i32 0, ptr [[J]], align 4
-// CHECK1-NEXT:    br label [[FOR_COND1:%.*]]
-// CHECK1:       for.cond1:
-// CHECK1-NEXT:    [[TMP1:%.*]] = load i32, ptr [[J]], align 4
-// CHECK1-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[TMP1]], 1024
-// CHECK1-NEXT:    br i1 [[CMP2]], label [[FOR_BODY3:%.*]], label [[FOR_END:%.*]]
-// CHECK1:       for.body3:
-// CHECK1-NEXT:    [[TMP2:%.*]] = load i32, ptr [[J]], align 4
-// CHECK1-NEXT:    [[TMP3:%.*]] = load i32, ptr [[I]], align 4
-// CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP2]], [[TMP3]]
-// CHECK1-NEXT:    [[TMP4:%.*]] = load i32, ptr [[RESULT]], align 4
-// CHECK1-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP4]], [[ADD]]
-// CHECK1-NEXT:    store i32 [[ADD4]], ptr [[RESULT]], align 4
-// CHECK1-NEXT:    br label [[FOR_INC:%.*]]
-// CHECK1:       for.inc:
-// CHECK1-NEXT:    [[TMP5:%.*]] = load i32, ptr [[J]], align 4
-// CHECK1-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP5]], 1
-// CHECK1-NEXT:    store i32 [[INC]], ptr [[J]], align 4
-// CHECK1-NEXT:    br label [[FOR_COND1]], !llvm.loop [[LOOP4:![0-9]+]]
-// CHECK1:       for.end:
-// CHECK1-NEXT:    br label [[FOR_INC5:%.*]]
-// CHECK1:       for.inc5:
-// CHECK1-NEXT:    [[TMP6:%.*]] = load i32, ptr [[I]], align 4
-// CHECK1-NEXT:    [[INC6:%.*]] = add nsw i32 [[TMP6]], 1
-// CHECK1-NEXT:    store i32 [[INC6]], ptr [[I]], align 4
-// CHECK1-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]]
-// CHECK1:       for.end7:
-// CHECK1-NEXT:    [[TMP7:%.*]] = load i32, ptr [[X]], align 4
-// CHECK1-NEXT:    store i32 [[TMP7]], ptr [[X_CASTED]], align 4
-// CHECK1-NEXT:    [[TMP8:%.*]] = load i64, ptr [[X_CASTED]], align 8
-// CHECK1-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK1-NEXT:    store i64 [[TMP8]], ptr [[TMP9]], align 8
-// CHECK1-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    store i64 [[TMP8]], ptr [[TMP10]], align 8
-// CHECK1-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
-// CHECK1-NEXT:    store ptr null, ptr [[TMP11]], align 8
-// CHECK1-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
-// CHECK1-NEXT:    store ptr [[DEVICE_RESULT]], ptr [[TMP12]], align 8
-// CHECK1-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
-// CHECK1-NEXT:    store ptr [[DEVICE_RESULT]], ptr [[TMP13]], align 8
-// CHECK1-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
-// CHECK1-NEXT:    store ptr null, ptr [[TMP14]], align 8
-// CHECK1-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
-// CHECK1-NEXT:    store i32 2, ptr [[TMP17]], align 4
-// CHECK1-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
-// CHECK1-NEXT:    store i32 2, ptr [[TMP18]], align 4
-// CHECK1-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
-// CHECK1-NEXT:    store ptr [[TMP15]], ptr [[TMP19]], align 8
-// CHECK1-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
-// CHECK1-NEXT:    store ptr [[TMP16]], ptr [[TMP20]], align 8
-// CHECK1-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
-// CHECK1-NEXT:    store ptr @.offload_sizes, ptr [[TMP21]], align 8
-// CHECK1-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
-// CHECK1-NEXT:    store ptr @.offload_maptypes, ptr [[TMP22]], align 8
-// CHECK1-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
-// CHECK1-NEXT:    store ptr null, ptr [[TMP23]], align 8
-// CHECK1-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
-// CHECK1-NEXT:    store ptr null, ptr [[TMP24]], align 8
-// CHECK1-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
-// CHECK1-NEXT:    store i64 0, ptr [[TMP25]], align 8
-// CHECK1-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
-// CHECK1-NEXT:    store i64 0, ptr [[TMP26]], align 8
-// CHECK1-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
-// CHECK1-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP27]], align 4
-// CHECK1-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
-// CHECK1-NEXT:    store [3 x i32] zeroinitializer, ptr [[TMP28]], align 4
-// CHECK1-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
-// CHECK1-NEXT:    store i32 0, ptr [[TMP29]], align 4
-// CHECK1-NEXT:    [[TMP30:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z30test_uses_allocators_large_capv_l106.region_id, ptr [[KERNEL_ARGS]])
-// CHECK1-NEXT:    [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
-// CHECK1-NEXT:    br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK1-NEXT:    [[TMP0:%.*]] = load i32, ptr [[X]], align 4
+// CHECK1-NEXT:    store i32 [[TMP0]], ptr [[X_CASTED]], align 4
+// CHECK1-NEXT:    [[TMP1:%.*]] = load i64, ptr [[X_CASTED]], align 8
+// CHECK1-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK1-NEXT:    store i64 [[TMP1]], ptr [[TMP2]], align 8
+// CHECK1-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK1-NEXT:    store i64 [[TMP1]], ptr [[TMP3]], align 8
+// CHECK1-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
+// CHECK1-NEXT:    store ptr null, ptr [[TMP4]], align 8
+// CHECK1-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
+// CHECK1-NEXT:    store ptr [[DEVICE_RESULT]], ptr [[TMP5]], align 8
+// CHECK1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
+// CHECK1-NEXT:    store ptr [[DEVICE_RESULT]], ptr [[TMP6]], align 8
+// CHECK1-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK1-NEXT:    store ptr null, ptr [[TMP7]], align 8
+// CHECK1-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK1-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK1-NEXT:    store i32 2, ptr [[TMP10]], align 4
+// CHECK1-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK1-NEXT:    store i32 2, ptr [[TMP11]], align 4
+// CHECK1-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK1-NEXT:    store ptr [[TMP8]], ptr [[TMP12]], align 8
+// CHECK1-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK1-NEXT:    store ptr [[TMP9]], ptr [[TMP13]], align 8
+// CHECK1-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK1-NEXT:    store ptr @.offload_sizes, ptr [[TMP14]], align 8
+// CHECK1-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK1-NEXT:    store ptr @.offload_maptypes, ptr [[TMP15]], align 8
+// CHECK1-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK1-NEXT:    store ptr null, ptr [[TMP16]], align 8
+// CHECK1-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK1-NEXT:    store ptr null, ptr [[TMP17]], align 8
+// CHECK1-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
+// CHECK1-NEXT:    store i64 0, ptr [[TMP18]], align 8
+// CHECK1-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
+// CHECK1-NEXT:    store i64 0, ptr [[TMP19]], align 8
+// CHECK1-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
+// CHECK1-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP20]], align 4
+// CHECK1-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
+// CHECK1-NEXT:    store [3 x i32] zeroinitializer, ptr [[TMP21]], align 4
+// CHECK1-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
+// CHECK1-NEXT:    store i32 0, ptr [[TMP22]], align 4
+// CHECK1-NEXT:    [[TMP23:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z30test_uses_allocators_large_capv_l102.region_id, ptr [[KERNEL_ARGS]])
+// CHECK1-NEXT:    [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0
+// CHECK1-NEXT:    br i1 [[TMP24]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK1:       omp_offload.failed:
-// CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z30test_uses_allocators_large_capv_l106(i64 [[TMP8]], ptr [[DEVICE_RESULT]]) #[[ATTR2:[0-9]+]]
+// CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z30test_uses_allocators_large_capv_l102(i64 [[TMP1]]) #[[ATTR2:[0-9]+]]
 // CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK1:       omp_offload.cont:
-// CHECK1-NEXT:    [[TMP32:%.*]] = load i32, ptr [[RESULT]], align 4
-// CHECK1-NEXT:    [[TMP33:%.*]] = load i32, ptr [[DEVICE_RESULT]], align 4
-// CHECK1-NEXT:    [[CMP8:%.*]] = icmp ne i32 [[TMP32]], [[TMP33]]
-// CHECK1-NEXT:    br i1 [[CMP8]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
-// CHECK1:       if.then:
-// CHECK1-NEXT:    [[TMP34:%.*]] = load i32, ptr [[ERRORS]], align 4
-// CHECK1-NEXT:    [[ADD9:%.*]] = add nsw i32 [[TMP34]], 1
-// CHECK1-NEXT:    store i32 [[ADD9]], ptr [[ERRORS]], align 4
-// CHECK1-NEXT:    br label [[IF_END]]
-// CHECK1:       if.end:
-// CHECK1-NEXT:    [[TMP35:%.*]] = load i32, ptr [[ERRORS]], align 4
-// CHECK1-NEXT:    ret i32 [[TMP35]]
+// CHECK1-NEXT:    ret i32 0
 //
 //
-// CHECK1-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z30test_uses_allocators_large_capv_l106(
-// CHECK1-SAME: i64 noundef [[X:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[DEVICE_RESULT:%.*]]) #[[ATTR1:[0-9]+]] {
+// CHECK1-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z30test_uses_allocators_large_capv_l102(
+// CHECK1-SAME: i64 noundef [[X:%.*]]) #[[ATTR1:[0-9]+]] {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[X_ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT:    [[DEVICE_RESULT_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT:    [[I:%.*]] = alloca i32, align 4
-// CHECK1-NEXT:    [[J:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
 // CHECK1-NEXT:    store i64 [[X]], ptr [[X_ADDR]], align 8
-// CHECK1-NEXT:    store ptr [[DEVICE_RESULT]], ptr [[DEVICE_RESULT_ADDR]], align 8
-// CHECK1-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[DEVICE_RESULT_ADDR]], align 8
 // CHECK1-NEXT:    [[DOTX__VOID_ADDR:%.*]] = call ptr @__kmpc_alloc(i32 [[TMP0]], i64 4, ptr inttoptr (i64 2 to ptr))
-// CHECK1-NEXT:    [[TMP2:%.*]] = load i32, ptr [[X_ADDR]], align 4
-// CHECK1-NEXT:    store i32 [[TMP2]], ptr [[DOTX__VOID_ADDR]], align 4
-// CHECK1-NEXT:    store i32 0, ptr [[I]], align 4
-// CHECK1-NEXT:    br label [[FOR_COND:%.*]]
-// CHECK1:       for.cond:
-// CHECK1-NEXT:    [[TMP3:%.*]] = load i32, ptr [[I]], align 4
-// CHECK1-NEXT:    [[CMP:%.*]] = icmp slt i32 [[TMP3]], 1024
-// CHECK1-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END7:%.*]]
-// CHECK1:       for.body:
-// CHECK1-NEXT:    store i32 0, ptr [[J]], align 4
-// CHECK1-NEXT:    br label [[FOR_COND1:%.*]]
-// CHECK1:       for.cond1:
-// CHECK1-NEXT:    [[TMP4:%.*]] = load i32, ptr [[J]], align 4
-// CHECK1-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[TMP4]], 1024
-// CHECK1-NEXT:    br i1 [[CMP2]], label [[FOR_BODY3:%.*]], label [[FOR_END:%.*]]
-// CHECK1:       for.body3:
-// CHECK1-NEXT:    [[TMP5:%.*]] = load i32, ptr [[J]], align 4
-// CHECK1-NEXT:    [[TMP6:%.*]] = load i32, ptr [[I]], align 4
-// CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP5]], [[TMP6]]
-// CHECK1-NEXT:    [[TMP7:%.*]] = load i32, ptr [[DOTX__VOID_ADDR]], align 4
-// CHECK1-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP7]], [[ADD]]
-// CHECK1-NEXT:    store i32 [[ADD4]], ptr [[DOTX__VOID_ADDR]], align 4
-// CHECK1-NEXT:    br label [[FOR_INC:%.*]]
-// CHECK1:       for.inc:
-// CHECK1-NEXT:    [[TMP8:%.*]] = load i32, ptr [[J]], align 4
-// CHECK1-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP8]], 1
-// CHECK1-NEXT:    store i32 [[INC]], ptr [[J]], align 4
-// CHECK1-NEXT:    br label [[FOR_COND1]], !llvm.loop [[LOOP7:![0-9]+]]
-// CHECK1:       for.end:
-// CHECK1-NEXT:    br label [[FOR_INC5:%.*]]
-// CHECK1:       for.inc5:
-// CHECK1-NEXT:    [[TMP9:%.*]] = load i32, ptr [[I]], align 4
-// CHECK1-NEXT:    [[INC6:%.*]] = add nsw i32 [[TMP9]], 1
-// CHECK1-NEXT:    store i32 [[INC6]], ptr [[I]], align 4
-// CHECK1-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]]
-// CHECK1:       for.end7:
-// CHECK1-NEXT:    [[TMP10:%.*]] = load i32, ptr [[DOTX__VOID_ADDR]], align 4
-// CHECK1-NEXT:    store i32 [[TMP10]], ptr [[TMP1]], align 4
+// CHECK1-NEXT:    [[TMP1:%.*]] = load i32, ptr [[X_ADDR]], align 4
+// CHECK1-NEXT:    store i32 [[TMP1]], ptr [[DOTX__VOID_ADDR]], align 4
 // CHECK1-NEXT:    call void @__kmpc_free(i32 [[TMP0]], ptr [[DOTX__VOID_ADDR]], ptr inttoptr (i64 2 to ptr))
 // CHECK1-NEXT:    ret void
 //
@@ -297,169 +187,77 @@ int test_uses_allocators_large_cap() {
 // CHECK3-LABEL: define dso_local noundef i32 @_Z30test_uses_allocators_large_capv(
 // CHECK3-SAME: ) #[[ATTR0:[0-9]+]] {
 // CHECK3-NEXT:  entry:
-// CHECK3-NEXT:    [[ERRORS:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[X:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[DEVICE_RESULT:%.*]] = alloca i32, align 4
-// CHECK3-NEXT:    [[RESULT:%.*]] = alloca i32, align 4
-// CHECK3-NEXT:    [[I:%.*]] = alloca i32, align 4
-// CHECK3-NEXT:    [[J:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[X_CASTED:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x ptr], align 4
 // CHECK3-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x ptr], align 4
 // CHECK3-NEXT:    [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
-// CHECK3-NEXT:    store i32 0, ptr [[ERRORS]], align 4
 // CHECK3-NEXT:    store i32 0, ptr [[X]], align 4
 // CHECK3-NEXT:    store i32 0, ptr [[DEVICE_RESULT]], align 4
-// CHECK3-NEXT:    store i32 0, ptr [[RESULT]], align 4
-// CHECK3-NEXT:    store i32 0, ptr [[I]], align 4
-// CHECK3-NEXT:    br label [[FOR_COND:%.*]]
-// CHECK3:       for.cond:
-// CHECK3-NEXT:    [[TMP0:%.*]] = load i32, ptr [[I]], align 4
-// CHECK3-NEXT:    [[CMP:%.*]] = icmp slt i32 [[TMP0]], 1024
-// CHECK3-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END7:%.*]]
-// CHECK3:       for.body:
-// CHECK3-NEXT:    store i32 0, ptr [[J]], align 4
-// CHECK3-NEXT:    br label [[FOR_COND1:%.*]]
-// CHECK3:       for.cond1:
-// CHECK3-NEXT:    [[TMP1:%.*]] = load i32, ptr [[J]], align 4
-// CHECK3-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[TMP1]], 1024
-// CHECK3-NEXT:    br i1 [[CMP2]], label [[FOR_BODY3:%.*]], label [[FOR_END:%.*]]
-// CHECK3:       for.body3:
-// CHECK3-NEXT:    [[TMP2:%.*]] = load i32, ptr [[J]], align 4
-// CHECK3-NEXT:    [[TMP3:%.*]] = load i32, ptr [[I]], align 4
-// CHECK3-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP2]], [[TMP3]]
-// CHECK3-NEXT:    [[TMP4:%.*]] = load i32, ptr [[RESULT]], align 4
-// CHECK3-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP4]], [[ADD]]
-// CHECK3-NEXT:    store i32 [[ADD4]], ptr [[RESULT]], align 4
-// CHECK3-NEXT:    br label [[FOR_INC:%.*]]
-// CHECK3:       for.inc:
-// CHECK3-NEXT:    [[TMP5:%.*]] = load i32, ptr [[J]], align 4
-// CHECK3-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP5]], 1
-// CHECK3-NEXT:    store i32 [[INC]], ptr [[J]], align 4
-// CHECK3-NEXT:    br label [[FOR_COND1]], !llvm.loop [[LOOP5:![0-9]+]]
-// CHECK3:       for.end:
-// CHECK3-NEXT:    br label [[FOR_INC5:%.*]]
-// CHECK3:       for.inc5:
-// CHECK3-NEXT:    [[TMP6:%.*]] = load i32, ptr [[I]], align 4
-// CHECK3-NEXT:    [[INC6:%.*]] = add nsw i32 [[TMP6]], 1
-// CHECK3-NEXT:    store i32 [[INC6]], ptr [[I]], align 4
-// CHECK3-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]]
-// CHECK3:       for.end7:
-// CHECK3-NEXT:    [[TMP7:%.*]] = load i32, ptr [[X]], align 4
-// CHECK3-NEXT:    store i32 [[TMP7]], ptr [[X_CASTED]], align 4
-// CHECK3-NEXT:    [[TMP8:%.*]] = load i32, ptr [[X_CASTED]], align 4
-// CHECK3-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK3-NEXT:    store i32 [[TMP8]], ptr [[TMP9]], align 4
-// CHECK3-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    store i32 [[TMP8]], ptr [[TMP10]], align 4
-// CHECK3-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
-// CHECK3-NEXT:    store ptr null, ptr [[TMP11]], align 4
-// CHECK3-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
-// CHECK3-NEXT:    store ptr [[DEVICE_RESULT]], ptr [[TMP12]], align 4
-// CHECK3-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
-// CHECK3-NEXT:    store ptr [[DEVICE_RESULT]], ptr [[TMP13]], align 4
-// CHECK3-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
-// CHECK3-NEXT:    store ptr null, ptr [[TMP14]], align 4
-// CHECK3-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK3-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
-// CHECK3-NEXT:    store i32 2, ptr [[TMP17]], align 4
-// CHECK3-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
-// CHECK3-NEXT:    store i32 2, ptr [[TMP18]], align 4
-// CHECK3-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
-// CHECK3-NEXT:    store ptr [[TMP15]], ptr [[TMP19]], align 4
-// CHECK3-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
-// CHECK3-NEXT:    store ptr [[TMP16]], ptr [[TMP20]], align 4
-// CHECK3-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
-// CHECK3-NEXT:    store ptr @.offload_sizes, ptr [[TMP21]], align 4
-// CHECK3-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
-// CHECK3-NEXT:    store ptr @.offload_maptypes, ptr [[TMP22]], align 4
-// CHECK3-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
-// CHECK3-NEXT:    store ptr null, ptr [[TMP23]], align 4
-// CHECK3-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
-// CHECK3-NEXT:    store ptr null, ptr [[TMP24]], align 4
-// CHECK3-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
-// CHECK3-NEXT:    store i64 0, ptr [[TMP25]], align 8
-// CHECK3-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
-// CHECK3-NEXT:    store i64 0, ptr [[TMP26]], align 8
-// CHECK3-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
-// CHECK3-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP27]], align 4
-// CHECK3-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
-// CHECK3-NEXT:    store [3 x i32] zeroinitializer, ptr [[TMP28]], align 4
-// CHECK3-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
-// CHECK3-NEXT:    store i32 0, ptr [[TMP29]], align 4
-// CHECK3-NEXT:    [[TMP30:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z30test_uses_allocators_large_capv_l106.region_id, ptr [[KERNEL_ARGS]])
-// CHECK3-NEXT:    [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
-// CHECK3-NEXT:    br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK3-NEXT:    [[TMP0:%.*]] = load i32, ptr [[X]], align 4
+// CHECK3-NEXT:    store i32 [[TMP0]], ptr [[X_CASTED]], align 4
+// CHECK3-NEXT:    [[TMP1:%.*]] = load i32, ptr [[X_CASTED]], align 4
+// CHECK3-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK3-NEXT:    store i32 [[TMP1]], ptr [[TMP2]], align 4
+// CHECK3-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK3-NEXT:    store i32 [[TMP1]], ptr [[TMP3]], align 4
+// CHECK3-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
+// CHECK3-NEXT:    store ptr null, ptr [[TMP4]], align 4
+// CHECK3-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
+// CHECK3-NEXT:    store ptr [[DEVICE_RESULT]], ptr [[TMP5]], align 4
+// CHECK3-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
+// CHECK3-NEXT:    store ptr [[DEVICE_RESULT]], ptr [[TMP6]], align 4
+// CHECK3-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK3-NEXT:    store ptr null, ptr [[TMP7]], align 4
+// CHECK3-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK3-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK3-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK3-NEXT:    store i32 2, ptr [[TMP10]], align 4
+// CHECK3-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK3-NEXT:    store i32 2, ptr [[TMP11]], align 4
+// CHECK3-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK3-NEXT:    store ptr [[TMP8]], ptr [[TMP12]], align 4
+// CHECK3-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK3-NEXT:    store ptr [[TMP9]], ptr [[TMP13]], align 4
+// CHECK3-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK3-NEXT:    store ptr @.offload_sizes, ptr [[TMP14]], align 4
+// CHECK3-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK3-NEXT:    store ptr @.offload_maptypes, ptr [[TMP15]], align 4
+// CHECK3-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK3-NEXT:    store ptr null, ptr [[TMP16]], align 4
+// CHECK3-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK3-NEXT:    store ptr null, ptr [[TMP17]], align 4
+// CHECK3-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
+// CHECK3-NEXT:    store i64 0, ptr [[TMP18]], align 8
+// CHECK3-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
+// CHECK3-NEXT:    store i64 0, ptr [[TMP19]], align 8
+// CHECK3-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
+// CHECK3-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP20]], align 4
+// CHECK3-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
+// CHECK3-NEXT:    store [3 x i32] zeroinitializer, ptr [[TMP21]], align 4
+// CHECK3-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
+// CHECK3-NEXT:    store i32 0, ptr [[TMP22]], align 4
+// CHECK3-NEXT:    [[TMP23:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z30test_uses_allocators_large_capv_l102.region_id, ptr [[KERNEL_ARGS]])
+// CHECK3-NEXT:    [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0
+// CHECK3-NEXT:    br i1 [[TMP24]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
 // CHECK3:       omp_offload.failed:
-// CHECK3-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z30test_uses_allocators_large_capv_l106(i32 [[TMP8]], ptr [[DEVICE_RESULT]]) #[[ATTR2:[0-9]+]]
+// CHECK3-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z30test_uses_allocators_large_capv_l102(i32 [[TMP1]]) #[[ATTR2:[0-9]+]]
 // CHECK3-NEXT:    br label [[OMP_OFFLOAD_CONT]]
 // CHECK3:       omp_offload.cont:
-// CHECK3-NEXT:    [[TMP32:%.*]] = load i32, ptr [[RESULT]], align 4
-// CHECK3-NEXT:    [[TMP33:%.*]] = load i32, ptr [[DEVICE_RESULT]], align 4
-// CHECK3-NEXT:    [[CMP8:%.*]] = icmp ne i32 [[TMP32]], [[TMP33]]
-// CHECK3-NEXT:    br i1 [[CMP8]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
-// CHECK3:       if.then:
-// CHECK3-NEXT:    [[TMP34:%.*]] = load i32, ptr [[ERRORS]], align 4
-// CHECK3-NEXT:    [[ADD9:%.*]] = add nsw i32 [[TMP34]], 1
-// CHECK3-NEXT:    store i32 [[ADD9]], ptr [[ERRORS]], align 4
-// CHECK3-NEXT:    br label [[IF_END]]
-// CHECK3:       if.end:
-// CHECK3-NEXT:    [[TMP35:%.*]] = load i32, ptr [[ERRORS]], align 4
-// CHECK3-NEXT:    ret i32 [[TMP35]]
+// CHECK3-NEXT:    ret i32 0
 //
 //
-// CHECK3-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z30test_uses_allocators_large_capv_l106(
-// CHECK3-SAME: i32 noundef [[X:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[DEVICE_RESULT:%.*]]) #[[ATTR1:[0-9]+]] {
+// CHECK3-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z30test_uses_allocators_large_capv_l102(
+// CHECK3-SAME: i32 noundef [[X:%.*]]) #[[ATTR1:[0-9]+]] {
 // CHECK3-NEXT:  entry:
 // CHECK3-NEXT:    [[X_ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT:    [[DEVICE_RESULT_ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT:    [[I:%.*]] = alloca i32, align 4
-// CHECK3-NEXT:    [[J:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
 // CHECK3-NEXT:    store i32 [[X]], ptr [[X_ADDR]], align 4
-// CHECK3-NEXT:    store ptr [[DEVICE_RESULT]], ptr [[DEVICE_RESULT_ADDR]], align 4
-// CHECK3-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[DEVICE_RESULT_ADDR]], align 4
 // CHECK3-NEXT:    [[DOTX__VOID_ADDR:%.*]] = call ptr @__kmpc_alloc(i32 [[TMP0]], i32 4, ptr inttoptr (i32 2 to ptr))
-// CHECK3-NEXT:    [[TMP2:%.*]] = load i32, ptr [[X_ADDR]], align 4
-// CHECK3-NEXT:    store i32 [[TMP2]], ptr [[DOTX__VOID_ADDR]], align 4
-// CHECK3-NEXT:    store i32 0, ptr [[I]], align 4
-// CHECK3-NEXT:    br label [[FOR_COND:%.*]]
-// CHECK3:       for.cond:
-// CHECK3-NEXT:    [[TMP3:%.*]] = load i32, ptr [[I]], align 4
-// CHECK3-NEXT:    [[CMP:%.*]] = icmp slt i32 [[TMP3]], 1024
-// CHECK3-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END7:%.*]]
-// CHECK3:       for.body:
-// CHECK3-NEXT:    store i32 0, ptr [[J]], align 4
-// CHECK3-NEXT:    br label [[FOR_COND1:%.*]]
-// CHECK3:       for.cond1:
-// CHECK3-NEXT:    [[TMP4:%.*]] = load i32, ptr [[J]], align 4
-// CHECK3-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[TMP4]], 1024
-// CHECK3-NEXT:    br i1 [[CMP2]], label [[FOR_BODY3:%.*]], label [[FOR_END:%.*]]
-// CHECK3:       for.body3:
-// CHECK3-NEXT:    [[TMP5:%.*]] = load i32, ptr [[J]], align 4
-// CHECK3-NEXT:    [[TMP6:%.*]] = load i32, ptr [[I]], align 4
-// CHECK3-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP5]], [[TMP6]]
-// CHECK3-NEXT:    [[TMP7:%.*]] = load i32, ptr [[DOTX__VOID_ADDR]], align 4
-// CHECK3-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP7]], [[ADD]]
-// CHECK3-NEXT:    store i32 [[ADD4]], ptr [[DOTX__VOID_ADDR]], align 4
-// CHECK3-NEXT:    br label [[FOR_INC:%.*]]
-// CHECK3:       for.inc:
-// CHECK3-NEXT:    [[TMP8:%.*]] = load i32, ptr [[J]], align 4
-// CHECK3-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP8]], 1
-// CHECK3-NEXT:    store i32 [[INC]], ptr [[J]], align 4
-// CHECK3-NEXT:    br label [[FOR_COND1]], !llvm.loop [[LOOP8:![0-9]+]]
-// CHECK3:       for.end:
-// CHECK3-NEXT:    br label [[FOR_INC5:%.*]]
-// CHECK3:       for.inc5:
-// CHECK3-NEXT:    [[TMP9:%.*]] = load i32, ptr [[I]], align 4
-// CHECK3-NEXT:    [[INC6:%.*]] = add nsw i32 [[TMP9]], 1
-// CHECK3-NEXT:    store i32 [[INC6]], ptr [[I]], align 4
-// CHECK3-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]]
-// CHECK3:       for.end7:
-// CHECK3-NEXT:    [[TMP10:%.*]] = load i32, ptr [[DOTX__VOID_ADDR]], align 4
-// CHECK3-NEXT:    store i32 [[TMP10]], ptr [[TMP1]], align 4
+// CHECK3-NEXT:    [[TMP1:%.*]] = load i32, ptr [[X_ADDR]], align 4
+// CHECK3-NEXT:    store i32 [[TMP1]], ptr [[DOTX__VOID_ADDR]], align 4
 // CHECK3-NEXT:    call void @__kmpc_free(i32 [[TMP0]], ptr [[DOTX__VOID_ADDR]], ptr inttoptr (i32 2 to ptr))
 // CHECK3-NEXT:    ret void
 //
@@ -471,110 +269,28 @@ int test_uses_allocators_large_cap() {
 // CHECK3-NEXT:    ret void
 //
 //
-// CHECK9-LABEL: define weak_odr protected void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z30test_uses_allocators_large_capv_l106(
-// CHECK9-SAME: i64 noundef [[X:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[DEVICE_RESULT:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK9-LABEL: define weak_odr protected void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z30test_uses_allocators_large_capv_l102(
+// CHECK9-SAME: i64 noundef [[X:%.*]]) #[[ATTR0:[0-9]+]] {
 // CHECK9-NEXT:  entry:
 // CHECK9-NEXT:    [[X_ADDR:%.*]] = alloca i64, align 8
-// CHECK9-NEXT:    [[DEVICE_RESULT_ADDR:%.*]] = alloca ptr, align 8
-// CHECK9-NEXT:    [[I:%.*]] = alloca i32, align 4
-// CHECK9-NEXT:    [[J:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
 // CHECK9-NEXT:    store i64 [[X]], ptr [[X_ADDR]], align 8
-// CHECK9-NEXT:    store ptr [[DEVICE_RESULT]], ptr [[DEVICE_RESULT_ADDR]], align 8
-// CHECK9-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[DEVICE_RESULT_ADDR]], align 8
 // CHECK9-NEXT:    [[DOTX__VOID_ADDR:%.*]] = call ptr @__kmpc_alloc(i32 [[TMP0]], i64 4, ptr inttoptr (i64 2 to ptr))
-// CHECK9-NEXT:    [[TMP2:%.*]] = load i32, ptr [[X_ADDR]], align 4
-// CHECK9-NEXT:    store i32 [[TMP2]], ptr [[DOTX__VOID_ADDR]], align 4
-// CHECK9-NEXT:    store i32 0, ptr [[I]], align 4
-// CHECK9-NEXT:    br label [[FOR_COND:%.*]]
-// CHECK9:       for.cond:
-// CHECK9-NEXT:    [[TMP3:%.*]] = load i32, ptr [[I]], align 4
-// CHECK9-NEXT:    [[CMP:%.*]] = icmp slt i32 [[TMP3]], 1024
-// CHECK9-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END7:%.*]]
-// CHECK9:       for.body:
-// CHECK9-NEXT:    store i32 0, ptr [[J]], align 4
-// CHECK9-NEXT:    br label [[FOR_COND1:%.*]]
-// CHECK9:       for.cond1:
-// CHECK9-NEXT:    [[TMP4:%.*]] = load i32, ptr [[J]], align 4
-// CHECK9-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[TMP4]], 1024
-// CHECK9-NEXT:    br i1 [[CMP2]], label [[FOR_BODY3:%.*]], label [[FOR_END:%.*]]
-// CHECK9:       for.body3:
-// CHECK9-NEXT:    [[TMP5:%.*]] = load i32, ptr [[J]], align 4
-// CHECK9-NEXT:    [[TMP6:%.*]] = load i32, ptr [[I]], align 4
-// CHECK9-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP5]], [[TMP6]]
-// CHECK9-NEXT:    [[TMP7:%.*]] = load i32, ptr [[DOTX__VOID_ADDR]], align 4
-// CHECK9-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP7]], [[ADD]]
-// CHECK9-NEXT:    store i32 [[ADD4]], ptr [[DOTX__VOID_ADDR]], align 4
-// CHECK9-NEXT:    br label [[FOR_INC:%.*]]
-// CHECK9:       for.inc:
-// CHECK9-NEXT:    [[TMP8:%.*]] = load i32, ptr [[J]], align 4
-// CHECK9-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP8]], 1
-// CHECK9-NEXT:    store i32 [[INC]], ptr [[J]], align 4
-// CHECK9-NEXT:    br label [[FOR_COND1]], !llvm.loop [[LOOP5:![0-9]+]]
-// CHECK9:       for.end:
-// CHECK9-NEXT:    br label [[FOR_INC5:%.*]]
-// CHECK9:       for.inc5:
-// CHECK9-NEXT:    [[TMP9:%.*]] = load i32, ptr [[I]], align 4
-// CHECK9-NEXT:    [[INC6:%.*]] = add nsw i32 [[TMP9]], 1
-// CHECK9-NEXT:    store i32 [[INC6]], ptr [[I]], align 4
-// CHECK9-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]]
-// CHECK9:       for.end7:
-// CHECK9-NEXT:    [[TMP10:%.*]] = load i32, ptr [[DOTX__VOID_ADDR]], align 4
-// CHECK9-NEXT:    store i32 [[TMP10]], ptr [[TMP1]], align 4
+// CHECK9-NEXT:    [[TMP1:%.*]] = load i32, ptr [[X_ADDR]], align 4
+// CHECK9-NEXT:    store i32 [[TMP1]], ptr [[DOTX__VOID_ADDR]], align 4
 // CHECK9-NEXT:    call void @__kmpc_free(i32 [[TMP0]], ptr [[DOTX__VOID_ADDR]], ptr inttoptr (i64 2 to ptr))
 // CHECK9-NEXT:    ret void
 //
 //
-// CHECK11-LABEL: define weak_odr protected void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z30test_uses_allocators_large_capv_l106(
-// CHECK11-SAME: i32 noundef [[X:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[DEVICE_RESULT:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK11-LABEL: define weak_odr protected void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z30test_uses_allocators_large_capv_l102(
+// CHECK11-SAME: i32 noundef [[X:%.*]]) #[[ATTR0:[0-9]+]] {
 // CHECK11-NEXT:  entry:
 // CHECK11-NEXT:    [[X_ADDR:%.*]] = alloca i32, align 4
-// CHECK11-NEXT:    [[DEVICE_RESULT_ADDR:%.*]] = alloca ptr, align 4
-// CHECK11-NEXT:    [[I:%.*]] = alloca i32, align 4
-// CHECK11-NEXT:    [[J:%.*]] = alloca i32, align 4
 // CHECK11-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
 // CHECK11-NEXT:    store i32 [[X]], ptr [[X_ADDR]], align 4
-// CHECK11-NEXT:    store ptr [[DEVICE_RESULT]], ptr [[DEVICE_RESULT_ADDR]], align 4
-// CHECK11-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[DEVICE_RESULT_ADDR]], align 4
 // CHECK11-NEXT:    [[DOTX__VOID_ADDR:%.*]] = call ptr @__kmpc_alloc(i32 [[TMP0]], i32 4, ptr inttoptr (i32 2 to ptr))
-// CHECK11-NEXT:    [[TMP2:%.*]] = load i32, ptr [[X_ADDR]], align 4
-// CHECK11-NEXT:    store i32 [[TMP2]], ptr [[DOTX__VOID_ADDR]], align 4
-// CHECK11-NEXT:    store i32 0, ptr [[I]], align 4
-// CHECK11-NEXT:    br label [[FOR_COND:%.*]]
-// CHECK11:       for.cond:
-// CHECK11-NEXT:    [[TMP3:%.*]] = load i32, ptr [[I]], align 4
-// CHECK11-NEXT:    [[CMP:%.*]] = icmp slt i32 [[TMP3]], 1024
-// CHECK11-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END7:%.*]]
-// CHECK11:       for.body:
-// CHECK11-NEXT:    store i32 0, ptr [[J]], align 4
-// CHECK11-NEXT:    br label [[FOR_COND1:%.*]]
-// CHECK11:       for.cond1:
-// CHECK11-NEXT:    [[TMP4:%.*]] = load i32, ptr [[J]], align 4
-// CHECK11-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[TMP4]], 1024
-// CHECK11-NEXT:    br i1 [[CMP2]], label [[FOR_BODY3:%.*]], label [[FOR_END:%.*]]
-// CHECK11:       for.body3:
-// CHECK11-NEXT:    [[TMP5:%.*]] = load i32, ptr [[J]], align 4
-// CHECK11-NEXT:    [[TMP6:%.*]] = load i32, ptr [[I]], align 4
-// CHECK11-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP5]], [[TMP6]]
-// CHECK11-NEXT:    [[TMP7:%.*]] = load i32, ptr [[DOTX__VOID_ADDR]], align 4
-// CHECK11-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP7]], [[ADD]]
-// CHECK11-NEXT:    store i32 [[ADD4]], ptr [[DOTX__VOID_ADDR]], align 4
-// CHECK11-NEXT:    br label [[FOR_INC:%.*]]
-// CHECK11:       for.inc:
-// CHECK11-NEXT:    [[TMP8:%.*]] = load i32, ptr [[J]], align 4
-// CHECK11-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP8]], 1
-// CHECK11-NEXT:    store i32 [[INC]], ptr [[J]], align 4
-// CHECK11-NEXT:    br label [[FOR_COND1]], !llvm.loop [[LOOP6:![0-9]+]]
-// CHECK11:       for.end:
-// CHECK11-NEXT:    br label [[FOR_INC5:%.*]]
-// CHECK11:       for.inc5:
-// CHECK11-NEXT:    [[TMP9:%.*]] = load i32, ptr [[I]], align 4
-// CHECK11-NEXT:    [[INC6:%.*]] = add nsw i32 [[TMP9]], 1
-// CHECK11-NEXT:    store i32 [[INC6]], ptr [[I]], align 4
-// CHECK11-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]]
-// CHECK11:       for.end7:
-// CHECK11-NEXT:    [[TMP10:%.*]] = load i32, ptr [[DOTX__VOID_ADDR]], align 4
-// CHECK11-NEXT:    store i32 [[TMP10]], ptr [[TMP1]], align 4
+// CHECK11-NEXT:    [[TMP1:%.*]] = load i32, ptr [[X_ADDR]], align 4
+// CHECK11-NEXT:    store i32 [[TMP1]], ptr [[DOTX__VOID_ADDR]], align 4
 // CHECK11-NEXT:    call void @__kmpc_free(i32 [[TMP0]], ptr [[DOTX__VOID_ADDR]], ptr inttoptr (i32 2 to ptr))
 // CHECK11-NEXT:    ret void
 //



More information about the cfe-commits mailing list