[Openmp-commits] [openmp] 6bd74fd - Revert commits for kernel environment
Shilei Tian via Openmp-commits
openmp-commits at lists.llvm.org
Sun Jul 23 20:33:13 PDT 2023
Author: Shilei Tian
Date: 2023-07-23T23:32:31-04:00
New Revision: 6bd74fd65fadef06edd92740c8ef4fc822d99ae3
URL: https://github.com/llvm/llvm-project/commit/6bd74fd65fadef06edd92740c8ef4fc822d99ae3
DIFF: https://github.com/llvm/llvm-project/commit/6bd74fd65fadef06edd92740c8ef4fc822d99ae3.diff
LOG: Revert commits for kernel environment
This reverts the commits for kernel environments, as they cause issues on the AMD buildbot.
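For context, the shape of the change can be read off the test updates below: the reverted kernel-environment form passes a single pointer to a per-kernel <name>_kernel_environment global, while the restored form passes the ident plus the execution mode (and, for init, whether the generic state machine is used) as separate arguments. The following is a minimal C++ sketch of the two shapes, not the actual DeviceRTL header; the parameter names are hypothetical, and the types mirror the calls checked in the tests below.

    #include <cstdint>

    extern "C" {
    // Kernel-environment form removed by this revert (shown for comparison):
    //   int32_t __kmpc_target_init(void *KernelEnvironment);
    //   void __kmpc_target_deinit();

    // Restored form: ident plus execution mode (i8 1 = generic, i8 2 = SPMD in
    // the tests below) and, for init, whether the generic state machine is used.
    int32_t __kmpc_target_init(void *Ident, int8_t ExecMode, bool UseGenericStateMachine);
    void __kmpc_target_deinit(void *Ident, int8_t ExecMode);
    }

In the IR checked below, the init call's return value is compared against -1 to decide whether the current thread runs the user code or exits as a worker.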
Added:
openmp/libomptarget/include/DeviceEnvironment.h
Modified:
clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
clang/test/OpenMP/amdgcn_target_codegen.cpp
clang/test/OpenMP/amdgcn_target_device_vla.cpp
clang/test/OpenMP/amdgpu_target_with_aligned_attribute.c
clang/test/OpenMP/declare_target_codegen_globalization.cpp
clang/test/OpenMP/nvptx_SPMD_codegen.cpp
clang/test/OpenMP/nvptx_data_sharing.cpp
clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp
clang/test/OpenMP/nvptx_lambda_capturing.cpp
clang/test/OpenMP/nvptx_multi_target_parallel_codegen.cpp
clang/test/OpenMP/nvptx_nested_parallel_codegen.cpp
clang/test/OpenMP/nvptx_parallel_codegen.cpp
clang/test/OpenMP/nvptx_parallel_for_codegen.cpp
clang/test/OpenMP/nvptx_target_codegen.cpp
clang/test/OpenMP/nvptx_target_parallel_codegen.cpp
clang/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp
clang/test/OpenMP/nvptx_target_parallel_proc_bind_codegen.cpp
clang/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp
clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp
clang/test/OpenMP/nvptx_target_printf_codegen.c
clang/test/OpenMP/nvptx_target_simd_codegen.cpp
clang/test/OpenMP/nvptx_target_teams_codegen.cpp
clang/test/OpenMP/nvptx_target_teams_distribute_codegen.cpp
clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp
clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp
clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp
clang/test/OpenMP/nvptx_target_teams_distribute_simd_codegen.cpp
clang/test/OpenMP/nvptx_target_teams_generic_loop_codegen.cpp
clang/test/OpenMP/nvptx_target_teams_generic_loop_generic_mode_codegen.cpp
clang/test/OpenMP/nvptx_teams_codegen.cpp
clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp
clang/test/OpenMP/reduction_implicit_map.cpp
clang/test/OpenMP/remarks_parallel_in_multiple_target_state_machines.c
clang/test/OpenMP/remarks_parallel_in_target_state_machine.c
clang/test/OpenMP/target_parallel_debug_codegen.cpp
clang/test/OpenMP/target_parallel_for_debug_codegen.cpp
clang/test/OpenMP/target_parallel_generic_loop_codegen-3.cpp
clang/test/OpenMP/target_parallel_generic_loop_codegen.cpp
clang/test/OpenMP/target_teams_generic_loop_codegen.cpp
llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
llvm/lib/Transforms/IPO/OpenMPOpt.cpp
llvm/test/Transforms/Attributor/reduced/aa_execution_domain_wrong_fn.ll
llvm/test/Transforms/Attributor/value-simplify-local-remote.ll
llvm/test/Transforms/OpenMP/add_attributes.ll
llvm/test/Transforms/OpenMP/always_inline_device.ll
llvm/test/Transforms/OpenMP/custom_state_machines.ll
llvm/test/Transforms/OpenMP/custom_state_machines_pre_lto.ll
llvm/test/Transforms/OpenMP/custom_state_machines_remarks.ll
llvm/test/Transforms/OpenMP/deduplication_target.ll
llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll
llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold_optnone.ll
llvm/test/Transforms/OpenMP/global_constructor.ll
llvm/test/Transforms/OpenMP/globalization_remarks.ll
llvm/test/Transforms/OpenMP/gpu_state_machine_function_ptr_replacement.ll
llvm/test/Transforms/OpenMP/is_spmd_exec_mode_fold.ll
llvm/test/Transforms/OpenMP/nested_parallelism.ll
llvm/test/Transforms/OpenMP/parallel_level_fold.ll
llvm/test/Transforms/OpenMP/remove_globalization.ll
llvm/test/Transforms/OpenMP/replace_globalization.ll
llvm/test/Transforms/OpenMP/single_threaded_execution.ll
llvm/test/Transforms/OpenMP/spmdization.ll
llvm/test/Transforms/OpenMP/spmdization_assumes.ll
llvm/test/Transforms/OpenMP/spmdization_constant_prop.ll
llvm/test/Transforms/OpenMP/spmdization_guarding.ll
llvm/test/Transforms/OpenMP/spmdization_guarding_two_reaching_kernels.ll
llvm/test/Transforms/OpenMP/spmdization_no_guarding_two_reaching_kernels.ll
llvm/test/Transforms/OpenMP/spmdization_remarks.ll
llvm/test/Transforms/OpenMP/value-simplify-openmp-opt.ll
llvm/test/Transforms/PhaseOrdering/openmp-opt-module.ll
llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
mlir/test/Target/LLVMIR/omptarget-region-device-llvm.mlir
openmp/libomptarget/DeviceRTL/CMakeLists.txt
openmp/libomptarget/DeviceRTL/include/Debug.h
openmp/libomptarget/DeviceRTL/include/Interface.h
openmp/libomptarget/DeviceRTL/include/State.h
openmp/libomptarget/DeviceRTL/src/Configuration.cpp
openmp/libomptarget/DeviceRTL/src/Debug.cpp
openmp/libomptarget/DeviceRTL/src/Kernel.cpp
openmp/libomptarget/DeviceRTL/src/State.cpp
openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h
openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp
openmp/libomptarget/plugins-nextgen/generic-elf-64bit/src/rtl.cpp
Removed:
openmp/libomptarget/include/Environment.h
################################################################################
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
index 1ec9bb3d523471..62aacb9e24d6a5 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
@@ -795,7 +795,7 @@ void CGOpenMPRuntimeGPU::emitKernelDeinit(CodeGenFunction &CGF,
emitGenericVarsEpilog(CGF);
CGBuilderTy &Bld = CGF.Builder;
- OMPBuilder.createTargetDeinit(Bld);
+ OMPBuilder.createTargetDeinit(Bld, IsSPMD);
}
void CGOpenMPRuntimeGPU::emitSPMDKernel(const OMPExecutableDirective &D,
@@ -833,6 +833,24 @@ void CGOpenMPRuntimeGPU::emitSPMDKernel(const OMPExecutableDirective &D,
IsInTTDRegion = false;
}
+// Create a unique global variable to indicate the execution mode of this target
+// region. The execution mode is either 'generic', or 'spmd' depending on the
+// target directive. This variable is picked up by the offload library to setup
+// the device appropriately before kernel launch. If the execution mode is
+// 'generic', the runtime reserves one warp for the master, otherwise, all
+// warps participate in parallel work.
+static void setPropertyExecutionMode(CodeGenModule &CGM, StringRef Name,
+ bool Mode) {
+ auto *GVMode = new llvm::GlobalVariable(
+ CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
+ llvm::GlobalValue::WeakAnyLinkage,
+ llvm::ConstantInt::get(CGM.Int8Ty, Mode ? OMP_TGT_EXEC_MODE_SPMD
+ : OMP_TGT_EXEC_MODE_GENERIC),
+ Twine(Name, "_exec_mode"));
+ GVMode->setVisibility(llvm::GlobalVariable::ProtectedVisibility);
+ CGM.addCompilerUsedGlobal(GVMode);
+}
+
void CGOpenMPRuntimeGPU::emitTargetOutlinedFunction(
const OMPExecutableDirective &D, StringRef ParentName,
llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
@@ -849,6 +867,8 @@ void CGOpenMPRuntimeGPU::emitTargetOutlinedFunction(
else
emitNonSPMDKernel(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry,
CodeGen);
+
+ setPropertyExecutionMode(CGM, OutlinedFn->getName(), Mode);
}
CGOpenMPRuntimeGPU::CGOpenMPRuntimeGPU(CodeGenModule &CGM)
diff --git a/clang/test/OpenMP/amdgcn_target_codegen.cpp b/clang/test/OpenMP/amdgcn_target_codegen.cpp
index 90d2ebdf26bd64..223e6b3ca946b8 100644
--- a/clang/test/OpenMP/amdgcn_target_codegen.cpp
+++ b/clang/test/OpenMP/amdgcn_target_codegen.cpp
@@ -37,7 +37,7 @@ int test_amdgcn_target_tid_threads_simd() {
// CHECK-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
// CHECK-NEXT: store ptr [[ARR]], ptr [[ARR_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z30test_amdgcn_target_tid_threadsv_l14_kernel_environment to ptr))
+// CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @[[GLOB1:[0-9]+]] to ptr), i8 1, i1 true)
// CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK: user_code.entry:
@@ -61,7 +61,7 @@ int test_amdgcn_target_tid_threads_simd() {
// CHECK: worker.exit:
// CHECK-NEXT: ret void
// CHECK: for.end:
-// CHECK-NEXT: call void @__kmpc_target_deinit()
+// CHECK-NEXT: call void @__kmpc_target_deinit(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i8 1)
// CHECK-NEXT: ret void
//
//
@@ -78,7 +78,7 @@ int test_amdgcn_target_tid_threads_simd() {
// CHECK-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
// CHECK-NEXT: store ptr [[ARR]], ptr [[ARR_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z35test_amdgcn_target_tid_threads_simdv_l23_kernel_environment to ptr))
+// CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i8 2, i1 false)
// CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK: user_code.entry:
@@ -109,6 +109,6 @@ int test_amdgcn_target_tid_threads_simd() {
// CHECK-NEXT: ret void
// CHECK: omp.inner.for.end:
// CHECK-NEXT: store i32 1000, ptr [[I_ASCAST]], align 4
-// CHECK-NEXT: call void @__kmpc_target_deinit()
+// CHECK-NEXT: call void @__kmpc_target_deinit(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i8 2)
// CHECK-NEXT: ret void
//
diff --git a/clang/test/OpenMP/amdgcn_target_device_vla.cpp b/clang/test/OpenMP/amdgcn_target_device_vla.cpp
index 417641e0268720..f38f7e849ad201 100644
--- a/clang/test/OpenMP/amdgcn_target_device_vla.cpp
+++ b/clang/test/OpenMP/amdgcn_target_device_vla.cpp
@@ -111,7 +111,7 @@ int main() {
// CHECK-NEXT: [[I1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I1]] to ptr
// CHECK-NEXT: store ptr [[SUM]], ptr [[SUM_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SUM_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo1v_l12_kernel_environment to ptr))
+// CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @[[GLOB1:[0-9]+]] to ptr), i8 1, i1 true)
// CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK: user_code.entry:
@@ -169,7 +169,7 @@ int main() {
// CHECK-NEXT: br label [[FOR_COND2]], !llvm.loop [[LOOP15:![0-9]+]]
// CHECK: for.end9:
// CHECK-NEXT: call void @__kmpc_free_shared(ptr [[A]], i64 [[TMP7]])
-// CHECK-NEXT: call void @__kmpc_target_deinit()
+// CHECK-NEXT: call void @__kmpc_target_deinit(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i8 1)
// CHECK-NEXT: ret void
//
//
@@ -193,18 +193,18 @@ int main() {
// CHECK-NEXT: store ptr [[RESULT]], ptr [[RESULT_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[RESULT_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo2v_l30_kernel_environment to ptr))
+// CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i8 2, i1 false)
// CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1
// CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK: user_code.entry:
-// CHECK-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1:[0-9]+]] to ptr))
+// CHECK-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr))
// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[M_ADDR_ASCAST]], align 4
// CHECK-NEXT: store i32 [[TMP4]], ptr [[M_CASTED_ASCAST]], align 4
// CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr [[M_CASTED_ASCAST]], align 8
// CHECK-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
// CHECK-NEXT: store i32 [[TMP3]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
// CHECK-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo2v_l30_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i64 [[TMP5]], i64 [[TMP0]], ptr [[TMP1]]) #[[ATTR5:[0-9]+]]
-// CHECK-NEXT: call void @__kmpc_target_deinit()
+// CHECK-NEXT: call void @__kmpc_target_deinit(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i8 2)
// CHECK-NEXT: ret void
// CHECK: worker.exit:
// CHECK-NEXT: ret void
@@ -559,7 +559,7 @@ int main() {
// CHECK-NEXT: store ptr [[RESULT]], ptr [[RESULT_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[RESULT_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo3v_l52_kernel_environment to ptr))
+// CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i8 1, i1 true)
// CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1
// CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK: user_code.entry:
@@ -570,7 +570,7 @@ int main() {
// CHECK-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
// CHECK-NEXT: store i32 [[TMP3]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
// CHECK-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo3v_l52_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i64 [[TMP5]], i64 [[TMP0]], ptr [[TMP1]]) #[[ATTR5]]
-// CHECK-NEXT: call void @__kmpc_target_deinit()
+// CHECK-NEXT: call void @__kmpc_target_deinit(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i8 1)
// CHECK-NEXT: ret void
// CHECK: worker.exit:
// CHECK-NEXT: ret void
@@ -918,7 +918,7 @@ int main() {
// CHECK-NEXT: store ptr [[RESULT]], ptr [[RESULT_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[RESULT_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo4v_l76_kernel_environment to ptr))
+// CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i8 1, i1 true)
// CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1
// CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK: user_code.entry:
@@ -932,7 +932,7 @@ int main() {
// CHECK-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
// CHECK-NEXT: store i32 [[TMP3]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
// CHECK-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo4v_l76_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i64 [[TMP5]], i64 [[TMP7]], i64 [[TMP0]], ptr [[TMP1]]) #[[ATTR5]]
-// CHECK-NEXT: call void @__kmpc_target_deinit()
+// CHECK-NEXT: call void @__kmpc_target_deinit(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i8 1)
// CHECK-NEXT: ret void
// CHECK: worker.exit:
// CHECK-NEXT: ret void
diff --git a/clang/test/OpenMP/amdgpu_target_with_aligned_attribute.c b/clang/test/OpenMP/amdgpu_target_with_aligned_attribute.c
index cc8f0b1bc763d4..277ceacfd40b9f 100644
--- a/clang/test/OpenMP/amdgpu_target_with_aligned_attribute.c
+++ b/clang/test/OpenMP/amdgpu_target_with_aligned_attribute.c
@@ -33,11 +33,11 @@ void write_to_aligned_array(int *a, int N) {
// CHECK-AMD-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
// CHECK-AMD-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
// CHECK-AMD-NEXT: store ptr [[APTR]], ptr [[APTR_ADDR_ASCAST]], align 8
-// CHECK-AMD-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_write_to_aligned_array_l14_kernel_environment to ptr))
+// CHECK-AMD-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @[[GLOB1:[0-9]+]] to ptr), i8 2, i1 false)
// CHECK-AMD-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-AMD-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-AMD: user_code.entry:
-// CHECK-AMD-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1:[0-9]+]] to ptr))
+// CHECK-AMD-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr))
// CHECK-AMD-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
// CHECK-AMD-NEXT: store i32 [[TMP2]], ptr [[N_CASTED_ASCAST]], align 4
// CHECK-AMD-NEXT: [[TMP3:%.*]] = load i64, ptr [[N_CASTED_ASCAST]], align 8
@@ -45,7 +45,7 @@ void write_to_aligned_array(int *a, int N) {
// CHECK-AMD-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
// CHECK-AMD-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
// CHECK-AMD-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_write_to_aligned_array_l14_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i64 [[TMP3]], ptr [[TMP4]]) #[[ATTR2:[0-9]+]]
-// CHECK-AMD-NEXT: call void @__kmpc_target_deinit()
+// CHECK-AMD-NEXT: call void @__kmpc_target_deinit(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i8 2)
// CHECK-AMD-NEXT: ret void
// CHECK-AMD: worker.exit:
// CHECK-AMD-NEXT: ret void
diff --git a/clang/test/OpenMP/declare_target_codegen_globalization.cpp b/clang/test/OpenMP/declare_target_codegen_globalization.cpp
index 643820ae18df5b..a894d6903bc82b 100644
--- a/clang/test/OpenMP/declare_target_codegen_globalization.cpp
+++ b/clang/test/OpenMP/declare_target_codegen_globalization.cpp
@@ -31,15 +31,15 @@ int maini1() {
// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6maini1v_l16_kernel_environment)
+// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
-// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
+// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8
// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6maini1v_l16_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
-// CHECK1-NEXT: call void @__kmpc_target_deinit()
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
diff --git a/clang/test/OpenMP/nvptx_SPMD_codegen.cpp b/clang/test/OpenMP/nvptx_SPMD_codegen.cpp
index ad9b365a401e58..643ff23637f7ec 100644
--- a/clang/test/OpenMP/nvptx_SPMD_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_SPMD_codegen.cpp
@@ -280,11 +280,11 @@ int a;
// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8
-// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l15_kernel_environment)
+// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
-// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
+// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-64-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
// CHECK-64-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1
// CHECK-64-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
@@ -293,7 +293,7 @@ int a;
// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l15_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP3]]) #[[ATTR2:[0-9]+]]
-// CHECK-64-NEXT: call void @__kmpc_target_deinit()
+// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
// CHECK-64-NEXT: ret void
@@ -694,7 +694,7 @@ int a;
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l18_kernel_environment)
+// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
@@ -702,7 +702,7 @@ int a;
// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l18_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
-// CHECK-64-NEXT: call void @__kmpc_target_deinit()
+// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
// CHECK-64-NEXT: ret void
@@ -885,7 +885,7 @@ int a;
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l21_kernel_environment)
+// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
@@ -893,7 +893,7 @@ int a;
// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l21_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
-// CHECK-64-NEXT: call void @__kmpc_target_deinit()
+// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
// CHECK-64-NEXT: ret void
@@ -1067,7 +1067,7 @@ int a;
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l24_kernel_environment)
+// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
@@ -1075,7 +1075,7 @@ int a;
// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l24_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
-// CHECK-64-NEXT: call void @__kmpc_target_deinit()
+// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
// CHECK-64-NEXT: ret void
@@ -1256,7 +1256,7 @@ int a;
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l27_kernel_environment)
+// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
@@ -1264,7 +1264,7 @@ int a;
// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l27_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
-// CHECK-64-NEXT: call void @__kmpc_target_deinit()
+// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
// CHECK-64-NEXT: ret void
@@ -1445,7 +1445,7 @@ int a;
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l30_kernel_environment)
+// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
@@ -1453,7 +1453,7 @@ int a;
// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l30_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
-// CHECK-64-NEXT: call void @__kmpc_target_deinit()
+// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
// CHECK-64-NEXT: ret void
@@ -1634,7 +1634,7 @@ int a;
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l33_kernel_environment)
+// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
@@ -1642,7 +1642,7 @@ int a;
// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l33_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
-// CHECK-64-NEXT: call void @__kmpc_target_deinit()
+// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
// CHECK-64-NEXT: ret void
@@ -1826,7 +1826,7 @@ int a;
// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
-// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l37_kernel_environment)
+// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
@@ -1837,7 +1837,7 @@ int a;
// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l37_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP3]]) #[[ATTR2]]
-// CHECK-64-NEXT: call void @__kmpc_target_deinit()
+// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
// CHECK-64-NEXT: ret void
@@ -2029,7 +2029,7 @@ int a;
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l40_kernel_environment)
+// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
@@ -2037,7 +2037,7 @@ int a;
// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l40_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
-// CHECK-64-NEXT: call void @__kmpc_target_deinit()
+// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
// CHECK-64-NEXT: ret void
@@ -2206,7 +2206,7 @@ int a;
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l43_kernel_environment)
+// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
@@ -2214,7 +2214,7 @@ int a;
// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l43_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
-// CHECK-64-NEXT: call void @__kmpc_target_deinit()
+// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
// CHECK-64-NEXT: ret void
@@ -2374,7 +2374,7 @@ int a;
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l46_kernel_environment)
+// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
@@ -2382,7 +2382,7 @@ int a;
// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l46_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
-// CHECK-64-NEXT: call void @__kmpc_target_deinit()
+// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
// CHECK-64-NEXT: ret void
@@ -2549,7 +2549,7 @@ int a;
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l49_kernel_environment)
+// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
@@ -2557,7 +2557,7 @@ int a;
// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l49_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
-// CHECK-64-NEXT: call void @__kmpc_target_deinit()
+// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
// CHECK-64-NEXT: ret void
@@ -2724,7 +2724,7 @@ int a;
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l52_kernel_environment)
+// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
@@ -2732,7 +2732,7 @@ int a;
// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l52_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
-// CHECK-64-NEXT: call void @__kmpc_target_deinit()
+// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
// CHECK-64-NEXT: ret void
@@ -2899,7 +2899,7 @@ int a;
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l55_kernel_environment)
+// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
@@ -2907,7 +2907,7 @@ int a;
// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l55_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
-// CHECK-64-NEXT: call void @__kmpc_target_deinit()
+// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
// CHECK-64-NEXT: ret void
@@ -3074,7 +3074,7 @@ int a;
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l58_kernel_environment)
+// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
@@ -3082,7 +3082,7 @@ int a;
// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l58_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
-// CHECK-64-NEXT: call void @__kmpc_target_deinit()
+// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
// CHECK-64-NEXT: ret void
@@ -3257,7 +3257,7 @@ int a;
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66_kernel_environment)
+// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
@@ -3265,7 +3265,7 @@ int a;
// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
-// CHECK-64-NEXT: call void @__kmpc_target_deinit()
+// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
// CHECK-64-NEXT: ret void
@@ -3450,7 +3450,7 @@ int a;
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73_kernel_environment)
+// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
@@ -3458,7 +3458,7 @@ int a;
// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
-// CHECK-64-NEXT: call void @__kmpc_target_deinit()
+// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
// CHECK-64-NEXT: ret void
@@ -3636,7 +3636,7 @@ int a;
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l81_kernel_environment)
+// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
@@ -3644,7 +3644,7 @@ int a;
// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l81_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
-// CHECK-64-NEXT: call void @__kmpc_target_deinit()
+// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
// CHECK-64-NEXT: ret void
@@ -3825,7 +3825,7 @@ int a;
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l85_kernel_environment)
+// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
@@ -3833,7 +3833,7 @@ int a;
// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l85_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
-// CHECK-64-NEXT: call void @__kmpc_target_deinit()
+// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
// CHECK-64-NEXT: ret void
@@ -4014,7 +4014,7 @@ int a;
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l89_kernel_environment)
+// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
@@ -4022,7 +4022,7 @@ int a;
// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l89_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
-// CHECK-64-NEXT: call void @__kmpc_target_deinit()
+// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
// CHECK-64-NEXT: ret void
@@ -4203,7 +4203,7 @@ int a;
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l93_kernel_environment)
+// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
@@ -4211,7 +4211,7 @@ int a;
// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l93_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
-// CHECK-64-NEXT: call void @__kmpc_target_deinit()
+// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
// CHECK-64-NEXT: ret void
@@ -4392,7 +4392,7 @@ int a;
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l97_kernel_environment)
+// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
@@ -4400,7 +4400,7 @@ int a;
// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l97_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
-// CHECK-64-NEXT: call void @__kmpc_target_deinit()
+// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
// CHECK-64-NEXT: ret void
@@ -4560,7 +4560,7 @@ int a;
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l101_kernel_environment)
+// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
@@ -4568,7 +4568,7 @@ int a;
// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l101_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
-// CHECK-64-NEXT: call void @__kmpc_target_deinit()
+// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
// CHECK-64-NEXT: ret void
@@ -4737,7 +4737,7 @@ int a;
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l105_kernel_environment)
+// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
@@ -4745,7 +4745,7 @@ int a;
// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l105_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
-// CHECK-64-NEXT: call void @__kmpc_target_deinit()
+// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
// CHECK-64-NEXT: ret void
@@ -4905,7 +4905,7 @@ int a;
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l109_kernel_environment)
+// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
@@ -4913,7 +4913,7 @@ int a;
// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l109_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
-// CHECK-64-NEXT: call void @__kmpc_target_deinit()
+// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
// CHECK-64-NEXT: ret void
@@ -5080,7 +5080,7 @@ int a;
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l113_kernel_environment)
+// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
@@ -5088,7 +5088,7 @@ int a;
// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l113_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
-// CHECK-64-NEXT: call void @__kmpc_target_deinit()
+// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
// CHECK-64-NEXT: ret void
@@ -5255,7 +5255,7 @@ int a;
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l117_kernel_environment)
+// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
@@ -5263,7 +5263,7 @@ int a;
// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l117_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
-// CHECK-64-NEXT: call void @__kmpc_target_deinit()
+// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
// CHECK-64-NEXT: ret void
@@ -5430,7 +5430,7 @@ int a;
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l121_kernel_environment)
+// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
@@ -5438,7 +5438,7 @@ int a;
// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l121_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
-// CHECK-64-NEXT: call void @__kmpc_target_deinit()
+// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
// CHECK-64-NEXT: ret void
@@ -5605,7 +5605,7 @@ int a;
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l125_kernel_environment)
+// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
@@ -5613,7 +5613,7 @@ int a;
// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l125_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
-// CHECK-64-NEXT: call void @__kmpc_target_deinit()
+// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
// CHECK-64-NEXT: ret void
@@ -5773,7 +5773,7 @@ int a;
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l130_kernel_environment)
+// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
@@ -5781,7 +5781,7 @@ int a;
// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l130_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
-// CHECK-64-NEXT: call void @__kmpc_target_deinit()
+// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
// CHECK-64-NEXT: ret void
@@ -5950,7 +5950,7 @@ int a;
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l135_kernel_environment)
+// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
@@ -5958,7 +5958,7 @@ int a;
// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l135_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
-// CHECK-64-NEXT: call void @__kmpc_target_deinit()
+// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
// CHECK-64-NEXT: ret void
@@ -6118,7 +6118,7 @@ int a;
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l140_kernel_environment)
+// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
@@ -6126,7 +6126,7 @@ int a;
// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l140_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
-// CHECK-64-NEXT: call void @__kmpc_target_deinit()
+// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
// CHECK-64-NEXT: ret void
@@ -6293,7 +6293,7 @@ int a;
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l145_kernel_environment)
+// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
@@ -6301,7 +6301,7 @@ int a;
// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l145_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
-// CHECK-64-NEXT: call void @__kmpc_target_deinit()
+// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
// CHECK-64-NEXT: ret void
@@ -6468,7 +6468,7 @@ int a;
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l150_kernel_environment)
+// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
@@ -6476,7 +6476,7 @@ int a;
// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l150_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
-// CHECK-64-NEXT: call void @__kmpc_target_deinit()
+// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
// CHECK-64-NEXT: ret void
@@ -6643,7 +6643,7 @@ int a;
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l155_kernel_environment)
+// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
@@ -6651,7 +6651,7 @@ int a;
// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l155_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
-// CHECK-64-NEXT: call void @__kmpc_target_deinit()
+// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
// CHECK-64-NEXT: ret void
@@ -6819,7 +6819,7 @@ int a;
// CHECK-64-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8
// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
// CHECK-64-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8
-// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l160_kernel_environment)
+// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
@@ -6828,7 +6828,7 @@ int a;
// CHECK-64-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1
// CHECK-64-NEXT: [[TMP3:%.*]] = zext i1 [[TOBOOL]] to i32
// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 [[TMP3]], i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l160_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-// CHECK-64-NEXT: call void @__kmpc_target_deinit()
+// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
// CHECK-64-NEXT: ret void
@@ -6915,13 +6915,13 @@ int a;
// CHECK-64-SAME: () #[[ATTR8]] {
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l163_kernel_environment)
+// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l163_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-// CHECK-64-NEXT: call void @__kmpc_target_deinit()
+// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
// CHECK-64-NEXT: ret void
@@ -6991,13 +6991,13 @@ int a;
// CHECK-64-SAME: () #[[ATTR8]] {
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l166_kernel_environment)
+// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l166_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-// CHECK-64-NEXT: call void @__kmpc_target_deinit()
+// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
// CHECK-64-NEXT: ret void
@@ -7084,13 +7084,13 @@ int a;
// CHECK-64-SAME: () #[[ATTR8]] {
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l169_kernel_environment)
+// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l169_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-// CHECK-64-NEXT: call void @__kmpc_target_deinit()
+// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
// CHECK-64-NEXT: ret void
@@ -7156,13 +7156,13 @@ int a;
// CHECK-64-SAME: () #[[ATTR8]] {
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l172_kernel_environment)
+// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l172_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-// CHECK-64-NEXT: call void @__kmpc_target_deinit()
+// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
// CHECK-64-NEXT: ret void
@@ -7228,13 +7228,13 @@ int a;
// CHECK-64-SAME: () #[[ATTR8]] {
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l175_kernel_environment)
+// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l175_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-// CHECK-64-NEXT: call void @__kmpc_target_deinit()
+// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
// CHECK-64-NEXT: ret void
@@ -7300,13 +7300,13 @@ int a;
// CHECK-64-SAME: () #[[ATTR8]] {
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l178_kernel_environment)
+// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l178_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-// CHECK-64-NEXT: call void @__kmpc_target_deinit()
+// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
// CHECK-64-NEXT: ret void
@@ -7374,7 +7374,7 @@ int a;
// CHECK-64-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8
// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
// CHECK-64-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8
-// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l181_kernel_environment)
+// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
@@ -7383,7 +7383,7 @@ int a;
// CHECK-64-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1
// CHECK-64-NEXT: [[TMP3:%.*]] = zext i1 [[TOBOOL]] to i32
// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 [[TMP3]], i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l181_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-// CHECK-64-NEXT: call void @__kmpc_target_deinit()
+// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
// CHECK-64-NEXT: ret void
@@ -7478,13 +7478,13 @@ int a;
// CHECK-64-SAME: () #[[ATTR8]] {
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l185_kernel_environment)
+// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l185_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-// CHECK-64-NEXT: call void @__kmpc_target_deinit()
+// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
// CHECK-64-NEXT: ret void
@@ -7562,13 +7562,13 @@ int a;
// CHECK-64-SAME: () #[[ATTR8]] {
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l189_kernel_environment)
+// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l189_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-// CHECK-64-NEXT: call void @__kmpc_target_deinit()
+// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
// CHECK-64-NEXT: ret void
@@ -7663,13 +7663,13 @@ int a;
// CHECK-64-SAME: () #[[ATTR8]] {
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l193_kernel_environment)
+// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l193_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-// CHECK-64-NEXT: call void @__kmpc_target_deinit()
+// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
// CHECK-64-NEXT: ret void
@@ -7743,13 +7743,13 @@ int a;
// CHECK-64-SAME: () #[[ATTR8]] {
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l197_kernel_environment)
+// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l197_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-// CHECK-64-NEXT: call void @__kmpc_target_deinit()
+// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
// CHECK-64-NEXT: ret void
@@ -7823,13 +7823,13 @@ int a;
// CHECK-64-SAME: () #[[ATTR8]] {
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l201_kernel_environment)
+// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l201_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-// CHECK-64-NEXT: call void @__kmpc_target_deinit()
+// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
// CHECK-64-NEXT: ret void
@@ -7903,13 +7903,13 @@ int a;
// CHECK-64-SAME: () #[[ATTR8]] {
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l205_kernel_environment)
+// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l205_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-// CHECK-64-NEXT: call void @__kmpc_target_deinit()
+// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
// CHECK-64-NEXT: ret void
@@ -7983,13 +7983,13 @@ int a;
// CHECK-64-SAME: () #[[ATTR8]] {
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l209_kernel_environment)
+// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l209_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-// CHECK-64-NEXT: call void @__kmpc_target_deinit()
+// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
// CHECK-64-NEXT: ret void
@@ -8064,13 +8064,13 @@ int a;
// CHECK-64-SAME: () #[[ATTR8]] {
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l214_kernel_environment)
+// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l214_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-// CHECK-64-NEXT: call void @__kmpc_target_deinit()
+// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
// CHECK-64-NEXT: ret void
@@ -8148,13 +8148,13 @@ int a;
// CHECK-64-SAME: () #[[ATTR8]] {
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l219_kernel_environment)
+// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l219_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-// CHECK-64-NEXT: call void @__kmpc_target_deinit()
+// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
// CHECK-64-NEXT: ret void
@@ -8249,13 +8249,13 @@ int a;
// CHECK-64-SAME: () #[[ATTR8]] {
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l224_kernel_environment)
+// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l224_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-// CHECK-64-NEXT: call void @__kmpc_target_deinit()
+// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
// CHECK-64-NEXT: ret void
@@ -8329,13 +8329,13 @@ int a;
// CHECK-64-SAME: () #[[ATTR8]] {
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l229_kernel_environment)
+// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l229_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-// CHECK-64-NEXT: call void @__kmpc_target_deinit()
+// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
// CHECK-64-NEXT: ret void
@@ -8409,13 +8409,13 @@ int a;
// CHECK-64-SAME: () #[[ATTR8]] {
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l234_kernel_environment)
+// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l234_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-// CHECK-64-NEXT: call void @__kmpc_target_deinit()
+// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
// CHECK-64-NEXT: ret void
@@ -8489,13 +8489,13 @@ int a;
// CHECK-64-SAME: () #[[ATTR8]] {
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l239_kernel_environment)
+// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l239_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-// CHECK-64-NEXT: call void @__kmpc_target_deinit()
+// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
// CHECK-64-NEXT: ret void
@@ -8569,13 +8569,13 @@ int a;
// CHECK-64-SAME: () #[[ATTR8]] {
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l244_kernel_environment)
+// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l244_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-// CHECK-64-NEXT: call void @__kmpc_target_deinit()
+// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
// CHECK-64-NEXT: ret void
@@ -8662,13 +8662,13 @@ int a;
// CHECK-64-SAME: () #[[ATTR8]] {
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l248_kernel_environment)
+// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l248_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-// CHECK-64-NEXT: call void @__kmpc_target_deinit()
+// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
// CHECK-64-NEXT: ret void
@@ -8738,13 +8738,13 @@ int a;
// CHECK-64-SAME: () #[[ATTR8]] {
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l252_kernel_environment)
+// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l252_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-// CHECK-64-NEXT: call void @__kmpc_target_deinit()
+// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
// CHECK-64-NEXT: ret void
@@ -8831,13 +8831,13 @@ int a;
// CHECK-64-SAME: () #[[ATTR8]] {
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l256_kernel_environment)
+// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l256_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-// CHECK-64-NEXT: call void @__kmpc_target_deinit()
+// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
// CHECK-64-NEXT: ret void
@@ -8903,13 +8903,13 @@ int a;
// CHECK-64-SAME: () #[[ATTR8]] {
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l260_kernel_environment)
+// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l260_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-// CHECK-64-NEXT: call void @__kmpc_target_deinit()
+// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
// CHECK-64-NEXT: ret void
@@ -8975,13 +8975,13 @@ int a;
// CHECK-64-SAME: () #[[ATTR8]] {
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l264_kernel_environment)
+// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l264_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-// CHECK-64-NEXT: call void @__kmpc_target_deinit()
+// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
// CHECK-64-NEXT: ret void
@@ -9047,13 +9047,13 @@ int a;
// CHECK-64-SAME: () #[[ATTR8]] {
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l268_kernel_environment)
+// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l268_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-// CHECK-64-NEXT: call void @__kmpc_target_deinit()
+// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
// CHECK-64-NEXT: ret void
@@ -9123,11 +9123,11 @@ int a;
// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l15_kernel_environment)
+// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
-// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
+// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-32-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
// CHECK-32-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1
// CHECK-32-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
@@ -9136,7 +9136,7 @@ int a;
// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l15_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP3]]) #[[ATTR2:[0-9]+]]
-// CHECK-32-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
// CHECK-32-NEXT: ret void
@@ -9525,7 +9525,7 @@ int a;
// CHECK-32-NEXT: entry:
// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l18_kernel_environment)
+// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
@@ -9533,7 +9533,7 @@ int a;
// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l18_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
-// CHECK-32-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
// CHECK-32-NEXT: ret void
@@ -9712,7 +9712,7 @@ int a;
// CHECK-32-NEXT: entry:
// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l21_kernel_environment)
+// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
@@ -9720,7 +9720,7 @@ int a;
// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l21_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
-// CHECK-32-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
// CHECK-32-NEXT: ret void
@@ -9889,7 +9889,7 @@ int a;
// CHECK-32-NEXT: entry:
// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l24_kernel_environment)
+// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
@@ -9897,7 +9897,7 @@ int a;
// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l24_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
-// CHECK-32-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
// CHECK-32-NEXT: ret void
@@ -10074,7 +10074,7 @@ int a;
// CHECK-32-NEXT: entry:
// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l27_kernel_environment)
+// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
@@ -10082,7 +10082,7 @@ int a;
// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l27_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
-// CHECK-32-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
// CHECK-32-NEXT: ret void
@@ -10259,7 +10259,7 @@ int a;
// CHECK-32-NEXT: entry:
// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l30_kernel_environment)
+// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
@@ -10267,7 +10267,7 @@ int a;
// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l30_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
-// CHECK-32-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
// CHECK-32-NEXT: ret void
@@ -10444,7 +10444,7 @@ int a;
// CHECK-32-NEXT: entry:
// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l33_kernel_environment)
+// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
@@ -10452,7 +10452,7 @@ int a;
// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l33_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
-// CHECK-32-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
// CHECK-32-NEXT: ret void
@@ -10632,7 +10632,7 @@ int a;
// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l37_kernel_environment)
+// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
@@ -10643,7 +10643,7 @@ int a;
// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l37_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP3]]) #[[ATTR2]]
-// CHECK-32-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
// CHECK-32-NEXT: ret void
@@ -10830,7 +10830,7 @@ int a;
// CHECK-32-NEXT: entry:
// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l40_kernel_environment)
+// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
@@ -10838,7 +10838,7 @@ int a;
// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l40_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
-// CHECK-32-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
// CHECK-32-NEXT: ret void
@@ -11003,7 +11003,7 @@ int a;
// CHECK-32-NEXT: entry:
// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l43_kernel_environment)
+// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
@@ -11011,7 +11011,7 @@ int a;
// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l43_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
-// CHECK-32-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
// CHECK-32-NEXT: ret void
@@ -11166,7 +11166,7 @@ int a;
// CHECK-32-NEXT: entry:
// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l46_kernel_environment)
+// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
@@ -11174,7 +11174,7 @@ int a;
// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l46_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
-// CHECK-32-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
// CHECK-32-NEXT: ret void
@@ -11337,7 +11337,7 @@ int a;
// CHECK-32-NEXT: entry:
// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l49_kernel_environment)
+// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
@@ -11345,7 +11345,7 @@ int a;
// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l49_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
-// CHECK-32-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
// CHECK-32-NEXT: ret void
@@ -11508,7 +11508,7 @@ int a;
// CHECK-32-NEXT: entry:
// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l52_kernel_environment)
+// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
@@ -11516,7 +11516,7 @@ int a;
// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l52_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
-// CHECK-32-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
// CHECK-32-NEXT: ret void
@@ -11679,7 +11679,7 @@ int a;
// CHECK-32-NEXT: entry:
// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l55_kernel_environment)
+// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
@@ -11687,7 +11687,7 @@ int a;
// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l55_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
-// CHECK-32-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
// CHECK-32-NEXT: ret void
@@ -11850,7 +11850,7 @@ int a;
// CHECK-32-NEXT: entry:
// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l58_kernel_environment)
+// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
@@ -11858,7 +11858,7 @@ int a;
// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l58_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
-// CHECK-32-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
// CHECK-32-NEXT: ret void
@@ -12028,7 +12028,7 @@ int a;
// CHECK-32-NEXT: entry:
// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66_kernel_environment)
+// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
@@ -12036,7 +12036,7 @@ int a;
// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
-// CHECK-32-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
// CHECK-32-NEXT: ret void
@@ -12217,7 +12217,7 @@ int a;
// CHECK-32-NEXT: entry:
// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73_kernel_environment)
+// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
@@ -12225,7 +12225,7 @@ int a;
// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
-// CHECK-32-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
// CHECK-32-NEXT: ret void
@@ -12398,7 +12398,7 @@ int a;
// CHECK-32-NEXT: entry:
// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l81_kernel_environment)
+// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
@@ -12406,7 +12406,7 @@ int a;
// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l81_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
-// CHECK-32-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
// CHECK-32-NEXT: ret void
@@ -12583,7 +12583,7 @@ int a;
// CHECK-32-NEXT: entry:
// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l85_kernel_environment)
+// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
@@ -12591,7 +12591,7 @@ int a;
// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l85_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
-// CHECK-32-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
// CHECK-32-NEXT: ret void
@@ -12768,7 +12768,7 @@ int a;
// CHECK-32-NEXT: entry:
// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l89_kernel_environment)
+// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
@@ -12776,7 +12776,7 @@ int a;
// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l89_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
-// CHECK-32-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
// CHECK-32-NEXT: ret void
@@ -12953,7 +12953,7 @@ int a;
// CHECK-32-NEXT: entry:
// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l93_kernel_environment)
+// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
@@ -12961,7 +12961,7 @@ int a;
// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l93_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
-// CHECK-32-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
// CHECK-32-NEXT: ret void
@@ -13138,7 +13138,7 @@ int a;
// CHECK-32-NEXT: entry:
// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l97_kernel_environment)
+// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
@@ -13146,7 +13146,7 @@ int a;
// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l97_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
-// CHECK-32-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
// CHECK-32-NEXT: ret void
@@ -13301,7 +13301,7 @@ int a;
// CHECK-32-NEXT: entry:
// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l101_kernel_environment)
+// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
@@ -13309,7 +13309,7 @@ int a;
// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l101_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
-// CHECK-32-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
// CHECK-32-NEXT: ret void
@@ -13474,7 +13474,7 @@ int a;
// CHECK-32-NEXT: entry:
// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l105_kernel_environment)
+// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
@@ -13482,7 +13482,7 @@ int a;
// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l105_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
-// CHECK-32-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
// CHECK-32-NEXT: ret void
@@ -13637,7 +13637,7 @@ int a;
// CHECK-32-NEXT: entry:
// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l109_kernel_environment)
+// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
@@ -13645,7 +13645,7 @@ int a;
// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l109_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
-// CHECK-32-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
// CHECK-32-NEXT: ret void
@@ -13808,7 +13808,7 @@ int a;
// CHECK-32-NEXT: entry:
// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l113_kernel_environment)
+// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
@@ -13816,7 +13816,7 @@ int a;
// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l113_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
-// CHECK-32-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
// CHECK-32-NEXT: ret void
@@ -13979,7 +13979,7 @@ int a;
// CHECK-32-NEXT: entry:
// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l117_kernel_environment)
+// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
@@ -13987,7 +13987,7 @@ int a;
// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l117_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
-// CHECK-32-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
// CHECK-32-NEXT: ret void
@@ -14150,7 +14150,7 @@ int a;
// CHECK-32-NEXT: entry:
// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l121_kernel_environment)
+// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
@@ -14158,7 +14158,7 @@ int a;
// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l121_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
-// CHECK-32-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
// CHECK-32-NEXT: ret void
@@ -14321,7 +14321,7 @@ int a;
// CHECK-32-NEXT: entry:
// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l125_kernel_environment)
+// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
@@ -14329,7 +14329,7 @@ int a;
// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l125_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
-// CHECK-32-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
// CHECK-32-NEXT: ret void
@@ -14484,7 +14484,7 @@ int a;
// CHECK-32-NEXT: entry:
// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l130_kernel_environment)
+// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
@@ -14492,7 +14492,7 @@ int a;
// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l130_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
-// CHECK-32-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
// CHECK-32-NEXT: ret void
@@ -14657,7 +14657,7 @@ int a;
// CHECK-32-NEXT: entry:
// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l135_kernel_environment)
+// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
@@ -14665,7 +14665,7 @@ int a;
// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l135_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
-// CHECK-32-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
// CHECK-32-NEXT: ret void
@@ -14820,7 +14820,7 @@ int a;
// CHECK-32-NEXT: entry:
// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l140_kernel_environment)
+// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
@@ -14828,7 +14828,7 @@ int a;
// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l140_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
-// CHECK-32-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
// CHECK-32-NEXT: ret void
@@ -14991,7 +14991,7 @@ int a;
// CHECK-32-NEXT: entry:
// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l145_kernel_environment)
+// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
@@ -14999,7 +14999,7 @@ int a;
// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l145_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
-// CHECK-32-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
// CHECK-32-NEXT: ret void
@@ -15162,7 +15162,7 @@ int a;
// CHECK-32-NEXT: entry:
// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l150_kernel_environment)
+// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
@@ -15170,7 +15170,7 @@ int a;
// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l150_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
-// CHECK-32-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
// CHECK-32-NEXT: ret void
@@ -15333,7 +15333,7 @@ int a;
// CHECK-32-NEXT: entry:
// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l155_kernel_environment)
+// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
@@ -15341,7 +15341,7 @@ int a;
// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l155_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
-// CHECK-32-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
// CHECK-32-NEXT: ret void
@@ -15505,7 +15505,7 @@ int a;
// CHECK-32-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
// CHECK-32-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l160_kernel_environment)
+// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
@@ -15514,7 +15514,7 @@ int a;
// CHECK-32-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1
// CHECK-32-NEXT: [[TMP3:%.*]] = zext i1 [[TOBOOL]] to i32
// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 [[TMP3]], i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l160_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
-// CHECK-32-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
// CHECK-32-NEXT: ret void
@@ -15601,13 +15601,13 @@ int a;
// CHECK-32-SAME: () #[[ATTR8]] {
// CHECK-32-NEXT: entry:
// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l163_kernel_environment)
+// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l163_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
-// CHECK-32-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
// CHECK-32-NEXT: ret void
@@ -15677,13 +15677,13 @@ int a;
// CHECK-32-SAME: () #[[ATTR8]] {
// CHECK-32-NEXT: entry:
// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l166_kernel_environment)
+// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l166_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
-// CHECK-32-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
// CHECK-32-NEXT: ret void
@@ -15770,13 +15770,13 @@ int a;
// CHECK-32-SAME: () #[[ATTR8]] {
// CHECK-32-NEXT: entry:
// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l169_kernel_environment)
+// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l169_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
-// CHECK-32-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
// CHECK-32-NEXT: ret void
@@ -15842,13 +15842,13 @@ int a;
// CHECK-32-SAME: () #[[ATTR8]] {
// CHECK-32-NEXT: entry:
// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l172_kernel_environment)
+// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l172_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
-// CHECK-32-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
// CHECK-32-NEXT: ret void
@@ -15914,13 +15914,13 @@ int a;
// CHECK-32-SAME: () #[[ATTR8]] {
// CHECK-32-NEXT: entry:
// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l175_kernel_environment)
+// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l175_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
-// CHECK-32-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
// CHECK-32-NEXT: ret void
@@ -15986,13 +15986,13 @@ int a;
// CHECK-32-SAME: () #[[ATTR8]] {
// CHECK-32-NEXT: entry:
// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l178_kernel_environment)
+// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l178_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
-// CHECK-32-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
// CHECK-32-NEXT: ret void
@@ -16060,7 +16060,7 @@ int a;
// CHECK-32-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
// CHECK-32-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l181_kernel_environment)
+// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
@@ -16069,7 +16069,7 @@ int a;
// CHECK-32-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1
// CHECK-32-NEXT: [[TMP3:%.*]] = zext i1 [[TOBOOL]] to i32
// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 [[TMP3]], i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l181_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
-// CHECK-32-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
// CHECK-32-NEXT: ret void
@@ -16164,13 +16164,13 @@ int a;
// CHECK-32-SAME: () #[[ATTR8]] {
// CHECK-32-NEXT: entry:
// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l185_kernel_environment)
+// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l185_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
-// CHECK-32-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
// CHECK-32-NEXT: ret void
@@ -16248,13 +16248,13 @@ int a;
// CHECK-32-SAME: () #[[ATTR8]] {
// CHECK-32-NEXT: entry:
// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l189_kernel_environment)
+// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l189_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
-// CHECK-32-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
// CHECK-32-NEXT: ret void
@@ -16349,13 +16349,13 @@ int a;
// CHECK-32-SAME: () #[[ATTR8]] {
// CHECK-32-NEXT: entry:
// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l193_kernel_environment)
+// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l193_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
-// CHECK-32-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
// CHECK-32-NEXT: ret void
@@ -16429,13 +16429,13 @@ int a;
// CHECK-32-SAME: () #[[ATTR8]] {
// CHECK-32-NEXT: entry:
// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l197_kernel_environment)
+// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l197_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
-// CHECK-32-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
// CHECK-32-NEXT: ret void
@@ -16509,13 +16509,13 @@ int a;
// CHECK-32-SAME: () #[[ATTR8]] {
// CHECK-32-NEXT: entry:
// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l201_kernel_environment)
+// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l201_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
-// CHECK-32-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
// CHECK-32-NEXT: ret void
@@ -16589,13 +16589,13 @@ int a;
// CHECK-32-SAME: () #[[ATTR8]] {
// CHECK-32-NEXT: entry:
// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l205_kernel_environment)
+// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l205_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
-// CHECK-32-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
// CHECK-32-NEXT: ret void
@@ -16669,13 +16669,13 @@ int a;
// CHECK-32-SAME: () #[[ATTR8]] {
// CHECK-32-NEXT: entry:
// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l209_kernel_environment)
+// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l209_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
-// CHECK-32-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
// CHECK-32-NEXT: ret void
@@ -16750,13 +16750,13 @@ int a;
// CHECK-32-SAME: () #[[ATTR8]] {
// CHECK-32-NEXT: entry:
// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l214_kernel_environment)
+// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l214_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
-// CHECK-32-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
// CHECK-32-NEXT: ret void
@@ -16834,13 +16834,13 @@ int a;
// CHECK-32-SAME: () #[[ATTR8]] {
// CHECK-32-NEXT: entry:
// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l219_kernel_environment)
+// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l219_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
-// CHECK-32-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
// CHECK-32-NEXT: ret void
@@ -16935,13 +16935,13 @@ int a;
// CHECK-32-SAME: () #[[ATTR8]] {
// CHECK-32-NEXT: entry:
// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l224_kernel_environment)
+// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l224_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
-// CHECK-32-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
// CHECK-32-NEXT: ret void
@@ -17015,13 +17015,13 @@ int a;
// CHECK-32-SAME: () #[[ATTR8]] {
// CHECK-32-NEXT: entry:
// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l229_kernel_environment)
+// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l229_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
-// CHECK-32-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
// CHECK-32-NEXT: ret void
@@ -17095,13 +17095,13 @@ int a;
// CHECK-32-SAME: () #[[ATTR8]] {
// CHECK-32-NEXT: entry:
// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l234_kernel_environment)
+// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l234_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
-// CHECK-32-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
// CHECK-32-NEXT: ret void
@@ -17175,13 +17175,13 @@ int a;
// CHECK-32-SAME: () #[[ATTR8]] {
// CHECK-32-NEXT: entry:
// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l239_kernel_environment)
+// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l239_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
-// CHECK-32-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
// CHECK-32-NEXT: ret void
@@ -17255,13 +17255,13 @@ int a;
// CHECK-32-SAME: () #[[ATTR8]] {
// CHECK-32-NEXT: entry:
// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l244_kernel_environment)
+// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l244_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
-// CHECK-32-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
// CHECK-32-NEXT: ret void
@@ -17348,13 +17348,13 @@ int a;
// CHECK-32-SAME: () #[[ATTR8]] {
// CHECK-32-NEXT: entry:
// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l248_kernel_environment)
+// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l248_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
-// CHECK-32-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
// CHECK-32-NEXT: ret void
@@ -17424,13 +17424,13 @@ int a;
// CHECK-32-SAME: () #[[ATTR8]] {
// CHECK-32-NEXT: entry:
// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l252_kernel_environment)
+// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l252_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
-// CHECK-32-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
// CHECK-32-NEXT: ret void
@@ -17517,13 +17517,13 @@ int a;
// CHECK-32-SAME: () #[[ATTR8]] {
// CHECK-32-NEXT: entry:
// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l256_kernel_environment)
+// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l256_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
-// CHECK-32-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
// CHECK-32-NEXT: ret void
@@ -17589,13 +17589,13 @@ int a;
// CHECK-32-SAME: () #[[ATTR8]] {
// CHECK-32-NEXT: entry:
// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l260_kernel_environment)
+// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l260_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
-// CHECK-32-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
// CHECK-32-NEXT: ret void
@@ -17661,13 +17661,13 @@ int a;
// CHECK-32-SAME: () #[[ATTR8]] {
// CHECK-32-NEXT: entry:
// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l264_kernel_environment)
+// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l264_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
-// CHECK-32-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
// CHECK-32-NEXT: ret void
@@ -17733,13 +17733,13 @@ int a;
// CHECK-32-SAME: () #[[ATTR8]] {
// CHECK-32-NEXT: entry:
// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l268_kernel_environment)
+// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l268_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
-// CHECK-32-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
// CHECK-32-NEXT: ret void
@@ -17809,11 +17809,11 @@ int a;
// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK-32-EX-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l15_kernel_environment)
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
-// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
+// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1
// CHECK-32-EX-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
@@ -17822,7 +17822,7 @@ int a;
// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l15_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP3]]) #[[ATTR2:[0-9]+]]
-// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
// CHECK-32-EX-NEXT: ret void
@@ -18211,7 +18211,7 @@ int a;
// CHECK-32-EX-NEXT: entry:
// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l18_kernel_environment)
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
@@ -18219,7 +18219,7 @@ int a;
// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l18_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
-// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
// CHECK-32-EX-NEXT: ret void
@@ -18398,7 +18398,7 @@ int a;
// CHECK-32-EX-NEXT: entry:
// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l21_kernel_environment)
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
@@ -18406,7 +18406,7 @@ int a;
// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l21_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
-// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
// CHECK-32-EX-NEXT: ret void
@@ -18575,7 +18575,7 @@ int a;
// CHECK-32-EX-NEXT: entry:
// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l24_kernel_environment)
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
@@ -18583,7 +18583,7 @@ int a;
// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l24_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
-// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
// CHECK-32-EX-NEXT: ret void
@@ -18760,7 +18760,7 @@ int a;
// CHECK-32-EX-NEXT: entry:
// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l27_kernel_environment)
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
@@ -18768,7 +18768,7 @@ int a;
// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l27_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
-// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
// CHECK-32-EX-NEXT: ret void
@@ -18945,7 +18945,7 @@ int a;
// CHECK-32-EX-NEXT: entry:
// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l30_kernel_environment)
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
@@ -18953,7 +18953,7 @@ int a;
// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l30_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
-// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
// CHECK-32-EX-NEXT: ret void
@@ -19130,7 +19130,7 @@ int a;
// CHECK-32-EX-NEXT: entry:
// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l33_kernel_environment)
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
@@ -19138,7 +19138,7 @@ int a;
// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l33_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
-// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
// CHECK-32-EX-NEXT: ret void
@@ -19318,7 +19318,7 @@ int a;
// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK-32-EX-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l37_kernel_environment)
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
@@ -19329,7 +19329,7 @@ int a;
// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l37_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP3]]) #[[ATTR2]]
-// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
// CHECK-32-EX-NEXT: ret void
@@ -19516,7 +19516,7 @@ int a;
// CHECK-32-EX-NEXT: entry:
// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l40_kernel_environment)
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
@@ -19524,7 +19524,7 @@ int a;
// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l40_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
-// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
// CHECK-32-EX-NEXT: ret void
@@ -19689,7 +19689,7 @@ int a;
// CHECK-32-EX-NEXT: entry:
// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l43_kernel_environment)
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
@@ -19697,7 +19697,7 @@ int a;
// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l43_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
-// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
// CHECK-32-EX-NEXT: ret void
@@ -19852,7 +19852,7 @@ int a;
// CHECK-32-EX-NEXT: entry:
// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l46_kernel_environment)
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
@@ -19860,7 +19860,7 @@ int a;
// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l46_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
-// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
// CHECK-32-EX-NEXT: ret void
@@ -20023,7 +20023,7 @@ int a;
// CHECK-32-EX-NEXT: entry:
// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l49_kernel_environment)
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
@@ -20031,7 +20031,7 @@ int a;
// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l49_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
-// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
// CHECK-32-EX-NEXT: ret void
@@ -20194,7 +20194,7 @@ int a;
// CHECK-32-EX-NEXT: entry:
// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l52_kernel_environment)
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
@@ -20202,7 +20202,7 @@ int a;
// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l52_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
-// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
// CHECK-32-EX-NEXT: ret void
@@ -20365,7 +20365,7 @@ int a;
// CHECK-32-EX-NEXT: entry:
// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l55_kernel_environment)
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
@@ -20373,7 +20373,7 @@ int a;
// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l55_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
-// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
// CHECK-32-EX-NEXT: ret void
@@ -20536,7 +20536,7 @@ int a;
// CHECK-32-EX-NEXT: entry:
// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l58_kernel_environment)
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
@@ -20544,7 +20544,7 @@ int a;
// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l58_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
-// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
// CHECK-32-EX-NEXT: ret void
@@ -20714,7 +20714,7 @@ int a;
// CHECK-32-EX-NEXT: entry:
// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66_kernel_environment)
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
@@ -20722,7 +20722,7 @@ int a;
// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
-// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
// CHECK-32-EX-NEXT: ret void
@@ -20903,7 +20903,7 @@ int a;
// CHECK-32-EX-NEXT: entry:
// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73_kernel_environment)
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
@@ -20911,7 +20911,7 @@ int a;
// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
-// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
// CHECK-32-EX-NEXT: ret void
@@ -21084,7 +21084,7 @@ int a;
// CHECK-32-EX-NEXT: entry:
// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l81_kernel_environment)
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
@@ -21092,7 +21092,7 @@ int a;
// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l81_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
-// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
// CHECK-32-EX-NEXT: ret void
@@ -21269,7 +21269,7 @@ int a;
// CHECK-32-EX-NEXT: entry:
// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l85_kernel_environment)
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
@@ -21277,7 +21277,7 @@ int a;
// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l85_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
-// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
// CHECK-32-EX-NEXT: ret void
@@ -21454,7 +21454,7 @@ int a;
// CHECK-32-EX-NEXT: entry:
// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l89_kernel_environment)
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
@@ -21462,7 +21462,7 @@ int a;
// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l89_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
-// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
// CHECK-32-EX-NEXT: ret void
@@ -21639,7 +21639,7 @@ int a;
// CHECK-32-EX-NEXT: entry:
// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l93_kernel_environment)
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
@@ -21647,7 +21647,7 @@ int a;
// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l93_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
-// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
// CHECK-32-EX-NEXT: ret void
@@ -21824,7 +21824,7 @@ int a;
// CHECK-32-EX-NEXT: entry:
// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l97_kernel_environment)
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
@@ -21832,7 +21832,7 @@ int a;
// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l97_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
-// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
// CHECK-32-EX-NEXT: ret void
@@ -21987,7 +21987,7 @@ int a;
// CHECK-32-EX-NEXT: entry:
// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l101_kernel_environment)
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
@@ -21995,7 +21995,7 @@ int a;
// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l101_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
-// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
// CHECK-32-EX-NEXT: ret void
@@ -22160,7 +22160,7 @@ int a;
// CHECK-32-EX-NEXT: entry:
// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l105_kernel_environment)
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
@@ -22168,7 +22168,7 @@ int a;
// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l105_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
-// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
// CHECK-32-EX-NEXT: ret void
@@ -22323,7 +22323,7 @@ int a;
// CHECK-32-EX-NEXT: entry:
// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l109_kernel_environment)
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
@@ -22331,7 +22331,7 @@ int a;
// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l109_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
-// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
// CHECK-32-EX-NEXT: ret void
@@ -22494,7 +22494,7 @@ int a;
// CHECK-32-EX-NEXT: entry:
// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l113_kernel_environment)
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
@@ -22502,7 +22502,7 @@ int a;
// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l113_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
-// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
// CHECK-32-EX-NEXT: ret void
@@ -22665,7 +22665,7 @@ int a;
// CHECK-32-EX-NEXT: entry:
// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l117_kernel_environment)
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
@@ -22673,7 +22673,7 @@ int a;
// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l117_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
-// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
// CHECK-32-EX-NEXT: ret void
@@ -22836,7 +22836,7 @@ int a;
// CHECK-32-EX-NEXT: entry:
// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l121_kernel_environment)
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
@@ -22844,7 +22844,7 @@ int a;
// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l121_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
-// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
// CHECK-32-EX-NEXT: ret void
@@ -23007,7 +23007,7 @@ int a;
// CHECK-32-EX-NEXT: entry:
// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l125_kernel_environment)
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
@@ -23015,7 +23015,7 @@ int a;
// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l125_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
-// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
// CHECK-32-EX-NEXT: ret void
@@ -23170,7 +23170,7 @@ int a;
// CHECK-32-EX-NEXT: entry:
// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l130_kernel_environment)
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
@@ -23178,7 +23178,7 @@ int a;
// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l130_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
-// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
// CHECK-32-EX-NEXT: ret void
@@ -23343,7 +23343,7 @@ int a;
// CHECK-32-EX-NEXT: entry:
// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l135_kernel_environment)
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
@@ -23351,7 +23351,7 @@ int a;
// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l135_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
-// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
// CHECK-32-EX-NEXT: ret void
@@ -23506,7 +23506,7 @@ int a;
// CHECK-32-EX-NEXT: entry:
// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l140_kernel_environment)
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
@@ -23514,7 +23514,7 @@ int a;
// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l140_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
-// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
// CHECK-32-EX-NEXT: ret void
@@ -23677,7 +23677,7 @@ int a;
// CHECK-32-EX-NEXT: entry:
// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l145_kernel_environment)
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
@@ -23685,7 +23685,7 @@ int a;
// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l145_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
-// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
// CHECK-32-EX-NEXT: ret void
@@ -23848,7 +23848,7 @@ int a;
// CHECK-32-EX-NEXT: entry:
// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l150_kernel_environment)
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
@@ -23856,7 +23856,7 @@ int a;
// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l150_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
-// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
// CHECK-32-EX-NEXT: ret void
@@ -24019,7 +24019,7 @@ int a;
// CHECK-32-EX-NEXT: entry:
// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l155_kernel_environment)
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
@@ -24027,7 +24027,7 @@ int a;
// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l155_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
-// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
// CHECK-32-EX-NEXT: ret void
@@ -24191,7 +24191,7 @@ int a;
// CHECK-32-EX-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4
// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
// CHECK-32-EX-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l160_kernel_environment)
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
@@ -24200,7 +24200,7 @@ int a;
// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1
// CHECK-32-EX-NEXT: [[TMP3:%.*]] = zext i1 [[TOBOOL]] to i32
// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 [[TMP3]], i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l160_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
-// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
// CHECK-32-EX-NEXT: ret void
@@ -24287,13 +24287,13 @@ int a;
// CHECK-32-EX-SAME: () #[[ATTR8]] {
// CHECK-32-EX-NEXT: entry:
// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l163_kernel_environment)
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l163_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
-// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
// CHECK-32-EX-NEXT: ret void
@@ -24363,13 +24363,13 @@ int a;
// CHECK-32-EX-SAME: () #[[ATTR8]] {
// CHECK-32-EX-NEXT: entry:
// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l166_kernel_environment)
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l166_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
-// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
// CHECK-32-EX-NEXT: ret void
@@ -24456,13 +24456,13 @@ int a;
// CHECK-32-EX-SAME: () #[[ATTR8]] {
// CHECK-32-EX-NEXT: entry:
// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l169_kernel_environment)
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l169_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
-// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
// CHECK-32-EX-NEXT: ret void
@@ -24528,13 +24528,13 @@ int a;
// CHECK-32-EX-SAME: () #[[ATTR8]] {
// CHECK-32-EX-NEXT: entry:
// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l172_kernel_environment)
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l172_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
-// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
// CHECK-32-EX-NEXT: ret void
@@ -24600,13 +24600,13 @@ int a;
// CHECK-32-EX-SAME: () #[[ATTR8]] {
// CHECK-32-EX-NEXT: entry:
// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l175_kernel_environment)
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l175_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
-// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
// CHECK-32-EX-NEXT: ret void
@@ -24672,13 +24672,13 @@ int a;
// CHECK-32-EX-SAME: () #[[ATTR8]] {
// CHECK-32-EX-NEXT: entry:
// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l178_kernel_environment)
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l178_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
-// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
// CHECK-32-EX-NEXT: ret void
@@ -24746,7 +24746,7 @@ int a;
// CHECK-32-EX-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4
// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
// CHECK-32-EX-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l181_kernel_environment)
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
@@ -24755,7 +24755,7 @@ int a;
// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1
// CHECK-32-EX-NEXT: [[TMP3:%.*]] = zext i1 [[TOBOOL]] to i32
// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 [[TMP3]], i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l181_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
-// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
// CHECK-32-EX-NEXT: ret void
@@ -24850,13 +24850,13 @@ int a;
// CHECK-32-EX-SAME: () #[[ATTR8]] {
// CHECK-32-EX-NEXT: entry:
// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l185_kernel_environment)
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l185_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
-// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
// CHECK-32-EX-NEXT: ret void
@@ -24934,13 +24934,13 @@ int a;
// CHECK-32-EX-SAME: () #[[ATTR8]] {
// CHECK-32-EX-NEXT: entry:
// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l189_kernel_environment)
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l189_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
-// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
// CHECK-32-EX-NEXT: ret void
@@ -25035,13 +25035,13 @@ int a;
// CHECK-32-EX-SAME: () #[[ATTR8]] {
// CHECK-32-EX-NEXT: entry:
// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l193_kernel_environment)
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l193_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
-// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
// CHECK-32-EX-NEXT: ret void
@@ -25115,13 +25115,13 @@ int a;
// CHECK-32-EX-SAME: () #[[ATTR8]] {
// CHECK-32-EX-NEXT: entry:
// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l197_kernel_environment)
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l197_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
-// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
// CHECK-32-EX-NEXT: ret void
@@ -25195,13 +25195,13 @@ int a;
// CHECK-32-EX-SAME: () #[[ATTR8]] {
// CHECK-32-EX-NEXT: entry:
// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l201_kernel_environment)
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l201_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
-// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
// CHECK-32-EX-NEXT: ret void
@@ -25275,13 +25275,13 @@ int a;
// CHECK-32-EX-SAME: () #[[ATTR8]] {
// CHECK-32-EX-NEXT: entry:
// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l205_kernel_environment)
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l205_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
-// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
// CHECK-32-EX-NEXT: ret void
@@ -25355,13 +25355,13 @@ int a;
// CHECK-32-EX-SAME: () #[[ATTR8]] {
// CHECK-32-EX-NEXT: entry:
// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l209_kernel_environment)
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l209_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
-// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
// CHECK-32-EX-NEXT: ret void
@@ -25436,13 +25436,13 @@ int a;
// CHECK-32-EX-SAME: () #[[ATTR8]] {
// CHECK-32-EX-NEXT: entry:
// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l214_kernel_environment)
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l214_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
-// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
// CHECK-32-EX-NEXT: ret void
@@ -25520,13 +25520,13 @@ int a;
// CHECK-32-EX-SAME: () #[[ATTR8]] {
// CHECK-32-EX-NEXT: entry:
// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l219_kernel_environment)
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l219_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
-// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
// CHECK-32-EX-NEXT: ret void
@@ -25621,13 +25621,13 @@ int a;
// CHECK-32-EX-SAME: () #[[ATTR8]] {
// CHECK-32-EX-NEXT: entry:
// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l224_kernel_environment)
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l224_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
-// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
// CHECK-32-EX-NEXT: ret void
@@ -25701,13 +25701,13 @@ int a;
// CHECK-32-EX-SAME: () #[[ATTR8]] {
// CHECK-32-EX-NEXT: entry:
// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l229_kernel_environment)
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l229_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
-// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
// CHECK-32-EX-NEXT: ret void
@@ -25781,13 +25781,13 @@ int a;
// CHECK-32-EX-SAME: () #[[ATTR8]] {
// CHECK-32-EX-NEXT: entry:
// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l234_kernel_environment)
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l234_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
-// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
// CHECK-32-EX-NEXT: ret void
@@ -25861,13 +25861,13 @@ int a;
// CHECK-32-EX-SAME: () #[[ATTR8]] {
// CHECK-32-EX-NEXT: entry:
// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l239_kernel_environment)
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l239_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
-// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
// CHECK-32-EX-NEXT: ret void
@@ -25941,13 +25941,13 @@ int a;
// CHECK-32-EX-SAME: () #[[ATTR8]] {
// CHECK-32-EX-NEXT: entry:
// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l244_kernel_environment)
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l244_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
-// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
// CHECK-32-EX-NEXT: ret void
@@ -26034,13 +26034,13 @@ int a;
// CHECK-32-EX-SAME: () #[[ATTR8]] {
// CHECK-32-EX-NEXT: entry:
// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l248_kernel_environment)
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l248_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
-// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
// CHECK-32-EX-NEXT: ret void
@@ -26110,13 +26110,13 @@ int a;
// CHECK-32-EX-SAME: () #[[ATTR8]] {
// CHECK-32-EX-NEXT: entry:
// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l252_kernel_environment)
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l252_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
-// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
// CHECK-32-EX-NEXT: ret void
@@ -26203,13 +26203,13 @@ int a;
// CHECK-32-EX-SAME: () #[[ATTR8]] {
// CHECK-32-EX-NEXT: entry:
// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l256_kernel_environment)
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l256_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
-// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
// CHECK-32-EX-NEXT: ret void
@@ -26275,13 +26275,13 @@ int a;
// CHECK-32-EX-SAME: () #[[ATTR8]] {
// CHECK-32-EX-NEXT: entry:
// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l260_kernel_environment)
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l260_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
-// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
// CHECK-32-EX-NEXT: ret void
@@ -26347,13 +26347,13 @@ int a;
// CHECK-32-EX-SAME: () #[[ATTR8]] {
// CHECK-32-EX-NEXT: entry:
// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l264_kernel_environment)
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l264_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
-// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
// CHECK-32-EX-NEXT: ret void
@@ -26419,13 +26419,13 @@ int a;
// CHECK-32-EX-SAME: () #[[ATTR8]] {
// CHECK-32-EX-NEXT: entry:
// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l268_kernel_environment)
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l268_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
-// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
// CHECK-32-EX-NEXT: ret void
diff --git a/clang/test/OpenMP/nvptx_data_sharing.cpp b/clang/test/OpenMP/nvptx_data_sharing.cpp
index 180d6c2575f40d..00322f784da34e 100644
--- a/clang/test/OpenMP/nvptx_data_sharing.cpp
+++ b/clang/test/OpenMP/nvptx_data_sharing.cpp
@@ -36,13 +36,13 @@ void test_ds(){
// CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
// CHECK-NEXT: [[C:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [2 x ptr], align 8
-// CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z7test_dsv_l14_kernel_environment)
+// CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true)
// CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK: user_code.entry:
// CHECK-NEXT: [[A:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4)
// CHECK-NEXT: [[B:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4)
-// CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
+// CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-NEXT: store i32 10, ptr [[A]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
// CHECK-NEXT: store ptr [[A]], ptr [[TMP2]], align 8
@@ -56,7 +56,7 @@ void test_ds(){
// CHECK-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z7test_dsv_l14_omp_outlined1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z7test_dsv_l14_omp_outlined1_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 2)
// CHECK-NEXT: call void @__kmpc_free_shared(ptr [[B]], i64 4)
// CHECK-NEXT: call void @__kmpc_free_shared(ptr [[A]], i64 4)
-// CHECK-NEXT: call void @__kmpc_target_deinit()
+// CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK-NEXT: ret void
// CHECK: worker.exit:
// CHECK-NEXT: ret void
diff --git a/clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp b/clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp
index 5852d348e8bd21..a4297d5e886680 100644
--- a/clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp
@@ -44,18 +44,18 @@ int main(int argc, char **argv) {
// CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr [[C_ADDR]], align 8
// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8
// CHECK4-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 8
-// CHECK4-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l19_kernel_environment)
+// CHECK4-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK4-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP4]], -1
// CHECK4-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK4: user_code.entry:
-// CHECK4-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
+// CHECK4-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
// CHECK4-NEXT: store i32 [[TMP6]], ptr [[ARGC_CASTED]], align 4
// CHECK4-NEXT: [[TMP7:%.*]] = load i64, ptr [[ARGC_CASTED]], align 8
// CHECK4-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK4-NEXT: store i32 [[TMP5]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK4-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l19_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], i64 [[TMP7]], ptr [[TMP3]]) #[[ATTR5:[0-9]+]]
-// CHECK4-NEXT: call void @__kmpc_target_deinit()
+// CHECK4-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK4-NEXT: ret void
// CHECK4: worker.exit:
// CHECK4-NEXT: ret void
@@ -360,18 +360,18 @@ int main(int argc, char **argv) {
// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[C_ADDR]], align 4
// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4
// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 4
-// CHECK5-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l19_kernel_environment)
+// CHECK5-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK5-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP4]], -1
// CHECK5-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK5: user_code.entry:
-// CHECK5-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
+// CHECK5-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
// CHECK5-NEXT: store i32 [[TMP6]], ptr [[ARGC_CASTED]], align 4
// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARGC_CASTED]], align 4
// CHECK5-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK5-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l19_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], i32 [[TMP7]], ptr [[TMP3]]) #[[ATTR5:[0-9]+]]
-// CHECK5-NEXT: call void @__kmpc_target_deinit()
+// CHECK5-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK5-NEXT: ret void
// CHECK5: worker.exit:
// CHECK5-NEXT: ret void
diff --git a/clang/test/OpenMP/nvptx_lambda_capturing.cpp b/clang/test/OpenMP/nvptx_lambda_capturing.cpp
index dc177bdc6902f7..66a711897401a9 100644
--- a/clang/test/OpenMP/nvptx_lambda_capturing.cpp
+++ b/clang/test/OpenMP/nvptx_lambda_capturing.cpp
@@ -834,7 +834,7 @@ int main(int argc, char **argv) {
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[L_ADDR]], align 8
// CHECK2-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN1S3fooEv_l27_kernel_environment)
+// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
@@ -846,7 +846,7 @@ int main(int argc, char **argv) {
// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8
// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP2]], align 8
// CHECK2-NEXT: [[CALL:%.*]] = call noundef i32 @_ZZN1S3fooEvENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(8) [[TMP6]]) #[[ATTR7:[0-9]+]]
-// CHECK2-NEXT: call void @__kmpc_target_deinit()
+// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
@@ -877,18 +877,18 @@ int main(int argc, char **argv) {
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[L_ADDR]], align 8
// CHECK2-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN1S3fooEv_l29_kernel_environment)
+// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
-// CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
+// CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8
// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8
// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
// CHECK2-NEXT: store ptr [[TMP4]], ptr [[TMP6]], align 8
// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN1S3fooEv_l29_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2)
-// CHECK2-NEXT: call void @__kmpc_target_deinit()
+// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
@@ -953,7 +953,7 @@ int main(int argc, char **argv) {
// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP]], align 8
// CHECK2-NEXT: store ptr [[TMP1]], ptr [[_TMP1]], align 8
// CHECK2-NEXT: store ptr [[TMP3]], ptr [[_TMP2]], align 8
-// CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_kernel_environment)
+// CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP4]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
@@ -983,7 +983,7 @@ int main(int argc, char **argv) {
// CHECK2-NEXT: store ptr [[TMP2]], ptr [[TMP17]], align 8
// CHECK2-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP4]], align 8
// CHECK2-NEXT: [[CALL:%.*]] = call noundef i64 @"_ZZ4mainENK3$_0clEv"(ptr noundef nonnull align 8 dereferenceable(40) [[TMP18]]) #[[ATTR7]]
-// CHECK2-NEXT: call void @__kmpc_target_deinit()
+// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
@@ -1016,7 +1016,7 @@ int main(int argc, char **argv) {
// CHECK2-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8
// CHECK2-NEXT: store ptr [[TMP2]], ptr [[_TMP1]], align 8
// CHECK2-NEXT: store ptr [[TMP4]], ptr [[_TMP2]], align 8
-// CHECK2-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l43_kernel_environment)
+// CHECK2-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP5]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
@@ -1038,7 +1038,7 @@ int main(int argc, char **argv) {
// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [6 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 5
// CHECK2-NEXT: store ptr [[TMP10]], ptr [[TMP16]], align 8
// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP6]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l43_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 6)
-// CHECK2-NEXT: call void @__kmpc_target_deinit()
+// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
@@ -1124,7 +1124,7 @@ int main(int argc, char **argv) {
// CHECK2-NEXT: store ptr [[T]], ptr [[T_ADDR]], align 8
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_ADDR]], align 8
// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP]], align 8
-// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIZN1S3fooEvEUlvE_EiRKT__l18_kernel_environment)
+// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
@@ -1133,7 +1133,7 @@ int main(int argc, char **argv) {
// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
// CHECK2-NEXT: store ptr [[TMP3]], ptr [[TMP4]], align 8
// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIZN1S3fooEvEUlvE_EiRKT__l18_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
-// CHECK2-NEXT: call void @__kmpc_target_deinit()
+// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
@@ -1193,7 +1193,7 @@ int main(int argc, char **argv) {
// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP]], align 8
// CHECK3-NEXT: store ptr [[TMP1]], ptr [[_TMP1]], align 8
// CHECK3-NEXT: store ptr [[TMP3]], ptr [[_TMP2]], align 8
-// CHECK3-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_kernel_environment)
+// CHECK3-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true)
// CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP4]], -1
// CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK3: user_code.entry:
@@ -1223,7 +1223,7 @@ int main(int argc, char **argv) {
// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP17]], align 8
// CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP4]], align 8
// CHECK3-NEXT: [[CALL:%.*]] = call noundef i64 @"_ZZ4mainENK3$_0clEv"(ptr noundef nonnull align 8 dereferenceable(40) [[TMP18]]) #[[ATTR7:[0-9]+]]
-// CHECK3-NEXT: call void @__kmpc_target_deinit()
+// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK3-NEXT: ret void
// CHECK3: worker.exit:
// CHECK3-NEXT: ret void
@@ -1256,11 +1256,11 @@ int main(int argc, char **argv) {
// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8
// CHECK3-NEXT: store ptr [[TMP2]], ptr [[_TMP1]], align 8
// CHECK3-NEXT: store ptr [[TMP4]], ptr [[_TMP2]], align 8
-// CHECK3-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l43_kernel_environment)
+// CHECK3-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP5]], -1
// CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK3: user_code.entry:
-// CHECK3-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
+// CHECK3-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 8
// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP1]], align 8
// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[D_ADDR]], align 8
@@ -1278,7 +1278,7 @@ int main(int argc, char **argv) {
// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [6 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 5
// CHECK3-NEXT: store ptr [[TMP10]], ptr [[TMP16]], align 8
// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP6]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l43_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 6)
-// CHECK3-NEXT: call void @__kmpc_target_deinit()
+// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK3-NEXT: ret void
// CHECK3: worker.exit:
// CHECK3-NEXT: ret void
@@ -1368,7 +1368,7 @@ int main(int argc, char **argv) {
// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[L_ADDR]], align 8
// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8
-// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN1S3fooEv_l27_kernel_environment)
+// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1
// CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK3: user_code.entry:
@@ -1380,7 +1380,7 @@ int main(int argc, char **argv) {
// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8
// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP2]], align 8
// CHECK3-NEXT: [[CALL:%.*]] = call noundef i32 @_ZZN1S3fooEvENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(8) [[TMP6]]) #[[ATTR7]]
-// CHECK3-NEXT: call void @__kmpc_target_deinit()
+// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK3-NEXT: ret void
// CHECK3: worker.exit:
// CHECK3-NEXT: ret void
@@ -1411,7 +1411,7 @@ int main(int argc, char **argv) {
// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[L_ADDR]], align 8
// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8
-// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN1S3fooEv_l29_kernel_environment)
+// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1
// CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK3: user_code.entry:
@@ -1422,7 +1422,7 @@ int main(int argc, char **argv) {
// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP6]], align 8
// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN1S3fooEv_l29_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2)
-// CHECK3-NEXT: call void @__kmpc_target_deinit()
+// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK3-NEXT: ret void
// CHECK3: worker.exit:
// CHECK3-NEXT: ret void
@@ -1465,7 +1465,7 @@ int main(int argc, char **argv) {
// CHECK3-NEXT: store ptr [[T]], ptr [[T_ADDR]], align 8
// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_ADDR]], align 8
// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP]], align 8
-// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIZN1S3fooEvEUlvE_EiRKT__l18_kernel_environment)
+// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK3: user_code.entry:
@@ -1474,7 +1474,7 @@ int main(int argc, char **argv) {
// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP4]], align 8
// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIZN1S3fooEvEUlvE_EiRKT__l18_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
-// CHECK3-NEXT: call void @__kmpc_target_deinit()
+// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK3-NEXT: ret void
// CHECK3: worker.exit:
// CHECK3-NEXT: ret void
diff --git a/clang/test/OpenMP/nvptx_multi_target_parallel_codegen.cpp b/clang/test/OpenMP/nvptx_multi_target_parallel_codegen.cpp
index d73212cd344c57..edf1676eecf588 100644
--- a/clang/test/OpenMP/nvptx_multi_target_parallel_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_multi_target_parallel_codegen.cpp
@@ -29,13 +29,13 @@ int main() {
// CHECK1-SAME: () #[[ATTR0:[0-9]+]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l21_kernel_environment)
+// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
-// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
+// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l21_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-// CHECK1-NEXT: call void @__kmpc_target_deinit()
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
@@ -64,12 +64,12 @@ int main() {
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23
// CHECK1-SAME: () #[[ATTR5:[0-9]+]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23_kernel_environment)
+// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: call void @_Z3usev() #[[ATTR8]]
-// CHECK1-NEXT: call void @__kmpc_target_deinit()
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
@@ -105,13 +105,13 @@ int main() {
// CHECK2-SAME: () #[[ATTR0:[0-9]+]] {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l21_kernel_environment)
+// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
-// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
+// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l21_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
-// CHECK2-NEXT: call void @__kmpc_target_deinit()
+// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
@@ -140,12 +140,12 @@ int main() {
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23
// CHECK2-SAME: () #[[ATTR5:[0-9]+]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23_kernel_environment)
+// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
// CHECK2-NEXT: call void @_Z3usev() #[[ATTR8]]
-// CHECK2-NEXT: call void @__kmpc_target_deinit()
+// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
diff --git a/clang/test/OpenMP/nvptx_nested_parallel_codegen.cpp b/clang/test/OpenMP/nvptx_nested_parallel_codegen.cpp
index 85c736b9859257..ca464c8651ae55 100644
--- a/clang/test/OpenMP/nvptx_nested_parallel_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_nested_parallel_codegen.cpp
@@ -40,16 +40,16 @@ int main() {
// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l25_kernel_environment)
+// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
-// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
+// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK1-NEXT: call void @_Z3usePi(ptr noundef [[TMP0]]) #[[ATTR6:[0-9]+]]
// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8
// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 2, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l25_omp_outlined, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l25_omp_outlined_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
-// CHECK1-NEXT: call void @__kmpc_target_deinit()
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
@@ -150,16 +150,16 @@ int main() {
// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4
// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4
-// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l25_kernel_environment)
+// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
-// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
+// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK2-NEXT: call void @_Z3usePi(ptr noundef [[TMP0]]) #[[ATTR6:[0-9]+]]
// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4
// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 2, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l25_omp_outlined, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l25_omp_outlined_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i32 1)
-// CHECK2-NEXT: call void @__kmpc_target_deinit()
+// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
diff --git a/clang/test/OpenMP/nvptx_parallel_codegen.cpp b/clang/test/OpenMP/nvptx_parallel_codegen.cpp
index ef17cc1f5a1a2f..38a71bb4a3476b 100644
--- a/clang/test/OpenMP/nvptx_parallel_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_parallel_codegen.cpp
@@ -81,18 +81,18 @@ int bar(int n){
// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS2:%.*]] = alloca [0 x ptr], align 8
// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_kernel_environment)
+// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
-// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
+// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 0, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined1_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined2_wrapper, ptr [[CAPTURED_VARS_ADDRS2]], i64 0)
// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 1
// CHECK1-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4
-// CHECK1-NEXT: call void @__kmpc_target_deinit()
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
@@ -192,7 +192,7 @@ int bar(int n){
// CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8
// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43_kernel_environment)
+// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
@@ -213,7 +213,7 @@ int bar(int n){
// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP7]], 1
// CHECK1-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4
-// CHECK1-NEXT: call void @__kmpc_target_deinit()
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
@@ -255,7 +255,7 @@ int bar(int n){
// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l55_kernel_environment)
+// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
@@ -270,7 +270,7 @@ int bar(int n){
// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1
// CHECK1-NEXT: store i32 [[INC]], ptr [[A1]], align 4
// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[A1]], i64 4)
-// CHECK1-NEXT: call void @__kmpc_target_deinit()
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
@@ -344,18 +344,18 @@ int bar(int n){
// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 4
// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS2:%.*]] = alloca [0 x ptr], align 4
// CHECK2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
-// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_kernel_environment)
+// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
-// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
+// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 0, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined1_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i32 0)
// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined2_wrapper, ptr [[CAPTURED_VARS_ADDRS2]], i32 0)
// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 1
// CHECK2-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4
-// CHECK2-NEXT: call void @__kmpc_target_deinit()
+// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
@@ -455,7 +455,7 @@ int bar(int n){
// CHECK2-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4
// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
-// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43_kernel_environment)
+// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
@@ -476,7 +476,7 @@ int bar(int n){
// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP7]], 1
// CHECK2-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4
-// CHECK2-NEXT: call void @__kmpc_target_deinit()
+// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
@@ -518,7 +518,7 @@ int bar(int n){
// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4
// CHECK2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
-// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l55_kernel_environment)
+// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
@@ -533,7 +533,7 @@ int bar(int n){
// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1
// CHECK2-NEXT: store i32 [[INC]], ptr [[A1]], align 4
// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[A1]], i32 4)
-// CHECK2-NEXT: call void @__kmpc_target_deinit()
+// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
diff --git a/clang/test/OpenMP/nvptx_parallel_for_codegen.cpp b/clang/test/OpenMP/nvptx_parallel_for_codegen.cpp
index fc01baa34c18b0..87f25b137d5cc0 100644
--- a/clang/test/OpenMP/nvptx_parallel_for_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_parallel_for_codegen.cpp
@@ -41,12 +41,12 @@ int bar(int n){
// CHECK-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
// CHECK-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
-// CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l13_kernel_environment)
+// CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true)
// CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK: user_code.entry:
// CHECK-NEXT: [[D:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4)
-// CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
+// CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK-NEXT: store i32 [[TMP3]], ptr [[D]], align 4
// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
@@ -59,7 +59,7 @@ int bar(int n){
// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], 1
// CHECK-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX]], align 4
// CHECK-NEXT: call void @__kmpc_free_shared(ptr [[D]], i64 4)
-// CHECK-NEXT: call void @__kmpc_target_deinit()
+// CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK-NEXT: ret void
// CHECK: worker.exit:
// CHECK-NEXT: ret void
diff --git a/clang/test/OpenMP/nvptx_target_codegen.cpp b/clang/test/OpenMP/nvptx_target_codegen.cpp
index 58e487e866e433..856976efb14575 100644
--- a/clang/test/OpenMP/nvptx_target_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_target_codegen.cpp
@@ -153,17 +153,17 @@ void unreachable_call() {
// CHECK1-NEXT: store ptr [[PTR1]], ptr [[PTR1_ADDR]], align 8
// CHECK1-NEXT: store ptr [[PTR2]], ptr [[PTR2_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR2_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9targetBarPiS__l25_kernel_environment)
+// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
-// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
+// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
// CHECK1-NEXT: store ptr [[PTR1_ADDR]], ptr [[TMP3]], align 8
// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8
// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 2, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9targetBarPiS__l25_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2)
-// CHECK1-NEXT: call void @__kmpc_target_deinit()
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
@@ -192,11 +192,11 @@ void unreachable_call() {
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l39
// CHECK1-SAME: () #[[ATTR4:[0-9]+]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l39_kernel_environment)
+// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
-// CHECK1-NEXT: call void @__kmpc_target_deinit()
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
@@ -207,7 +207,7 @@ void unreachable_call() {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l47_kernel_environment)
+// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
@@ -221,7 +221,7 @@ void unreachable_call() {
// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 2
// CHECK1-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16
// CHECK1-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2
-// CHECK1-NEXT: call void @__kmpc_target_deinit()
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
@@ -256,7 +256,7 @@ void unreachable_call() {
// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4]], align 8
// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8
// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8
-// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l53_kernel_environment)
+// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP8]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
@@ -300,7 +300,7 @@ void unreachable_call() {
// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[CALL]], align 8
// CHECK1-NEXT: [[ADD21:%.*]] = add nsw i64 [[TMP17]], 1
// CHECK1-NEXT: store i64 [[ADD21]], ptr [[CALL]], align 8
-// CHECK1-NEXT: call void @__kmpc_target_deinit()
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
@@ -330,7 +330,7 @@ void unreachable_call() {
// CHECK1-NEXT: store i64 [[AAA]], ptr [[AAA_ADDR]], align 8
// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l90_kernel_environment)
+// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
@@ -351,7 +351,7 @@ void unreachable_call() {
// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP5]], 1
// CHECK1-NEXT: store i32 [[ADD6]], ptr [[ARRAYIDX]], align 4
-// CHECK1-NEXT: call void @__kmpc_target_deinit()
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
@@ -374,7 +374,7 @@ void unreachable_call() {
// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8
// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8
// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8
-// CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l108_kernel_environment)
+// CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP4]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
@@ -397,7 +397,7 @@ void unreachable_call() {
// CHECK1-NEXT: [[CONV7:%.*]] = fptosi double [[TMP8]] to i32
// CHECK1-NEXT: [[A8:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0
// CHECK1-NEXT: [[CALL:%.*]] = call i32 @_Z3baziRd(i32 [[CONV7]], ptr nonnull align 8 dereferenceable(8) [[A8]]) #[[ATTR10]]
-// CHECK1-NEXT: call void @__kmpc_target_deinit()
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
@@ -426,7 +426,7 @@ void unreachable_call() {
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16unreachable_callv_l142
// CHECK1-SAME: () #[[ATTR4]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16unreachable_callv_l142_kernel_environment)
+// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
@@ -435,7 +435,7 @@ void unreachable_call() {
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
// CHECK1: 1:
-// CHECK1-NEXT: call void @__kmpc_target_deinit()
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK1-NEXT: ret void
//
//
@@ -449,7 +449,7 @@ void unreachable_call() {
// CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8
// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l74_kernel_environment)
+// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
@@ -465,7 +465,7 @@ void unreachable_call() {
// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP4]], 1
// CHECK1-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4
-// CHECK1-NEXT: call void @__kmpc_target_deinit()
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
@@ -523,17 +523,17 @@ void unreachable_call() {
// CHECK2-NEXT: store ptr [[PTR1]], ptr [[PTR1_ADDR]], align 4
// CHECK2-NEXT: store ptr [[PTR2]], ptr [[PTR2_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR2_ADDR]], align 4
-// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9targetBarPiS__l25_kernel_environment)
+// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
-// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
+// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
// CHECK2-NEXT: store ptr [[PTR1_ADDR]], ptr [[TMP3]], align 4
// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 4
// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 2, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9targetBarPiS__l25_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2)
-// CHECK2-NEXT: call void @__kmpc_target_deinit()
+// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
@@ -562,11 +562,11 @@ void unreachable_call() {
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l39
// CHECK2-SAME: () #[[ATTR4:[0-9]+]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l39_kernel_environment)
+// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
-// CHECK2-NEXT: call void @__kmpc_target_deinit()
+// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
@@ -577,7 +577,7 @@ void unreachable_call() {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4
-// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l47_kernel_environment)
+// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
@@ -591,7 +591,7 @@ void unreachable_call() {
// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 2
// CHECK2-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16
// CHECK2-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2
-// CHECK2-NEXT: call void @__kmpc_target_deinit()
+// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
@@ -626,7 +626,7 @@ void unreachable_call() {
// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[VLA_ADDR4]], align 4
// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4
// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4
-// CHECK2-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l53_kernel_environment)
+// CHECK2-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP8]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
@@ -670,7 +670,7 @@ void unreachable_call() {
// CHECK2-NEXT: [[TMP17:%.*]] = load i64, ptr [[CALL]], align 8
// CHECK2-NEXT: [[ADD21:%.*]] = add nsw i64 [[TMP17]], 1
// CHECK2-NEXT: store i64 [[ADD21]], ptr [[CALL]], align 8
-// CHECK2-NEXT: call void @__kmpc_target_deinit()
+// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
@@ -700,7 +700,7 @@ void unreachable_call() {
// CHECK2-NEXT: store i32 [[AAA]], ptr [[AAA_ADDR]], align 4
// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
-// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l90_kernel_environment)
+// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
@@ -721,7 +721,7 @@ void unreachable_call() {
// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
// CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP5]], 1
// CHECK2-NEXT: store i32 [[ADD6]], ptr [[ARRAYIDX]], align 4
-// CHECK2-NEXT: call void @__kmpc_target_deinit()
+// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
@@ -744,7 +744,7 @@ void unreachable_call() {
// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4
// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4
// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4
-// CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l108_kernel_environment)
+// CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP4]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
@@ -767,7 +767,7 @@ void unreachable_call() {
// CHECK2-NEXT: [[CONV7:%.*]] = fptosi double [[TMP8]] to i32
// CHECK2-NEXT: [[A8:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0
// CHECK2-NEXT: [[CALL:%.*]] = call i32 @_Z3baziRd(i32 [[CONV7]], ptr nonnull align 8 dereferenceable(8) [[A8]]) #[[ATTR10]]
-// CHECK2-NEXT: call void @__kmpc_target_deinit()
+// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
@@ -796,7 +796,7 @@ void unreachable_call() {
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16unreachable_callv_l142
// CHECK2-SAME: () #[[ATTR4]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16unreachable_callv_l142_kernel_environment)
+// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
@@ -805,7 +805,7 @@ void unreachable_call() {
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
// CHECK2: 1:
-// CHECK2-NEXT: call void @__kmpc_target_deinit()
+// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK2-NEXT: ret void
//
//
@@ -819,7 +819,7 @@ void unreachable_call() {
// CHECK2-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4
// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
-// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l74_kernel_environment)
+// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
@@ -835,7 +835,7 @@ void unreachable_call() {
// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP4]], 1
// CHECK2-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4
-// CHECK2-NEXT: call void @__kmpc_target_deinit()
+// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
diff --git a/clang/test/OpenMP/nvptx_target_parallel_codegen.cpp b/clang/test/OpenMP/nvptx_target_parallel_codegen.cpp
index cb67702f34c82b..d36bbaa365d15a 100644
--- a/clang/test/OpenMP/nvptx_target_parallel_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_target_parallel_codegen.cpp
@@ -58,15 +58,15 @@ int bar(int n){
// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
// CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l30_kernel_environment)
+// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
-// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
+// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8
// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l30_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
-// CHECK1-NEXT: call void @__kmpc_target_deinit()
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
@@ -103,7 +103,7 @@ int bar(int n){
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 8
// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l35_kernel_environment)
+// CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
@@ -115,7 +115,7 @@ int bar(int n){
// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2
// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 8
// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l35_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 3)
-// CHECK1-NEXT: call void @__kmpc_target_deinit()
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
@@ -159,15 +159,15 @@ int bar(int n){
// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4
// CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4
-// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l30_kernel_environment)
+// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
-// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
+// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4
// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l30_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1)
-// CHECK2-NEXT: call void @__kmpc_target_deinit()
+// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
@@ -204,7 +204,7 @@ int bar(int n){
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 4
// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4
-// CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l35_kernel_environment)
+// CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
@@ -216,7 +216,7 @@ int bar(int n){
// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2
// CHECK2-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 4
// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l35_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3)
-// CHECK2-NEXT: call void @__kmpc_target_deinit()
+// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
diff --git a/clang/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp b/clang/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp
index ae09103267e0fe..a31a9e2fb8a8f4 100644
--- a/clang/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp
@@ -53,15 +53,15 @@ int bar(int n){
// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
// CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l25_kernel_environment)
+// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
-// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
+// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8
// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 1024, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l25_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
-// CHECK1-NEXT: call void @__kmpc_target_deinit()
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
@@ -100,7 +100,7 @@ int bar(int n){
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 8
// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l30_kernel_environment)
+// CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
@@ -113,7 +113,7 @@ int bar(int n){
// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2
// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP8]], align 8
// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l30_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 3)
-// CHECK1-NEXT: call void @__kmpc_target_deinit()
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
@@ -157,15 +157,15 @@ int bar(int n){
// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4
// CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4
-// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l25_kernel_environment)
+// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
-// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
+// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4
// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 1024, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l25_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1)
-// CHECK2-NEXT: call void @__kmpc_target_deinit()
+// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
@@ -204,7 +204,7 @@ int bar(int n){
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 4
// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4
-// CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l30_kernel_environment)
+// CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
@@ -217,7 +217,7 @@ int bar(int n){
// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2
// CHECK2-NEXT: store ptr [[TMP2]], ptr [[TMP8]], align 4
// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l30_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3)
-// CHECK2-NEXT: call void @__kmpc_target_deinit()
+// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
diff --git a/clang/test/OpenMP/nvptx_target_parallel_proc_bind_codegen.cpp b/clang/test/OpenMP/nvptx_target_parallel_proc_bind_codegen.cpp
index 7bf0b934962e5e..c4ea77061ad559 100644
--- a/clang/test/OpenMP/nvptx_target_parallel_proc_bind_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_target_parallel_proc_bind_codegen.cpp
@@ -52,17 +52,141 @@ int bar(int n){
}
#endif
+// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27
+// CHECK-SAME: () #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
+// CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+// CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
+// CHECK: user_code.entry:
+// CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
+// CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
+// CHECK-NEXT: ret void
+// CHECK: worker.exit:
+// CHECK-NEXT: ret void
+// CHECK-LABEL: define {{[^@]+}}@__omp_outlined__
+// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
+// CHECK-NEXT: ret void
+// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31
+// CHECK-SAME: (i32 noundef [[AA:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4
+// CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4
+// CHECK-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
+// CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+// CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
+// CHECK: user_code.entry:
+// CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2
+// CHECK-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[AA_CASTED]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK-NEXT: [[TMP5:%.*]] = inttoptr i32 [[TMP3]] to ptr
+// CHECK-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 4
+// CHECK-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1)
+// CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
+// CHECK-NEXT: ret void
+// CHECK: worker.exit:
+// CHECK-NEXT: ret void
+// CHECK-LABEL: define {{[^@]+}}@__omp_outlined__1
+// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR1]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
+// CHECK-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2
+// CHECK-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32
+// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1
+// CHECK-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16
+// CHECK-NEXT: store i16 [[CONV1]], ptr [[AA_ADDR]], align 2
+// CHECK-NEXT: ret void
+// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36
+// CHECK-SAME: (i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4
+// CHECK-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4
+// CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 4
+// CHECK-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4
+// CHECK-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
+// CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
+// CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
+// CHECK: user_code.entry:
+// CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK-NEXT: store i32 [[TMP3]], ptr [[A_CASTED]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_CASTED]], align 4
+// CHECK-NEXT: [[TMP5:%.*]] = load i16, ptr [[AA_ADDR]], align 2
+// CHECK-NEXT: store i16 [[TMP5]], ptr [[AA_CASTED]], align 2
+// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[AA_CASTED]], align 4
+// CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK-NEXT: [[TMP8:%.*]] = inttoptr i32 [[TMP4]] to ptr
+// CHECK-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 4
+// CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK-NEXT: [[TMP10:%.*]] = inttoptr i32 [[TMP6]] to ptr
+// CHECK-NEXT: store ptr [[TMP10]], ptr [[TMP9]], align 4
+// CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2
+// CHECK-NEXT: store ptr [[TMP0]], ptr [[TMP11]], align 4
+// CHECK-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3)
+// CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
+// CHECK-NEXT: ret void
+// CHECK: worker.exit:
+// CHECK-NEXT: ret void
+// CHECK-LABEL: define {{[^@]+}}@__omp_outlined__2
+// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
+// CHECK-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4
+// CHECK-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1
+// CHECK-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2
+// CHECK-NEXT: [[CONV:%.*]] = sext i16 [[TMP2]] to i32
+// CHECK-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1
+// CHECK-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16
+// CHECK-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2
+// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 2
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+// CHECK-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP3]], 1
+// CHECK-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4
+// CHECK-NEXT: ret void
// CHECK45-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27
// CHECK45-64-SAME: () #[[ATTR0:[0-9]+]] {
// CHECK45-64-NEXT: entry:
// CHECK45-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-// CHECK45-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27_kernel_environment)
+// CHECK45-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK45-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK45-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK45-64: user_code.entry:
-// CHECK45-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
+// CHECK45-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK45-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-// CHECK45-64-NEXT: call void @__kmpc_target_deinit()
+// CHECK45-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK45-64-NEXT: ret void
// CHECK45-64: worker.exit:
// CHECK45-64-NEXT: ret void
@@ -85,7 +209,7 @@ int bar(int n){
// CHECK45-64-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8
// CHECK45-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
// CHECK45-64-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8
-// CHECK45-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_kernel_environment)
+// CHECK45-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK45-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK45-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK45-64: user_code.entry:
@@ -97,7 +221,7 @@ int bar(int n){
// CHECK45-64-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP3]] to ptr
// CHECK45-64-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 8
// CHECK45-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
-// CHECK45-64-NEXT: call void @__kmpc_target_deinit()
+// CHECK45-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK45-64-NEXT: ret void
// CHECK45-64: worker.exit:
// CHECK45-64-NEXT: ret void
@@ -133,7 +257,7 @@ int bar(int n){
// CHECK45-64-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8
// CHECK45-64-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
// CHECK45-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
-// CHECK45-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_kernel_environment)
+// CHECK45-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK45-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK45-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK45-64: user_code.entry:
@@ -153,7 +277,7 @@ int bar(int n){
// CHECK45-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2
// CHECK45-64-NEXT: store ptr [[TMP0]], ptr [[TMP11]], align 8
// CHECK45-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 3)
-// CHECK45-64-NEXT: call void @__kmpc_target_deinit()
+// CHECK45-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK45-64-NEXT: ret void
// CHECK45-64: worker.exit:
// CHECK45-64-NEXT: ret void
@@ -192,13 +316,13 @@ int bar(int n){
// CHECK45-32-SAME: () #[[ATTR0:[0-9]+]] {
// CHECK45-32-NEXT: entry:
// CHECK45-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK45-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27_kernel_environment)
+// CHECK45-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK45-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK45-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK45-32: user_code.entry:
-// CHECK45-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
+// CHECK45-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK45-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
-// CHECK45-32-NEXT: call void @__kmpc_target_deinit()
+// CHECK45-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK45-32-NEXT: ret void
// CHECK45-32: worker.exit:
// CHECK45-32-NEXT: ret void
@@ -221,7 +345,7 @@ int bar(int n){
// CHECK45-32-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4
// CHECK45-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4
// CHECK45-32-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4
-// CHECK45-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_kernel_environment)
+// CHECK45-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK45-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK45-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK45-32: user_code.entry:
@@ -233,7 +357,7 @@ int bar(int n){
// CHECK45-32-NEXT: [[TMP5:%.*]] = inttoptr i32 [[TMP3]] to ptr
// CHECK45-32-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 4
// CHECK45-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1)
-// CHECK45-32-NEXT: call void @__kmpc_target_deinit()
+// CHECK45-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK45-32-NEXT: ret void
// CHECK45-32: worker.exit:
// CHECK45-32-NEXT: ret void
@@ -269,7 +393,7 @@ int bar(int n){
// CHECK45-32-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4
// CHECK45-32-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
// CHECK45-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
-// CHECK45-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_kernel_environment)
+// CHECK45-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK45-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK45-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK45-32: user_code.entry:
@@ -289,7 +413,7 @@ int bar(int n){
// CHECK45-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2
// CHECK45-32-NEXT: store ptr [[TMP0]], ptr [[TMP11]], align 4
// CHECK45-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3)
-// CHECK45-32-NEXT: call void @__kmpc_target_deinit()
+// CHECK45-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK45-32-NEXT: ret void
// CHECK45-32: worker.exit:
// CHECK45-32-NEXT: ret void
@@ -328,13 +452,13 @@ int bar(int n){
// CHECK45-32-EX-SAME: () #[[ATTR0:[0-9]+]] {
// CHECK45-32-EX-NEXT: entry:
// CHECK45-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK45-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27_kernel_environment)
+// CHECK45-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK45-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK45-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK45-32-EX: user_code.entry:
-// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
+// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK45-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
-// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit()
+// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK45-32-EX-NEXT: ret void
// CHECK45-32-EX: worker.exit:
// CHECK45-32-EX-NEXT: ret void
@@ -357,7 +481,7 @@ int bar(int n){
// CHECK45-32-EX-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4
// CHECK45-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4
// CHECK45-32-EX-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4
-// CHECK45-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_kernel_environment)
+// CHECK45-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK45-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK45-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK45-32-EX: user_code.entry:
@@ -369,7 +493,7 @@ int bar(int n){
// CHECK45-32-EX-NEXT: [[TMP5:%.*]] = inttoptr i32 [[TMP3]] to ptr
// CHECK45-32-EX-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 4
// CHECK45-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1)
-// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit()
+// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK45-32-EX-NEXT: ret void
// CHECK45-32-EX: worker.exit:
// CHECK45-32-EX-NEXT: ret void
@@ -405,7 +529,7 @@ int bar(int n){
// CHECK45-32-EX-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4
// CHECK45-32-EX-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
// CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
-// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_kernel_environment)
+// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK45-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK45-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK45-32-EX: user_code.entry:
@@ -425,7 +549,7 @@ int bar(int n){
// CHECK45-32-EX-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2
// CHECK45-32-EX-NEXT: store ptr [[TMP0]], ptr [[TMP11]], align 4
// CHECK45-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3)
-// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit()
+// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK45-32-EX-NEXT: ret void
// CHECK45-32-EX: worker.exit:
// CHECK45-32-EX-NEXT: ret void
@@ -464,13 +588,13 @@ int bar(int n){
// CHECK-64-SAME: () #[[ATTR0:[0-9]+]] {
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27_kernel_environment)
+// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
-// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
+// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-// CHECK-64-NEXT: call void @__kmpc_target_deinit()
+// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
// CHECK-64-NEXT: ret void
@@ -493,7 +617,7 @@ int bar(int n){
// CHECK-64-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8
// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
// CHECK-64-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8
-// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_kernel_environment)
+// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
@@ -505,7 +629,7 @@ int bar(int n){
// CHECK-64-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP3]] to ptr
// CHECK-64-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 8
// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
-// CHECK-64-NEXT: call void @__kmpc_target_deinit()
+// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
// CHECK-64-NEXT: ret void
@@ -541,7 +665,7 @@ int bar(int n){
// CHECK-64-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8
// CHECK-64-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
-// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_kernel_environment)
+// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
@@ -561,7 +685,7 @@ int bar(int n){
// CHECK-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2
// CHECK-64-NEXT: store ptr [[TMP0]], ptr [[TMP11]], align 8
// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 3)
-// CHECK-64-NEXT: call void @__kmpc_target_deinit()
+// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
// CHECK-64-NEXT: ret void
@@ -600,13 +724,13 @@ int bar(int n){
// CHECK-32-SAME: () #[[ATTR0:[0-9]+]] {
// CHECK-32-NEXT: entry:
// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27_kernel_environment)
+// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
-// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
+// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
-// CHECK-32-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
// CHECK-32-NEXT: ret void
@@ -629,7 +753,7 @@ int bar(int n){
// CHECK-32-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4
// CHECK-32-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_kernel_environment)
+// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
@@ -641,7 +765,7 @@ int bar(int n){
// CHECK-32-NEXT: [[TMP5:%.*]] = inttoptr i32 [[TMP3]] to ptr
// CHECK-32-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 4
// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1)
-// CHECK-32-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
// CHECK-32-NEXT: ret void
@@ -677,7 +801,7 @@ int bar(int n){
// CHECK-32-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4
// CHECK-32-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
-// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_kernel_environment)
+// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
@@ -697,7 +821,7 @@ int bar(int n){
// CHECK-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2
// CHECK-32-NEXT: store ptr [[TMP0]], ptr [[TMP11]], align 4
// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3)
-// CHECK-32-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
// CHECK-32-NEXT: ret void
@@ -736,13 +860,13 @@ int bar(int n){
// CHECK-32-EX-SAME: () #[[ATTR0:[0-9]+]] {
// CHECK-32-EX-NEXT: entry:
// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27_kernel_environment)
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
-// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
+// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
-// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
// CHECK-32-EX-NEXT: ret void
@@ -765,7 +889,7 @@ int bar(int n){
// CHECK-32-EX-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4
// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4
// CHECK-32-EX-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_kernel_environment)
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
@@ -777,7 +901,7 @@ int bar(int n){
// CHECK-32-EX-NEXT: [[TMP5:%.*]] = inttoptr i32 [[TMP3]] to ptr
// CHECK-32-EX-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 4
// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1)
-// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
// CHECK-32-EX-NEXT: ret void
@@ -813,7 +937,7 @@ int bar(int n){
// CHECK-32-EX-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4
// CHECK-32-EX-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
-// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_kernel_environment)
+// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
@@ -833,7 +957,7 @@ int bar(int n){
// CHECK-32-EX-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2
// CHECK-32-EX-NEXT: store ptr [[TMP0]], ptr [[TMP11]], align 4
// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3)
-// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
// CHECK-32-EX-NEXT: ret void
diff --git a/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp b/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp
index 36c7f58fdd9382..5aab0ff2ba0cc6 100644
--- a/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp
@@ -104,15 +104,15 @@ int bar(int n){
// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
// CHECK-64-NEXT: store ptr [[E]], ptr [[E_ADDR]], align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[E_ADDR]], align 8
-// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l24_kernel_environment)
+// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
-// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
+// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-64-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
// CHECK-64-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8
// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l24_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
-// CHECK-64-NEXT: call void @__kmpc_target_deinit()
+// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
// CHECK-64-NEXT: ret void
@@ -286,7 +286,7 @@ int bar(int n){
// CHECK-64-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8
// CHECK-64-NEXT: [[TMP1:%.*]] = load ptr, ptr [[D_ADDR]], align 8
-// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l29_kernel_environment)
+// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
@@ -296,7 +296,7 @@ int bar(int n){
// CHECK-64-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
// CHECK-64-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8
// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l29_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2)
-// CHECK-64-NEXT: call void @__kmpc_target_deinit()
+// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
// CHECK-64-NEXT: ret void
@@ -523,7 +523,7 @@ int bar(int n){
// CHECK-64-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
// CHECK-64-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8
-// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l35_kernel_environment)
+// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
@@ -533,7 +533,7 @@ int bar(int n){
// CHECK-64-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
// CHECK-64-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8
// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l35_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2)
-// CHECK-64-NEXT: call void @__kmpc_target_deinit()
+// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
// CHECK-64-NEXT: ret void
@@ -774,15 +774,15 @@ int bar(int n){
// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4
// CHECK-32-NEXT: store ptr [[E]], ptr [[E_ADDR]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[E_ADDR]], align 4
-// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l24_kernel_environment)
+// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
-// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
+// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-32-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
// CHECK-32-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4
// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l24_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1)
-// CHECK-32-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
// CHECK-32-NEXT: ret void
@@ -956,7 +956,7 @@ int bar(int n){
// CHECK-32-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4
// CHECK-32-NEXT: [[TMP1:%.*]] = load ptr, ptr [[D_ADDR]], align 4
-// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l29_kernel_environment)
+// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
@@ -966,7 +966,7 @@ int bar(int n){
// CHECK-32-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
// CHECK-32-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4
// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l29_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2)
-// CHECK-32-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
// CHECK-32-NEXT: ret void
@@ -1193,7 +1193,7 @@ int bar(int n){
// CHECK-32-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
// CHECK-32-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 4
-// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l35_kernel_environment)
+// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
@@ -1203,7 +1203,7 @@ int bar(int n){
// CHECK-32-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
// CHECK-32-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4
// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l35_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2)
-// CHECK-32-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
// CHECK-32-NEXT: ret void
@@ -1444,15 +1444,15 @@ int bar(int n){
// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4
// CHECK-32-EX-NEXT: store ptr [[E]], ptr [[E_ADDR]], align 4
// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[E_ADDR]], align 4
-// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l24_kernel_environment)
+// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-32-EX-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
// CHECK-32-EX-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4
// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l24_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1)
-// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
// CHECK-32-EX-NEXT: ret void
@@ -1626,7 +1626,7 @@ int bar(int n){
// CHECK-32-EX-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4
// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load ptr, ptr [[D_ADDR]], align 4
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l29_kernel_environment)
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
@@ -1636,7 +1636,7 @@ int bar(int n){
// CHECK-32-EX-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
// CHECK-32-EX-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4
// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l29_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2)
-// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
// CHECK-32-EX-NEXT: ret void
@@ -1863,7 +1863,7 @@ int bar(int n){
// CHECK-32-EX-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 4
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l35_kernel_environment)
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
@@ -1873,7 +1873,7 @@ int bar(int n){
// CHECK-32-EX-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
// CHECK-32-EX-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4
// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l35_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2)
-// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
// CHECK-32-EX-NEXT: ret void
diff --git a/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp b/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp
index 3556f5e2983d3a..39bfe06b7f5916 100644
--- a/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp
+++ b/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp
@@ -35,15 +35,15 @@ void test() {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIfEvv_l16_kernel_environment)
+// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
-// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
+// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA8:![0-9]+]]
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIfEvv_l16_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR6:[0-9]+]]
-// CHECK1-NEXT: call void @__kmpc_target_deinit()
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
@@ -527,7 +527,7 @@ void test() {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIdEvv_l16_kernel_environment)
+// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
@@ -535,7 +535,7 @@ void test() {
// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA8]]
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIdEvv_l16_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR6]]
-// CHECK1-NEXT: call void @__kmpc_target_deinit()
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
diff --git a/clang/test/OpenMP/nvptx_target_printf_codegen.c b/clang/test/OpenMP/nvptx_target_printf_codegen.c
index 524a8f306a8c15..4da69f0e295aef 100644
--- a/clang/test/OpenMP/nvptx_target_printf_codegen.c
+++ b/clang/test/OpenMP/nvptx_target_printf_codegen.c
@@ -45,7 +45,7 @@ void CheckAllocaIsInEntryBlock(void) {
// CHECK-64-NEXT: entry:
// CHECK-64-NEXT: [[FMT:%.*]] = alloca ptr, align 8
// CHECK-64-NEXT: [[TMP:%.*]] = alloca [[PRINTF_ARGS:%.*]], align 8
-// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_CheckSimple_l13_kernel_environment)
+// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
@@ -58,7 +58,7 @@ void CheckAllocaIsInEntryBlock(void) {
// CHECK-64-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[PRINTF_ARGS]], ptr [[TMP]], i32 0, i32 2
// CHECK-64-NEXT: store double 3.000000e+00, ptr [[TMP4]], align 8
// CHECK-64-NEXT: [[TMP5:%.*]] = call i32 @__llvm_omp_vprintf(ptr [[TMP1]], ptr [[TMP]], i32 24)
-// CHECK-64-NEXT: call void @__kmpc_target_deinit()
+// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
// CHECK-64-NEXT: ret void
@@ -67,12 +67,12 @@ void CheckAllocaIsInEntryBlock(void) {
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_CheckNoArgs_l25
// CHECK-64-SAME: () #[[ATTR0]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_CheckNoArgs_l25_kernel_environment)
+// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__llvm_omp_vprintf(ptr @.str1, ptr null, i32 0)
-// CHECK-64-NEXT: call void @__kmpc_target_deinit()
+// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
// CHECK-64-NEXT: ret void
@@ -84,7 +84,7 @@ void CheckAllocaIsInEntryBlock(void) {
// CHECK-64-NEXT: [[FOO_ADDR:%.*]] = alloca i64, align 8
// CHECK-64-NEXT: [[TMP:%.*]] = alloca [[PRINTF_ARGS_0:%.*]], align 8
// CHECK-64-NEXT: store i64 [[FOO]], ptr [[FOO_ADDR]], align 8
-// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_CheckAllocaIsInEntryBlock_l36_kernel_environment)
+// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
@@ -99,7 +99,7 @@ void CheckAllocaIsInEntryBlock(void) {
// CHECK-64: worker.exit:
// CHECK-64-NEXT: ret void
// CHECK-64: if.end:
-// CHECK-64-NEXT: call void @__kmpc_target_deinit()
+// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK-64-NEXT: ret void
//
//
@@ -108,7 +108,7 @@ void CheckAllocaIsInEntryBlock(void) {
// CHECK-32-NEXT: entry:
// CHECK-32-NEXT: [[FMT:%.*]] = alloca ptr, align 4
// CHECK-32-NEXT: [[TMP:%.*]] = alloca [[PRINTF_ARGS:%.*]], align 8
-// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_CheckSimple_l13_kernel_environment)
+// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
@@ -121,7 +121,7 @@ void CheckAllocaIsInEntryBlock(void) {
// CHECK-32-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[PRINTF_ARGS]], ptr [[TMP]], i32 0, i32 2
// CHECK-32-NEXT: store double 3.000000e+00, ptr [[TMP4]], align 8
// CHECK-32-NEXT: [[TMP5:%.*]] = call i32 @__llvm_omp_vprintf(ptr [[TMP1]], ptr [[TMP]], i32 24)
-// CHECK-32-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
// CHECK-32-NEXT: ret void
@@ -130,12 +130,12 @@ void CheckAllocaIsInEntryBlock(void) {
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_CheckNoArgs_l25
// CHECK-32-SAME: () #[[ATTR0]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_CheckNoArgs_l25_kernel_environment)
+// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__llvm_omp_vprintf(ptr @.str1, ptr null, i32 0)
-// CHECK-32-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
// CHECK-32-NEXT: ret void
@@ -147,7 +147,7 @@ void CheckAllocaIsInEntryBlock(void) {
// CHECK-32-NEXT: [[FOO_ADDR:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: [[TMP:%.*]] = alloca [[PRINTF_ARGS_0:%.*]], align 8
// CHECK-32-NEXT: store i32 [[FOO]], ptr [[FOO_ADDR]], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_CheckAllocaIsInEntryBlock_l36_kernel_environment)
+// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
@@ -162,6 +162,6 @@ void CheckAllocaIsInEntryBlock(void) {
// CHECK-32: worker.exit:
// CHECK-32-NEXT: ret void
// CHECK-32: if.end:
-// CHECK-32-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK-32-NEXT: ret void
//
diff --git a/clang/test/OpenMP/nvptx_target_simd_codegen.cpp b/clang/test/OpenMP/nvptx_target_simd_codegen.cpp
index a8edfd7fa174ec..71ddf11959e7d4 100644
--- a/clang/test/OpenMP/nvptx_target_simd_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_target_simd_codegen.cpp
@@ -72,7 +72,7 @@ int bar(int n){
// CHECK45-64-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
// CHECK45-64-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
// CHECK45-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
-// CHECK45-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l29_kernel_environment)
+// CHECK45-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK45-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK45-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK45-64: user_code.entry:
@@ -124,7 +124,7 @@ int bar(int n){
// CHECK45-64-NEXT: store i32 [[ADD10]], ptr [[I3]], align 4
// CHECK45-64-NEXT: br label [[SIMD_IF_END]]
// CHECK45-64: simd.if.end:
-// CHECK45-64-NEXT: call void @__kmpc_target_deinit()
+// CHECK45-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK45-64-NEXT: ret void
//
//
@@ -142,7 +142,7 @@ int bar(int n){
// CHECK45-64-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
// CHECK45-64-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8
// CHECK45-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8
-// CHECK45-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_kernel_environment)
+// CHECK45-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK45-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK45-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK45-64: user_code.entry:
@@ -198,7 +198,7 @@ int bar(int n){
// CHECK45-64-NEXT: store i32 [[ADD12]], ptr [[I3]], align 4
// CHECK45-64-NEXT: br label [[SIMD_IF_END]]
// CHECK45-64: simd.if.end:
-// CHECK45-64-NEXT: call void @__kmpc_target_deinit()
+// CHECK45-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK45-64-NEXT: ret void
//
//
@@ -211,7 +211,7 @@ int bar(int n){
// CHECK45-64-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK45-64-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
// CHECK45-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
-// CHECK45-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l39_kernel_environment)
+// CHECK45-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK45-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK45-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK45-64: user_code.entry:
@@ -244,7 +244,7 @@ int bar(int n){
// CHECK45-64-NEXT: ret void
// CHECK45-64: omp.inner.for.end:
// CHECK45-64-NEXT: store i32 10, ptr [[I]], align 4
-// CHECK45-64-NEXT: call void @__kmpc_target_deinit()
+// CHECK45-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK45-64-NEXT: ret void
//
//
@@ -261,7 +261,7 @@ int bar(int n){
// CHECK45-64-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8
// CHECK45-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
// CHECK45-64-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 8
-// CHECK45-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44_kernel_environment)
+// CHECK45-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK45-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1
// CHECK45-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK45-64: user_code.entry:
@@ -299,7 +299,7 @@ int bar(int n){
// CHECK45-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[N1]], align 4
// CHECK45-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
// CHECK45-64-NEXT: store i32 [[ADD4]], ptr [[TMP1]], align 4
-// CHECK45-64-NEXT: call void @__kmpc_target_deinit()
+// CHECK45-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK45-64-NEXT: ret void
//
//
@@ -317,7 +317,7 @@ int bar(int n){
// CHECK45-32-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// CHECK45-32-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
// CHECK45-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
-// CHECK45-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l29_kernel_environment)
+// CHECK45-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK45-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK45-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK45-32: user_code.entry:
@@ -368,7 +368,7 @@ int bar(int n){
// CHECK45-32-NEXT: store i32 [[ADD10]], ptr [[I3]], align 4
// CHECK45-32-NEXT: br label [[SIMD_IF_END]]
// CHECK45-32: simd.if.end:
-// CHECK45-32-NEXT: call void @__kmpc_target_deinit()
+// CHECK45-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK45-32-NEXT: ret void
//
//
@@ -386,7 +386,7 @@ int bar(int n){
// CHECK45-32-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// CHECK45-32-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4
// CHECK45-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4
-// CHECK45-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_kernel_environment)
+// CHECK45-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK45-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK45-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK45-32: user_code.entry:
@@ -441,7 +441,7 @@ int bar(int n){
// CHECK45-32-NEXT: store i32 [[ADD12]], ptr [[I3]], align 4
// CHECK45-32-NEXT: br label [[SIMD_IF_END]]
// CHECK45-32: simd.if.end:
-// CHECK45-32-NEXT: call void @__kmpc_target_deinit()
+// CHECK45-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK45-32-NEXT: ret void
//
//
@@ -454,7 +454,7 @@ int bar(int n){
// CHECK45-32-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK45-32-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
// CHECK45-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
-// CHECK45-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l39_kernel_environment)
+// CHECK45-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK45-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK45-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK45-32: user_code.entry:
@@ -486,7 +486,7 @@ int bar(int n){
// CHECK45-32-NEXT: ret void
// CHECK45-32: omp.inner.for.end:
// CHECK45-32-NEXT: store i32 10, ptr [[I]], align 4
-// CHECK45-32-NEXT: call void @__kmpc_target_deinit()
+// CHECK45-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK45-32-NEXT: ret void
//
//
@@ -503,7 +503,7 @@ int bar(int n){
// CHECK45-32-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4
// CHECK45-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
// CHECK45-32-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 4
-// CHECK45-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44_kernel_environment)
+// CHECK45-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK45-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1
// CHECK45-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK45-32: user_code.entry:
@@ -540,7 +540,7 @@ int bar(int n){
// CHECK45-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[N1]], align 4
// CHECK45-32-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
// CHECK45-32-NEXT: store i32 [[ADD4]], ptr [[TMP1]], align 4
-// CHECK45-32-NEXT: call void @__kmpc_target_deinit()
+// CHECK45-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK45-32-NEXT: ret void
//
//
@@ -558,7 +558,7 @@ int bar(int n){
// CHECK45-32-EX-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// CHECK45-32-EX-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
// CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
-// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l29_kernel_environment)
+// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK45-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK45-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK45-32-EX: user_code.entry:
@@ -609,7 +609,7 @@ int bar(int n){
// CHECK45-32-EX-NEXT: store i32 [[ADD10]], ptr [[I3]], align 4
// CHECK45-32-EX-NEXT: br label [[SIMD_IF_END]]
// CHECK45-32-EX: simd.if.end:
-// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit()
+// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK45-32-EX-NEXT: ret void
//
//
@@ -627,7 +627,7 @@ int bar(int n){
// CHECK45-32-EX-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// CHECK45-32-EX-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4
// CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4
-// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_kernel_environment)
+// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK45-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK45-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK45-32-EX: user_code.entry:
@@ -682,7 +682,7 @@ int bar(int n){
// CHECK45-32-EX-NEXT: store i32 [[ADD12]], ptr [[I3]], align 4
// CHECK45-32-EX-NEXT: br label [[SIMD_IF_END]]
// CHECK45-32-EX: simd.if.end:
-// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit()
+// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK45-32-EX-NEXT: ret void
//
//
@@ -695,7 +695,7 @@ int bar(int n){
// CHECK45-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK45-32-EX-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
// CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
-// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l39_kernel_environment)
+// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK45-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK45-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK45-32-EX: user_code.entry:
@@ -727,7 +727,7 @@ int bar(int n){
// CHECK45-32-EX-NEXT: ret void
// CHECK45-32-EX: omp.inner.for.end:
// CHECK45-32-EX-NEXT: store i32 10, ptr [[I]], align 4
-// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit()
+// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK45-32-EX-NEXT: ret void
//
//
@@ -744,7 +744,7 @@ int bar(int n){
// CHECK45-32-EX-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4
// CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 4
-// CHECK45-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44_kernel_environment)
+// CHECK45-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK45-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1
// CHECK45-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK45-32-EX: user_code.entry:
@@ -781,7 +781,7 @@ int bar(int n){
// CHECK45-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[N1]], align 4
// CHECK45-32-EX-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
// CHECK45-32-EX-NEXT: store i32 [[ADD4]], ptr [[TMP1]], align 4
-// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit()
+// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK45-32-EX-NEXT: ret void
//
//
@@ -799,7 +799,7 @@ int bar(int n){
// CHECK-64-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
// CHECK-64-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
-// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l29_kernel_environment)
+// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
@@ -851,7 +851,7 @@ int bar(int n){
// CHECK-64-NEXT: store i32 [[ADD10]], ptr [[I3]], align 4
// CHECK-64-NEXT: br label [[SIMD_IF_END]]
// CHECK-64: simd.if.end:
-// CHECK-64-NEXT: call void @__kmpc_target_deinit()
+// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-64-NEXT: ret void
//
//
@@ -869,7 +869,7 @@ int bar(int n){
// CHECK-64-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
// CHECK-64-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8
-// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_kernel_environment)
+// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
@@ -925,7 +925,7 @@ int bar(int n){
// CHECK-64-NEXT: store i32 [[ADD12]], ptr [[I3]], align 4
// CHECK-64-NEXT: br label [[SIMD_IF_END]]
// CHECK-64: simd.if.end:
-// CHECK-64-NEXT: call void @__kmpc_target_deinit()
+// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-64-NEXT: ret void
//
//
@@ -938,7 +938,7 @@ int bar(int n){
// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
-// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l39_kernel_environment)
+// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
@@ -971,7 +971,7 @@ int bar(int n){
// CHECK-64-NEXT: ret void
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: store i32 10, ptr [[I]], align 4
-// CHECK-64-NEXT: call void @__kmpc_target_deinit()
+// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-64-NEXT: ret void
//
//
@@ -988,7 +988,7 @@ int bar(int n){
// CHECK-64-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
// CHECK-64-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 8
-// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44_kernel_environment)
+// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
@@ -1026,7 +1026,7 @@ int bar(int n){
// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[N1]], align 4
// CHECK-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
// CHECK-64-NEXT: store i32 [[ADD4]], ptr [[TMP1]], align 4
-// CHECK-64-NEXT: call void @__kmpc_target_deinit()
+// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-64-NEXT: ret void
//
//
@@ -1044,7 +1044,7 @@ int bar(int n){
// CHECK-32-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// CHECK-32-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
-// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l29_kernel_environment)
+// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
@@ -1095,7 +1095,7 @@ int bar(int n){
// CHECK-32-NEXT: store i32 [[ADD10]], ptr [[I3]], align 4
// CHECK-32-NEXT: br label [[SIMD_IF_END]]
// CHECK-32: simd.if.end:
-// CHECK-32-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-NEXT: ret void
//
//
@@ -1113,7 +1113,7 @@ int bar(int n){
// CHECK-32-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// CHECK-32-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4
-// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_kernel_environment)
+// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
@@ -1168,7 +1168,7 @@ int bar(int n){
// CHECK-32-NEXT: store i32 [[ADD12]], ptr [[I3]], align 4
// CHECK-32-NEXT: br label [[SIMD_IF_END]]
// CHECK-32: simd.if.end:
-// CHECK-32-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-NEXT: ret void
//
//
@@ -1181,7 +1181,7 @@ int bar(int n){
// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
-// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l39_kernel_environment)
+// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
@@ -1213,7 +1213,7 @@ int bar(int n){
// CHECK-32-NEXT: ret void
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: store i32 10, ptr [[I]], align 4
-// CHECK-32-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-NEXT: ret void
//
//
@@ -1230,7 +1230,7 @@ int bar(int n){
// CHECK-32-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
// CHECK-32-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 4
-// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44_kernel_environment)
+// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
@@ -1267,7 +1267,7 @@ int bar(int n){
// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[N1]], align 4
// CHECK-32-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
// CHECK-32-NEXT: store i32 [[ADD4]], ptr [[TMP1]], align 4
-// CHECK-32-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-NEXT: ret void
//
//
@@ -1285,7 +1285,7 @@ int bar(int n){
// CHECK-32-EX-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// CHECK-32-EX-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
-// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l29_kernel_environment)
+// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
@@ -1336,7 +1336,7 @@ int bar(int n){
// CHECK-32-EX-NEXT: store i32 [[ADD10]], ptr [[I3]], align 4
// CHECK-32-EX-NEXT: br label [[SIMD_IF_END]]
// CHECK-32-EX: simd.if.end:
-// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-EX-NEXT: ret void
//
//
@@ -1354,7 +1354,7 @@ int bar(int n){
// CHECK-32-EX-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// CHECK-32-EX-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4
// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4
-// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_kernel_environment)
+// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
@@ -1409,7 +1409,7 @@ int bar(int n){
// CHECK-32-EX-NEXT: store i32 [[ADD12]], ptr [[I3]], align 4
// CHECK-32-EX-NEXT: br label [[SIMD_IF_END]]
// CHECK-32-EX: simd.if.end:
-// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-EX-NEXT: ret void
//
//
@@ -1422,7 +1422,7 @@ int bar(int n){
// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK-32-EX-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
-// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l39_kernel_environment)
+// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
@@ -1454,7 +1454,7 @@ int bar(int n){
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4
-// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-EX-NEXT: ret void
//
//
@@ -1471,7 +1471,7 @@ int bar(int n){
// CHECK-32-EX-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4
// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 4
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44_kernel_environment)
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
@@ -1508,6 +1508,6 @@ int bar(int n){
// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[N1]], align 4
// CHECK-32-EX-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
// CHECK-32-EX-NEXT: store i32 [[ADD4]], ptr [[TMP1]], align 4
-// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-EX-NEXT: ret void
//
diff --git a/clang/test/OpenMP/nvptx_target_teams_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_codegen.cpp
index 01eab5ff971ec7..cc2e6db8e4f53d 100644
--- a/clang/test/OpenMP/nvptx_target_teams_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_target_teams_codegen.cpp
@@ -57,18 +57,18 @@ int bar(int n){
// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l23_kernel_environment)
+// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
-// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
+// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK1-NEXT: [[TMP2:%.*]] = load i8, ptr [[A_ADDR]], align 1
// CHECK1-NEXT: store i8 [[TMP2]], ptr [[A_CASTED]], align 1
// CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[A_CASTED]], align 8
// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l23_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP3]]) #[[ATTR2:[0-9]+]]
-// CHECK1-NEXT: call void @__kmpc_target_deinit()
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
@@ -95,7 +95,7 @@ int bar(int n){
// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l28_kernel_environment)
+// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
@@ -106,7 +106,7 @@ int bar(int n){
// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l28_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP3]]) #[[ATTR2]]
-// CHECK1-NEXT: call void @__kmpc_target_deinit()
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
@@ -133,7 +133,7 @@ int bar(int n){
// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_kernel_environment)
+// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
@@ -144,7 +144,7 @@ int bar(int n){
// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP3]]) #[[ATTR2]]
-// CHECK1-NEXT: call void @__kmpc_target_deinit()
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
@@ -209,18 +209,18 @@ int bar(int n){
// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
-// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l23_kernel_environment)
+// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
-// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
+// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK2-NEXT: [[TMP2:%.*]] = load i8, ptr [[A_ADDR]], align 1
// CHECK2-NEXT: store i8 [[TMP2]], ptr [[A_CASTED]], align 1
// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_CASTED]], align 4
// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l23_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP3]]) #[[ATTR2:[0-9]+]]
-// CHECK2-NEXT: call void @__kmpc_target_deinit()
+// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
@@ -247,7 +247,7 @@ int bar(int n){
// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK2-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4
-// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l28_kernel_environment)
+// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
@@ -258,7 +258,7 @@ int bar(int n){
// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l28_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP3]]) #[[ATTR2]]
-// CHECK2-NEXT: call void @__kmpc_target_deinit()
+// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
@@ -285,7 +285,7 @@ int bar(int n){
// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK2-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4
-// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_kernel_environment)
+// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
@@ -296,7 +296,7 @@ int bar(int n){
// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP3]]) #[[ATTR2]]
-// CHECK2-NEXT: call void @__kmpc_target_deinit()
+// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_codegen.cpp
index 44c8a6de8830de..48a4212a85cd26 100644
--- a/clang/test/OpenMP/nvptx_target_teams_distribute_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_target_teams_distribute_codegen.cpp
@@ -37,15 +37,15 @@ int bar(int n){
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l16_kernel_environment)
+// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
-// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
+// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l16_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5:[0-9]+]]
-// CHECK1-NEXT: call void @__kmpc_target_deinit()
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
@@ -155,15 +155,15 @@ int bar(int n){
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l16_kernel_environment)
+// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
-// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
+// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l16_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5:[0-9]+]]
-// CHECK2-NEXT: call void @__kmpc_target_deinit()
+// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp
index ce8b2d6b4622e1..69eede672ba46d 100644
--- a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp
@@ -85,11 +85,11 @@ int bar(int n){
// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
// CHECK1-NEXT: store i64 [[L]], ptr [[L_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28_kernel_environment)
+// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
-// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
+// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4
// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8
@@ -99,7 +99,7 @@ int bar(int n){
// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]], i64 [[TMP6]]) #[[ATTR3:[0-9]+]]
-// CHECK1-NEXT: call void @__kmpc_target_deinit()
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
@@ -396,7 +396,7 @@ int bar(int n){
// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
// CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_kernel_environment)
+// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
@@ -407,7 +407,7 @@ int bar(int n){
// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]]
-// CHECK1-NEXT: call void @__kmpc_target_deinit()
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
@@ -645,7 +645,7 @@ int bar(int n){
// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l39_kernel_environment)
+// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
@@ -653,7 +653,7 @@ int bar(int n){
// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l39_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR3]]
-// CHECK1-NEXT: call void @__kmpc_target_deinit()
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
@@ -833,7 +833,7 @@ int bar(int n){
// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
// CHECK1-NEXT: store i64 [[F]], ptr [[F_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44_kernel_environment)
+// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
@@ -844,7 +844,7 @@ int bar(int n){
// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i64 [[TMP4]]) #[[ATTR3]]
-// CHECK1-NEXT: call void @__kmpc_target_deinit()
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
@@ -1059,7 +1059,7 @@ int bar(int n){
// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l52_kernel_environment)
+// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
@@ -1070,7 +1070,7 @@ int bar(int n){
// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l52_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]]
-// CHECK1-NEXT: call void @__kmpc_target_deinit()
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
@@ -1371,7 +1371,7 @@ int bar(int n){
// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
// CHECK1-NEXT: store ptr [[V]], ptr [[V_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l59_kernel_environment)
+// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
@@ -1383,7 +1383,7 @@ int bar(int n){
// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l59_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]], ptr [[TMP5]]) #[[ATTR3]]
-// CHECK1-NEXT: call void @__kmpc_target_deinit()
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
@@ -1635,11 +1635,11 @@ int bar(int n){
// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
// CHECK2-NEXT: store i64 [[L]], ptr [[L_ADDR]], align 8
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
-// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28_kernel_environment)
+// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
-// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
+// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4
// CHECK2-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8
@@ -1649,7 +1649,7 @@ int bar(int n){
// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]], i64 [[TMP6]]) #[[ATTR3:[0-9]+]]
-// CHECK2-NEXT: call void @__kmpc_target_deinit()
+// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
@@ -1946,7 +1946,7 @@ int bar(int n){
// CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
// CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8
-// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_kernel_environment)
+// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
@@ -1957,7 +1957,7 @@ int bar(int n){
// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]]
-// CHECK2-NEXT: call void @__kmpc_target_deinit()
+// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
@@ -2195,7 +2195,7 @@ int bar(int n){
// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
-// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l39_kernel_environment)
+// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
@@ -2203,7 +2203,7 @@ int bar(int n){
// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l39_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR3]]
-// CHECK2-NEXT: call void @__kmpc_target_deinit()
+// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
@@ -2383,7 +2383,7 @@ int bar(int n){
// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
// CHECK2-NEXT: store i64 [[F]], ptr [[F_ADDR]], align 8
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8
-// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44_kernel_environment)
+// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
@@ -2394,7 +2394,7 @@ int bar(int n){
// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i64 [[TMP4]]) #[[ATTR3]]
-// CHECK2-NEXT: call void @__kmpc_target_deinit()
+// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
@@ -2609,7 +2609,7 @@ int bar(int n){
// CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8
-// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l52_kernel_environment)
+// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
@@ -2620,7 +2620,7 @@ int bar(int n){
// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l52_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]]
-// CHECK2-NEXT: call void @__kmpc_target_deinit()
+// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
@@ -2916,7 +2916,7 @@ int bar(int n){
// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
// CHECK2-NEXT: store ptr [[V]], ptr [[V_ADDR]], align 8
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
-// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l59_kernel_environment)
+// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
@@ -2928,7 +2928,7 @@ int bar(int n){
// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l59_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]], ptr [[TMP5]]) #[[ATTR3]]
-// CHECK2-NEXT: call void @__kmpc_target_deinit()
+// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
@@ -3180,11 +3180,11 @@ int bar(int n){
// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
// CHECK3-NEXT: store i32 [[L]], ptr [[L_ADDR]], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
-// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28_kernel_environment)
+// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK3: user_code.entry:
-// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
+// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK3-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4
// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4
@@ -3194,7 +3194,7 @@ int bar(int n){
// CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]], i32 [[TMP6]]) #[[ATTR3:[0-9]+]]
-// CHECK3-NEXT: call void @__kmpc_target_deinit()
+// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK3-NEXT: ret void
// CHECK3: worker.exit:
// CHECK3-NEXT: ret void
@@ -3484,7 +3484,7 @@ int bar(int n){
// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// CHECK3-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4
-// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_kernel_environment)
+// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK3: user_code.entry:
@@ -3495,7 +3495,7 @@ int bar(int n){
// CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]]
-// CHECK3-NEXT: call void @__kmpc_target_deinit()
+// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK3-NEXT: ret void
// CHECK3: worker.exit:
// CHECK3-NEXT: ret void
@@ -3727,7 +3727,7 @@ int bar(int n){
// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
-// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l39_kernel_environment)
+// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK3: user_code.entry:
@@ -3735,7 +3735,7 @@ int bar(int n){
// CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l39_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR3]]
-// CHECK3-NEXT: call void @__kmpc_target_deinit()
+// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK3-NEXT: ret void
// CHECK3: worker.exit:
// CHECK3-NEXT: ret void
@@ -3909,7 +3909,7 @@ int bar(int n){
// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4
// CHECK3-NEXT: store i32 [[F]], ptr [[F_ADDR]], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4
-// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44_kernel_environment)
+// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK3: user_code.entry:
@@ -3920,7 +3920,7 @@ int bar(int n){
// CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i32 [[TMP4]]) #[[ATTR3]]
-// CHECK3-NEXT: call void @__kmpc_target_deinit()
+// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK3-NEXT: ret void
// CHECK3: worker.exit:
// CHECK3-NEXT: ret void
@@ -4128,7 +4128,7 @@ int bar(int n){
// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4
-// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l52_kernel_environment)
+// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK3: user_code.entry:
@@ -4139,7 +4139,7 @@ int bar(int n){
// CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l52_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]]
-// CHECK3-NEXT: call void @__kmpc_target_deinit()
+// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK3-NEXT: ret void
// CHECK3: worker.exit:
// CHECK3-NEXT: ret void
@@ -4443,7 +4443,7 @@ int bar(int n){
// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
// CHECK3-NEXT: store ptr [[V]], ptr [[V_ADDR]], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
-// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l59_kernel_environment)
+// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK3: user_code.entry:
@@ -4455,7 +4455,7 @@ int bar(int n){
// CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l59_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]], ptr [[TMP5]]) #[[ATTR3]]
-// CHECK3-NEXT: call void @__kmpc_target_deinit()
+// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK3-NEXT: ret void
// CHECK3: worker.exit:
// CHECK3-NEXT: ret void
diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp
index 17b32dd2182728..7dc7d79e3175a4 100644
--- a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp
@@ -43,11 +43,11 @@ int main(int argc, char **argv) {
// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_kernel_environment)
+// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
-// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
+// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP3]], ptr [[ARGC_CASTED]], align 4
// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[ARGC_CASTED]], align 8
@@ -57,7 +57,7 @@ int main(int argc, char **argv) {
// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]], i64 [[TMP6]]) #[[ATTR3:[0-9]+]]
-// CHECK1-NEXT: call void @__kmpc_target_deinit()
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
@@ -341,11 +341,11 @@ int main(int argc, char **argv) {
// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
// CHECK2-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
-// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_kernel_environment)
+// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
-// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
+// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP3]], ptr [[ARGC_CASTED]], align 4
// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARGC_CASTED]], align 4
@@ -355,7 +355,7 @@ int main(int argc, char **argv) {
// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]], i32 [[TMP6]]) #[[ATTR3:[0-9]+]]
-// CHECK2-NEXT: call void @__kmpc_target_deinit()
+// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp
index 6a3f968e74b106..11e7ab0c64cab0 100644
--- a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp
@@ -73,11 +73,11 @@ int bar(int n){
// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
// CHECK1-NEXT: store i64 [[L]], ptr [[L_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_kernel_environment)
+// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
-// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
+// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4
// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8
@@ -87,7 +87,7 @@ int bar(int n){
// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]], i64 [[TMP6]]) #[[ATTR3:[0-9]+]]
-// CHECK1-NEXT: call void @__kmpc_target_deinit()
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
@@ -408,7 +408,7 @@ int bar(int n){
// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
// CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l32_kernel_environment)
+// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
@@ -419,7 +419,7 @@ int bar(int n){
// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l32_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]]
-// CHECK1-NEXT: call void @__kmpc_target_deinit()
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
@@ -681,7 +681,7 @@ int bar(int n){
// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l37_kernel_environment)
+// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
@@ -689,7 +689,7 @@ int bar(int n){
// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l37_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR3]]
-// CHECK1-NEXT: call void @__kmpc_target_deinit()
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
@@ -883,7 +883,7 @@ int bar(int n){
// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
// CHECK1-NEXT: store i64 [[F]], ptr [[F_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_kernel_environment)
+// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
@@ -894,7 +894,7 @@ int bar(int n){
// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i64 [[TMP4]]) #[[ATTR3]]
-// CHECK1-NEXT: call void @__kmpc_target_deinit()
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
@@ -1128,11 +1128,11 @@ int bar(int n){
// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
// CHECK2-NEXT: store i32 [[L]], ptr [[L_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
-// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_kernel_environment)
+// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
-// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
+// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4
// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4
@@ -1142,7 +1142,7 @@ int bar(int n){
// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]], i32 [[TMP6]]) #[[ATTR3:[0-9]+]]
-// CHECK2-NEXT: call void @__kmpc_target_deinit()
+// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
@@ -1456,7 +1456,7 @@ int bar(int n){
// CHECK2-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4
-// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l32_kernel_environment)
+// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
@@ -1467,7 +1467,7 @@ int bar(int n){
// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l32_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]]
-// CHECK2-NEXT: call void @__kmpc_target_deinit()
+// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
@@ -1723,7 +1723,7 @@ int bar(int n){
// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
-// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l37_kernel_environment)
+// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
@@ -1731,7 +1731,7 @@ int bar(int n){
// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l37_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR3]]
-// CHECK2-NEXT: call void @__kmpc_target_deinit()
+// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
@@ -1919,7 +1919,7 @@ int bar(int n){
// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4
// CHECK2-NEXT: store i32 [[F]], ptr [[F_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4
-// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_kernel_environment)
+// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
@@ -1930,7 +1930,7 @@ int bar(int n){
// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i32 [[TMP4]]) #[[ATTR3]]
-// CHECK2-NEXT: call void @__kmpc_target_deinit()
+// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_simd_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_simd_codegen.cpp
index 20fe58ce10082e..7f06bfc200690a 100644
--- a/clang/test/OpenMP/nvptx_target_teams_distribute_simd_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_target_teams_distribute_simd_codegen.cpp
@@ -81,11 +81,11 @@ int bar(int n){
// CHECK45-64-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
// CHECK45-64-NEXT: store i64 [[L]], ptr [[L_ADDR]], align 8
// CHECK45-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
-// CHECK45-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_kernel_environment)
+// CHECK45-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK45-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK45-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK45-64: user_code.entry:
-// CHECK45-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
+// CHECK45-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK45-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK45-64-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4
// CHECK45-64-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8
@@ -95,7 +95,7 @@ int bar(int n){
// CHECK45-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK45-64-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK45-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]], i64 [[TMP6]]) #[[ATTR3:[0-9]+]]
-// CHECK45-64-NEXT: call void @__kmpc_target_deinit()
+// CHECK45-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK45-64-NEXT: ret void
// CHECK45-64: worker.exit:
// CHECK45-64-NEXT: ret void
@@ -246,7 +246,7 @@ int bar(int n){
// CHECK45-64-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
// CHECK45-64-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8
// CHECK45-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8
-// CHECK45-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_kernel_environment)
+// CHECK45-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK45-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK45-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK45-64: user_code.entry:
@@ -257,7 +257,7 @@ int bar(int n){
// CHECK45-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK45-64-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK45-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]]
-// CHECK45-64-NEXT: call void @__kmpc_target_deinit()
+// CHECK45-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK45-64-NEXT: ret void
// CHECK45-64: worker.exit:
// CHECK45-64-NEXT: ret void
@@ -396,7 +396,7 @@ int bar(int n){
// CHECK45-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK45-64-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
// CHECK45-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
-// CHECK45-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_kernel_environment)
+// CHECK45-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK45-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK45-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK45-64: user_code.entry:
@@ -404,7 +404,7 @@ int bar(int n){
// CHECK45-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK45-64-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK45-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR3]]
-// CHECK45-64-NEXT: call void @__kmpc_target_deinit()
+// CHECK45-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK45-64-NEXT: ret void
// CHECK45-64: worker.exit:
// CHECK45-64-NEXT: ret void
@@ -515,7 +515,7 @@ int bar(int n){
// CHECK45-64-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
// CHECK45-64-NEXT: store i64 [[F]], ptr [[F_ADDR]], align 8
// CHECK45-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8
-// CHECK45-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_kernel_environment)
+// CHECK45-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK45-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK45-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK45-64: user_code.entry:
@@ -526,7 +526,7 @@ int bar(int n){
// CHECK45-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK45-64-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK45-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i64 [[TMP4]]) #[[ATTR3]]
-// CHECK45-64-NEXT: call void @__kmpc_target_deinit()
+// CHECK45-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK45-64-NEXT: ret void
// CHECK45-64: worker.exit:
// CHECK45-64-NEXT: ret void
@@ -664,11 +664,11 @@ int bar(int n){
// CHECK45-32-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
// CHECK45-32-NEXT: store i32 [[L]], ptr [[L_ADDR]], align 4
// CHECK45-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
-// CHECK45-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_kernel_environment)
+// CHECK45-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK45-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK45-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK45-32: user_code.entry:
-// CHECK45-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
+// CHECK45-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK45-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK45-32-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4
// CHECK45-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4
@@ -678,7 +678,7 @@ int bar(int n){
// CHECK45-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK45-32-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK45-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]], i32 [[TMP6]]) #[[ATTR3:[0-9]+]]
-// CHECK45-32-NEXT: call void @__kmpc_target_deinit()
+// CHECK45-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK45-32-NEXT: ret void
// CHECK45-32: worker.exit:
// CHECK45-32-NEXT: ret void
@@ -828,7 +828,7 @@ int bar(int n){
// CHECK45-32-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// CHECK45-32-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4
// CHECK45-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4
-// CHECK45-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_kernel_environment)
+// CHECK45-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK45-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK45-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK45-32: user_code.entry:
@@ -839,7 +839,7 @@ int bar(int n){
// CHECK45-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK45-32-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK45-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]]
-// CHECK45-32-NEXT: call void @__kmpc_target_deinit()
+// CHECK45-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK45-32-NEXT: ret void
// CHECK45-32: worker.exit:
// CHECK45-32-NEXT: ret void
@@ -977,7 +977,7 @@ int bar(int n){
// CHECK45-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK45-32-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
// CHECK45-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
-// CHECK45-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_kernel_environment)
+// CHECK45-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK45-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK45-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK45-32: user_code.entry:
@@ -985,7 +985,7 @@ int bar(int n){
// CHECK45-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK45-32-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK45-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR3]]
-// CHECK45-32-NEXT: call void @__kmpc_target_deinit()
+// CHECK45-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK45-32-NEXT: ret void
// CHECK45-32: worker.exit:
// CHECK45-32-NEXT: ret void
@@ -1095,7 +1095,7 @@ int bar(int n){
// CHECK45-32-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4
// CHECK45-32-NEXT: store i32 [[F]], ptr [[F_ADDR]], align 4
// CHECK45-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4
-// CHECK45-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_kernel_environment)
+// CHECK45-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK45-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK45-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK45-32: user_code.entry:
@@ -1106,7 +1106,7 @@ int bar(int n){
// CHECK45-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK45-32-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK45-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i32 [[TMP4]]) #[[ATTR3]]
-// CHECK45-32-NEXT: call void @__kmpc_target_deinit()
+// CHECK45-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK45-32-NEXT: ret void
// CHECK45-32: worker.exit:
// CHECK45-32-NEXT: ret void
@@ -1242,11 +1242,11 @@ int bar(int n){
// CHECK45-32-EX-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
// CHECK45-32-EX-NEXT: store i32 [[L]], ptr [[L_ADDR]], align 4
// CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
-// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_kernel_environment)
+// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK45-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK45-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK45-32-EX: user_code.entry:
-// CHECK45-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
+// CHECK45-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK45-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK45-32-EX-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4
// CHECK45-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4
@@ -1256,7 +1256,7 @@ int bar(int n){
// CHECK45-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK45-32-EX-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK45-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]], i32 [[TMP6]]) #[[ATTR3:[0-9]+]]
-// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit()
+// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK45-32-EX-NEXT: ret void
// CHECK45-32-EX: worker.exit:
// CHECK45-32-EX-NEXT: ret void
@@ -1406,7 +1406,7 @@ int bar(int n){
// CHECK45-32-EX-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// CHECK45-32-EX-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4
// CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4
-// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_kernel_environment)
+// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK45-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK45-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK45-32-EX: user_code.entry:
@@ -1417,7 +1417,7 @@ int bar(int n){
// CHECK45-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK45-32-EX-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK45-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]]
-// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit()
+// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK45-32-EX-NEXT: ret void
// CHECK45-32-EX: worker.exit:
// CHECK45-32-EX-NEXT: ret void
@@ -1555,7 +1555,7 @@ int bar(int n){
// CHECK45-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK45-32-EX-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
// CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
-// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_kernel_environment)
+// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK45-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK45-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK45-32-EX: user_code.entry:
@@ -1563,7 +1563,7 @@ int bar(int n){
// CHECK45-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK45-32-EX-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK45-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR3]]
-// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit()
+// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK45-32-EX-NEXT: ret void
// CHECK45-32-EX: worker.exit:
// CHECK45-32-EX-NEXT: ret void
@@ -1673,7 +1673,7 @@ int bar(int n){
// CHECK45-32-EX-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4
// CHECK45-32-EX-NEXT: store i32 [[F]], ptr [[F_ADDR]], align 4
// CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4
-// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_kernel_environment)
+// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK45-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK45-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK45-32-EX: user_code.entry:
@@ -1684,7 +1684,7 @@ int bar(int n){
// CHECK45-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK45-32-EX-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK45-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i32 [[TMP4]]) #[[ATTR3]]
-// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit()
+// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK45-32-EX-NEXT: ret void
// CHECK45-32-EX: worker.exit:
// CHECK45-32-EX-NEXT: ret void
@@ -1820,11 +1820,11 @@ int bar(int n){
// CHECK-64-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
// CHECK-64-NEXT: store i64 [[L]], ptr [[L_ADDR]], align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
-// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_kernel_environment)
+// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
-// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
+// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK-64-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4
// CHECK-64-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8
@@ -1834,7 +1834,7 @@ int bar(int n){
// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-64-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]], i64 [[TMP6]]) #[[ATTR3:[0-9]+]]
-// CHECK-64-NEXT: call void @__kmpc_target_deinit()
+// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
// CHECK-64-NEXT: ret void
@@ -1985,7 +1985,7 @@ int bar(int n){
// CHECK-64-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
// CHECK-64-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8
-// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_kernel_environment)
+// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
@@ -1996,7 +1996,7 @@ int bar(int n){
// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-64-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]]
-// CHECK-64-NEXT: call void @__kmpc_target_deinit()
+// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
// CHECK-64-NEXT: ret void
@@ -2135,7 +2135,7 @@ int bar(int n){
// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK-64-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
-// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_kernel_environment)
+// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
@@ -2143,7 +2143,7 @@ int bar(int n){
// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-64-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR3]]
-// CHECK-64-NEXT: call void @__kmpc_target_deinit()
+// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
// CHECK-64-NEXT: ret void
@@ -2254,7 +2254,7 @@ int bar(int n){
// CHECK-64-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
// CHECK-64-NEXT: store i64 [[F]], ptr [[F_ADDR]], align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8
-// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_kernel_environment)
+// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
@@ -2265,7 +2265,7 @@ int bar(int n){
// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-64-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i64 [[TMP4]]) #[[ATTR3]]
-// CHECK-64-NEXT: call void @__kmpc_target_deinit()
+// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
// CHECK-64-NEXT: ret void
@@ -2403,11 +2403,11 @@ int bar(int n){
// CHECK-32-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
// CHECK-32-NEXT: store i32 [[L]], ptr [[L_ADDR]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
-// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_kernel_environment)
+// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
-// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
+// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK-32-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4
// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4
@@ -2417,7 +2417,7 @@ int bar(int n){
// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-32-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]], i32 [[TMP6]]) #[[ATTR3:[0-9]+]]
-// CHECK-32-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
// CHECK-32-NEXT: ret void
@@ -2567,7 +2567,7 @@ int bar(int n){
// CHECK-32-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// CHECK-32-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4
-// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_kernel_environment)
+// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
@@ -2578,7 +2578,7 @@ int bar(int n){
// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-32-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]]
-// CHECK-32-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
// CHECK-32-NEXT: ret void
@@ -2716,7 +2716,7 @@ int bar(int n){
// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK-32-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
-// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_kernel_environment)
+// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
@@ -2724,7 +2724,7 @@ int bar(int n){
// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-32-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR3]]
-// CHECK-32-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
// CHECK-32-NEXT: ret void
@@ -2834,7 +2834,7 @@ int bar(int n){
// CHECK-32-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4
// CHECK-32-NEXT: store i32 [[F]], ptr [[F_ADDR]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4
-// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_kernel_environment)
+// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
@@ -2845,7 +2845,7 @@ int bar(int n){
// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-32-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i32 [[TMP4]]) #[[ATTR3]]
-// CHECK-32-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
// CHECK-32-NEXT: ret void
@@ -2981,11 +2981,11 @@ int bar(int n){
// CHECK-32-EX-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
// CHECK-32-EX-NEXT: store i32 [[L]], ptr [[L_ADDR]], align 4
// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
-// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_kernel_environment)
+// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK-32-EX-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4
// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4
@@ -2995,7 +2995,7 @@ int bar(int n){
// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-32-EX-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]], i32 [[TMP6]]) #[[ATTR3:[0-9]+]]
-// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
// CHECK-32-EX-NEXT: ret void
@@ -3145,7 +3145,7 @@ int bar(int n){
// CHECK-32-EX-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// CHECK-32-EX-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4
// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4
-// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_kernel_environment)
+// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
@@ -3156,7 +3156,7 @@ int bar(int n){
// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-32-EX-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]]
-// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
// CHECK-32-EX-NEXT: ret void
@@ -3294,7 +3294,7 @@ int bar(int n){
// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK-32-EX-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
-// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_kernel_environment)
+// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
@@ -3302,7 +3302,7 @@ int bar(int n){
// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-32-EX-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR3]]
-// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
// CHECK-32-EX-NEXT: ret void
@@ -3412,7 +3412,7 @@ int bar(int n){
// CHECK-32-EX-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4
// CHECK-32-EX-NEXT: store i32 [[F]], ptr [[F_ADDR]], align 4
// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4
-// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_kernel_environment)
+// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
@@ -3423,7 +3423,7 @@ int bar(int n){
// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK-32-EX-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i32 [[TMP4]]) #[[ATTR3]]
-// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
+// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
// CHECK-32-EX-NEXT: ret void
diff --git a/clang/test/OpenMP/nvptx_target_teams_generic_loop_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_generic_loop_codegen.cpp
index bbff9c9f4fbc06..df04e1339693ff 100644
--- a/clang/test/OpenMP/nvptx_target_teams_generic_loop_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_target_teams_generic_loop_codegen.cpp
@@ -76,18 +76,18 @@ int bar(int n){
// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
// CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28_kernel_environment)
+// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
-// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
+// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4
// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8
// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR2:[0-9]+]]
-// CHECK1-NEXT: call void @__kmpc_target_deinit()
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
@@ -325,7 +325,7 @@ int bar(int n){
// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33_kernel_environment)
+// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
@@ -333,7 +333,7 @@ int bar(int n){
// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR2]]
-// CHECK1-NEXT: call void @__kmpc_target_deinit()
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
@@ -513,7 +513,7 @@ int bar(int n){
// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
// CHECK1-NEXT: store i64 [[F]], ptr [[F_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l38_kernel_environment)
+// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
@@ -524,7 +524,7 @@ int bar(int n){
// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l38_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i64 [[TMP4]]) #[[ATTR2]]
-// CHECK1-NEXT: call void @__kmpc_target_deinit()
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
@@ -739,7 +739,7 @@ int bar(int n){
// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l46_kernel_environment)
+// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
@@ -750,7 +750,7 @@ int bar(int n){
// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l46_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR2]]
-// CHECK1-NEXT: call void @__kmpc_target_deinit()
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
@@ -1051,7 +1051,7 @@ int bar(int n){
// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
// CHECK1-NEXT: store ptr [[V]], ptr [[V_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l53_kernel_environment)
+// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
@@ -1063,7 +1063,7 @@ int bar(int n){
// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l53_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]], ptr [[TMP5]]) #[[ATTR2]]
-// CHECK1-NEXT: call void @__kmpc_target_deinit()
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
@@ -1312,18 +1312,18 @@ int bar(int n){
// CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
// CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8
-// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28_kernel_environment)
+// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
-// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
+// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4
// CHECK2-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8
// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR2:[0-9]+]]
-// CHECK2-NEXT: call void @__kmpc_target_deinit()
+// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
@@ -1561,7 +1561,7 @@ int bar(int n){
// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
-// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33_kernel_environment)
+// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
@@ -1569,7 +1569,7 @@ int bar(int n){
// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR2]]
-// CHECK2-NEXT: call void @__kmpc_target_deinit()
+// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
@@ -1749,7 +1749,7 @@ int bar(int n){
// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
// CHECK2-NEXT: store i64 [[F]], ptr [[F_ADDR]], align 8
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8
-// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l38_kernel_environment)
+// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
@@ -1760,7 +1760,7 @@ int bar(int n){
// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l38_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i64 [[TMP4]]) #[[ATTR2]]
-// CHECK2-NEXT: call void @__kmpc_target_deinit()
+// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
@@ -1975,7 +1975,7 @@ int bar(int n){
// CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8
-// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l46_kernel_environment)
+// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
@@ -1986,7 +1986,7 @@ int bar(int n){
// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l46_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR2]]
-// CHECK2-NEXT: call void @__kmpc_target_deinit()
+// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
@@ -2282,7 +2282,7 @@ int bar(int n){
// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
// CHECK2-NEXT: store ptr [[V]], ptr [[V_ADDR]], align 8
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
-// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l53_kernel_environment)
+// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
@@ -2294,7 +2294,7 @@ int bar(int n){
// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l53_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]], ptr [[TMP5]]) #[[ATTR2]]
-// CHECK2-NEXT: call void @__kmpc_target_deinit()
+// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
@@ -2543,18 +2543,18 @@ int bar(int n){
// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// CHECK3-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4
-// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28_kernel_environment)
+// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK3: user_code.entry:
-// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
+// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
// CHECK3-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4
// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4
// CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]]) #[[ATTR2:[0-9]+]]
-// CHECK3-NEXT: call void @__kmpc_target_deinit()
+// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK3-NEXT: ret void
// CHECK3: worker.exit:
// CHECK3-NEXT: ret void
@@ -2786,7 +2786,7 @@ int bar(int n){
// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
-// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33_kernel_environment)
+// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK3: user_code.entry:
@@ -2794,7 +2794,7 @@ int bar(int n){
// CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR2]]
-// CHECK3-NEXT: call void @__kmpc_target_deinit()
+// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK3-NEXT: ret void
// CHECK3: worker.exit:
// CHECK3-NEXT: ret void
@@ -2968,7 +2968,7 @@ int bar(int n){
// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4
// CHECK3-NEXT: store i32 [[F]], ptr [[F_ADDR]], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4
-// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l38_kernel_environment)
+// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK3: user_code.entry:
@@ -2979,7 +2979,7 @@ int bar(int n){
// CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l38_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i32 [[TMP4]]) #[[ATTR2]]
-// CHECK3-NEXT: call void @__kmpc_target_deinit()
+// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK3-NEXT: ret void
// CHECK3: worker.exit:
// CHECK3-NEXT: ret void
@@ -3187,7 +3187,7 @@ int bar(int n){
// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4
-// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l46_kernel_environment)
+// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK3: user_code.entry:
@@ -3198,7 +3198,7 @@ int bar(int n){
// CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l46_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]]) #[[ATTR2]]
-// CHECK3-NEXT: call void @__kmpc_target_deinit()
+// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK3-NEXT: ret void
// CHECK3: worker.exit:
// CHECK3-NEXT: ret void
@@ -3502,7 +3502,7 @@ int bar(int n){
// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
// CHECK3-NEXT: store ptr [[V]], ptr [[V_ADDR]], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
-// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l53_kernel_environment)
+// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK3: user_code.entry:
@@ -3514,7 +3514,7 @@ int bar(int n){
// CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l53_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]], ptr [[TMP5]]) #[[ATTR2]]
-// CHECK3-NEXT: call void @__kmpc_target_deinit()
+// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK3-NEXT: ret void
// CHECK3: worker.exit:
// CHECK3-NEXT: ret void
diff --git a/clang/test/OpenMP/nvptx_target_teams_generic_loop_generic_mode_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_generic_loop_generic_mode_codegen.cpp
index a2924dea7c20f9..ac6c7a308a853b 100644
--- a/clang/test/OpenMP/nvptx_target_teams_generic_loop_generic_mode_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_target_teams_generic_loop_generic_mode_codegen.cpp
@@ -40,18 +40,18 @@ int main(int argc, char **argv) {
// CHECK1-NEXT: store i64 [[ARGC]], ptr [[ARGC_ADDR]], align 8
// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_kernel_environment)
+// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
-// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
+// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP3]], ptr [[ARGC_CASTED]], align 4
// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[ARGC_CASTED]], align 8
// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR3:[0-9]+]]
-// CHECK1-NEXT: call void @__kmpc_target_deinit()
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
@@ -290,18 +290,18 @@ int main(int argc, char **argv) {
// CHECK2-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
-// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_kernel_environment)
+// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
-// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
+// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP3]], ptr [[ARGC_CASTED]], align 4
// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARGC_CASTED]], align 4
// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]]) #[[ATTR3:[0-9]+]]
-// CHECK2-NEXT: call void @__kmpc_target_deinit()
+// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
diff --git a/clang/test/OpenMP/nvptx_teams_codegen.cpp b/clang/test/OpenMP/nvptx_teams_codegen.cpp
index 5cbc5e178d43dc..10e379cbef05a9 100644
--- a/clang/test/OpenMP/nvptx_teams_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_teams_codegen.cpp
@@ -83,19 +83,19 @@ int main (int argc, char **argv) {
// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK1-NEXT: store i64 [[ARGC]], ptr [[ARGC_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23_kernel_environment)
+// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
// CHECK1-NEXT: [[ARGC1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4)
// CHECK1-NEXT: store i32 [[TMP1]], ptr [[ARGC1]], align 4
-// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
+// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[ARGC1]]) #[[ATTR3:[0-9]+]]
// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[ARGC1]], i64 4)
-// CHECK1-NEXT: call void @__kmpc_target_deinit()
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
@@ -122,7 +122,7 @@ int main (int argc, char **argv) {
// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK1-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l15_kernel_environment)
+// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
@@ -134,7 +134,7 @@ int main (int argc, char **argv) {
// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l15_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[ARGC1]]) #[[ATTR3]]
// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[ARGC1]], i64 8)
-// CHECK1-NEXT: call void @__kmpc_target_deinit()
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
@@ -161,19 +161,19 @@ int main (int argc, char **argv) {
// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK2-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
-// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23_kernel_environment)
+// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
// CHECK2-NEXT: [[ARGC1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4)
// CHECK2-NEXT: store i32 [[TMP1]], ptr [[ARGC1]], align 4
-// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
+// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[ARGC1]]) #[[ATTR3:[0-9]+]]
// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[ARGC1]], i32 4)
-// CHECK2-NEXT: call void @__kmpc_target_deinit()
+// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
@@ -200,7 +200,7 @@ int main (int argc, char **argv) {
// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK2-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 4
-// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l15_kernel_environment)
+// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
@@ -212,7 +212,7 @@ int main (int argc, char **argv) {
// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l15_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[ARGC1]]) #[[ATTR3]]
// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[ARGC1]], i32 4)
-// CHECK2-NEXT: call void @__kmpc_target_deinit()
+// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
@@ -243,19 +243,19 @@ int main (int argc, char **argv) {
// CHECK3-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
// CHECK3-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8
// CHECK3-NEXT: store i64 [[ARGC]], ptr [[ARGC_ADDR]], align 8
-// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64_kernel_environment)
+// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true)
// CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK3: user_code.entry:
// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
// CHECK3-NEXT: [[ARGC1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4)
// CHECK3-NEXT: store i32 [[TMP1]], ptr [[ARGC1]], align 4
-// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
+// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[ARGC1]]) #[[ATTR3:[0-9]+]]
// CHECK3-NEXT: call void @__kmpc_free_shared(ptr [[ARGC1]], i64 4)
-// CHECK3-NEXT: call void @__kmpc_target_deinit()
+// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK3-NEXT: ret void
// CHECK3: worker.exit:
// CHECK3-NEXT: ret void
@@ -286,7 +286,7 @@ int main (int argc, char **argv) {
// CHECK3-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
// CHECK3-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8
// CHECK3-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8
-// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l53_kernel_environment)
+// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK3: user_code.entry:
@@ -298,7 +298,7 @@ int main (int argc, char **argv) {
// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l53_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[ARGC1]]) #[[ATTR3]]
// CHECK3-NEXT: call void @__kmpc_free_shared(ptr [[ARGC1]], i64 8)
-// CHECK3-NEXT: call void @__kmpc_target_deinit()
+// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK3-NEXT: ret void
// CHECK3: worker.exit:
// CHECK3-NEXT: ret void
@@ -329,19 +329,19 @@ int main (int argc, char **argv) {
// CHECK4-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
// CHECK4-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4
// CHECK4-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
-// CHECK4-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64_kernel_environment)
+// CHECK4-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true)
// CHECK4-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK4-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK4: user_code.entry:
// CHECK4-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
// CHECK4-NEXT: [[ARGC1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4)
// CHECK4-NEXT: store i32 [[TMP1]], ptr [[ARGC1]], align 4
-// CHECK4-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
+// CHECK4-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK4-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK4-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK4-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[ARGC1]]) #[[ATTR3:[0-9]+]]
// CHECK4-NEXT: call void @__kmpc_free_shared(ptr [[ARGC1]], i32 4)
-// CHECK4-NEXT: call void @__kmpc_target_deinit()
+// CHECK4-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK4-NEXT: ret void
// CHECK4: worker.exit:
// CHECK4-NEXT: ret void
@@ -372,7 +372,7 @@ int main (int argc, char **argv) {
// CHECK4-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
// CHECK4-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4
// CHECK4-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 4
-// CHECK4-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l53_kernel_environment)
+// CHECK4-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK4-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK4-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK4: user_code.entry:
@@ -384,7 +384,7 @@ int main (int argc, char **argv) {
// CHECK4-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK4-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l53_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[ARGC1]]) #[[ATTR3]]
// CHECK4-NEXT: call void @__kmpc_free_shared(ptr [[ARGC1]], i32 4)
-// CHECK4-NEXT: call void @__kmpc_target_deinit()
+// CHECK4-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK4-NEXT: ret void
// CHECK4: worker.exit:
// CHECK4-NEXT: ret void
diff --git a/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp b/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp
index d700439e2508ec..51ea19632aec9f 100644
--- a/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp
@@ -57,19 +57,19 @@ int bar(int n){
// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK1-NEXT: store i64 [[E]], ptr [[E_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l20_kernel_environment)
+// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP1:%.*]] = load double, ptr [[E_ADDR]], align 8
// CHECK1-NEXT: [[E1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 8)
// CHECK1-NEXT: store double [[TMP1]], ptr [[E1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
+// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l20_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[E1]]) #[[ATTR4:[0-9]+]]
// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[E1]], i64 8)
-// CHECK1-NEXT: call void @__kmpc_target_deinit()
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
@@ -328,7 +328,7 @@ int bar(int n){
// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK1-NEXT: store i64 [[C]], ptr [[C_ADDR]], align 8
// CHECK1-NEXT: store i64 [[D]], ptr [[D_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l26_kernel_environment)
+// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
@@ -344,7 +344,7 @@ int bar(int n){
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l26_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[C1]], ptr [[D2]]) #[[ATTR4]]
// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[D2]], i64 4)
// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[C1]], i64 1)
-// CHECK1-NEXT: call void @__kmpc_target_deinit()
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
@@ -677,7 +677,7 @@ int bar(int n){
// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
// CHECK1-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_kernel_environment)
+// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
@@ -685,7 +685,7 @@ int bar(int n){
// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]]) #[[ATTR4]]
-// CHECK1-NEXT: call void @__kmpc_target_deinit()
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
@@ -1250,17 +1250,17 @@ int bar(int n){
// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK2-NEXT: store ptr [[E]], ptr [[E_ADDR]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[E_ADDR]], align 4
-// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l20_kernel_environment)
+// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
-// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
+// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK2-NEXT: [[TMP3:%.*]] = load double, ptr [[TMP0]], align 8
// CHECK2-NEXT: store double [[TMP3]], ptr [[E1]], align 8
// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l20_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[E1]]) #[[ATTR4:[0-9]+]]
-// CHECK2-NEXT: call void @__kmpc_target_deinit()
+// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
@@ -1519,7 +1519,7 @@ int bar(int n){
// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK2-NEXT: store i32 [[C]], ptr [[C_ADDR]], align 4
// CHECK2-NEXT: store i32 [[D]], ptr [[D_ADDR]], align 4
-// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l26_kernel_environment)
+// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
@@ -1535,7 +1535,7 @@ int bar(int n){
// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l26_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[C1]], ptr [[D2]]) #[[ATTR4]]
// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[D2]], i32 4)
// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[C1]], i32 1)
-// CHECK2-NEXT: call void @__kmpc_target_deinit()
+// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
@@ -1868,7 +1868,7 @@ int bar(int n){
// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
// CHECK2-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4
-// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_kernel_environment)
+// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
@@ -1876,7 +1876,7 @@ int bar(int n){
// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]]) #[[ATTR4]]
-// CHECK2-NEXT: call void @__kmpc_target_deinit()
+// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
@@ -2441,17 +2441,17 @@ int bar(int n){
// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK3-NEXT: store ptr [[E]], ptr [[E_ADDR]], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[E_ADDR]], align 4
-// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l20_kernel_environment)
+// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true)
// CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK3: user_code.entry:
-// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
+// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK3-NEXT: [[TMP3:%.*]] = load double, ptr [[TMP0]], align 8
// CHECK3-NEXT: store double [[TMP3]], ptr [[E1]], align 8
// CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l20_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[E1]]) #[[ATTR4:[0-9]+]]
-// CHECK3-NEXT: call void @__kmpc_target_deinit()
+// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK3-NEXT: ret void
// CHECK3: worker.exit:
// CHECK3-NEXT: ret void
@@ -2710,7 +2710,7 @@ int bar(int n){
// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK3-NEXT: store i32 [[C]], ptr [[C_ADDR]], align 4
// CHECK3-NEXT: store i32 [[D]], ptr [[D_ADDR]], align 4
-// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l26_kernel_environment)
+// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
// CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK3: user_code.entry:
@@ -2726,7 +2726,7 @@ int bar(int n){
// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l26_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[C1]], ptr [[D2]]) #[[ATTR4]]
// CHECK3-NEXT: call void @__kmpc_free_shared(ptr [[D2]], i32 4)
// CHECK3-NEXT: call void @__kmpc_free_shared(ptr [[C1]], i32 1)
-// CHECK3-NEXT: call void @__kmpc_target_deinit()
+// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
// CHECK3-NEXT: ret void
// CHECK3: worker.exit:
// CHECK3-NEXT: ret void
@@ -3059,7 +3059,7 @@ int bar(int n){
// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
// CHECK3-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4
-// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_kernel_environment)
+// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
// CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK3: user_code.entry:
@@ -3067,7 +3067,7 @@ int bar(int n){
// CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]]) #[[ATTR4]]
-// CHECK3-NEXT: call void @__kmpc_target_deinit()
+// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK3-NEXT: ret void
// CHECK3: worker.exit:
// CHECK3-NEXT: ret void
diff --git a/clang/test/OpenMP/reduction_implicit_map.cpp b/clang/test/OpenMP/reduction_implicit_map.cpp
index 40f00fe134ac6f..1674e8eca18448 100644
--- a/clang/test/OpenMP/reduction_implicit_map.cpp
+++ b/clang/test/OpenMP/reduction_implicit_map.cpp
@@ -103,16 +103,16 @@ int main()
// CHECK-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 8
// CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
// CHECK-NEXT: store ptr [[E]], ptr [[E_ADDR]], align 8
-// CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l32_kernel_environment)
+// CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
// CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK: user_code.entry:
-// CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
+// CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[E_ADDR]], align 8
// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
// CHECK-NEXT: store ptr [[TMP2]], ptr [[TMP3]], align 8
// CHECK-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l32_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
-// CHECK-NEXT: call void @__kmpc_target_deinit()
+// CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
// CHECK-NEXT: ret void
// CHECK: worker.exit:
// CHECK-NEXT: ret void
diff --git a/clang/test/OpenMP/remarks_parallel_in_multiple_target_state_machines.c b/clang/test/OpenMP/remarks_parallel_in_multiple_target_state_machines.c
index 6d0cdc2ca6d805..23fc7ef858257f 100644
--- a/clang/test/OpenMP/remarks_parallel_in_multiple_target_state_machines.c
+++ b/clang/test/OpenMP/remarks_parallel_in_multiple_target_state_machines.c
@@ -84,9 +84,9 @@ void spmd(void) {
}
#pragma omp begin declare target device_type(nohost)
-struct KernelEnvironmentTy;
-__attribute__((weak))
-extern "C" int __kmpc_target_init(struct KernelEnvironmentTy *) { // all-remark {{Could not internalize function. Some optimizations may not be possible. [OMP140]}}
+__attribute__((weak))
+extern "C" int __kmpc_target_init(void *Ident, char Mode,
+ bool UseGenericStateMachine) { // all-remark {{Could not internalize function. Some optimizations may not be possible. [OMP140]}}
return 0;
}
#pragma omp end declare target
diff --git a/clang/test/OpenMP/remarks_parallel_in_target_state_machine.c b/clang/test/OpenMP/remarks_parallel_in_target_state_machine.c
index 335d97cc97c22a..498380ff44d927 100644
--- a/clang/test/OpenMP/remarks_parallel_in_target_state_machine.c
+++ b/clang/test/OpenMP/remarks_parallel_in_target_state_machine.c
@@ -40,9 +40,9 @@ void spmd(void) {
}
#pragma omp begin declare target device_type(nohost)
-struct KernelEnvironmentTy;
-__attribute__((weak))
-extern "C" int __kmpc_target_init(struct KernelEnvironmentTy *) { // expected-remark {{Could not internalize function. Some optimizations may not be possible. [OMP140]}}
+__attribute__((weak))
+extern "C" int __kmpc_target_init(void *Ident, char Mode,
+ bool UseGenericStateMachine) { // expected-remark {{Could not internalize function. Some optimizations may not be possible. [OMP140]}}
return 0;
}
#pragma omp end declare target
diff --git a/clang/test/OpenMP/target_parallel_debug_codegen.cpp b/clang/test/OpenMP/target_parallel_debug_codegen.cpp
index ea872af611e023..0102378807d552 100644
--- a/clang/test/OpenMP/target_parallel_debug_codegen.cpp
+++ b/clang/test/OpenMP/target_parallel_debug_codegen.cpp
@@ -95,7 +95,7 @@ int main() {
// CHECK1-NEXT: [[TMP6:%.*]] = addrspacecast ptr addrspace(1) [[TMP5]] to ptr, !dbg [[DBG47]]
// CHECK1-NEXT: store ptr [[TMP6]], ptr [[_TMP2]], align 8, !dbg [[DBG47]]
// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG47]]
-// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23_kernel_environment), !dbg [[DBG47]]
+// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false), !dbg [[DBG47]]
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP8]], -1, !dbg [[DBG47]]
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG47]]
// CHECK1: user_code.entry:
@@ -113,7 +113,7 @@ int main() {
// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG48]]
// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP16]], align 8, !dbg [[DBG48]]
// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB3]], i32 [[TMP9]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG48]]
-// CHECK1-NEXT: call void @__kmpc_target_deinit(), !dbg [[DBG49:![0-9]+]]
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB5:[0-9]+]], i8 2), !dbg [[DBG49:![0-9]+]]
// CHECK1-NEXT: ret void, !dbg [[DBG51:![0-9]+]]
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void, !dbg [[DBG47]]
@@ -308,11 +308,11 @@ int main() {
// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(1) [[TMP6]] to ptr, !dbg [[DBG137]]
// CHECK1-NEXT: store ptr [[TMP7]], ptr [[_TMP2]], align 8, !dbg [[DBG137]]
// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG137]]
-// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37_kernel_environment), !dbg [[DBG137]]
+// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB7:[0-9]+]], i8 2, i1 false), !dbg [[DBG137]]
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP9]], -1, !dbg [[DBG137]]
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG137]]
// CHECK1: user_code.entry:
-// CHECK1-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB7:[0-9]+]])
+// CHECK1-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB9:[0-9]+]])
// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG138:![0-9]+]]
// CHECK1-NEXT: store i32 [[TMP11]], ptr [[A_CASTED]], align 4, !dbg [[DBG138]]
// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[A_CASTED]], align 8, !dbg [[DBG138]]
@@ -325,8 +325,8 @@ int main() {
// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP16]], align 8, !dbg [[DBG138]]
// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG138]]
// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP17]], align 8, !dbg [[DBG138]]
-// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB7]], i32 [[TMP10]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG138]]
-// CHECK1-NEXT: call void @__kmpc_target_deinit(), !dbg [[DBG139:![0-9]+]]
+// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB9]], i32 [[TMP10]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG138]]
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB11:[0-9]+]], i8 2), !dbg [[DBG139:![0-9]+]]
// CHECK1-NEXT: ret void, !dbg [[DBG141:![0-9]+]]
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void, !dbg [[DBG137]]
@@ -517,11 +517,11 @@ int main() {
// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr addrspace(1) [[TMP9]] to ptr, !dbg [[DBG212]]
// CHECK1-NEXT: store ptr [[TMP10]], ptr [[_TMP3]], align 8, !dbg [[DBG212]]
// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP3]], align 8, !dbg [[DBG212]]
-// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l51_kernel_environment), !dbg [[DBG212]]
+// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB13:[0-9]+]], i8 2, i1 false), !dbg [[DBG212]]
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP12]], -1, !dbg [[DBG212]]
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG212]]
// CHECK1: user_code.entry:
-// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB11:[0-9]+]])
+// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB15:[0-9]+]])
// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0, !dbg [[DBG213:![0-9]+]]
// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 8, !dbg [[DBG213]]
// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1, !dbg [[DBG213]]
@@ -530,8 +530,8 @@ int main() {
// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP16]], align 8, !dbg [[DBG213]]
// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG213]]
// CHECK1-NEXT: store ptr [[TMP11]], ptr [[TMP17]], align 8, !dbg [[DBG213]]
-// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB11]], i32 [[TMP13]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l51_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG213]]
-// CHECK1-NEXT: call void @__kmpc_target_deinit(), !dbg [[DBG214:![0-9]+]]
+// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB15]], i32 [[TMP13]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l51_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG213]]
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB17:[0-9]+]], i8 2), !dbg [[DBG214:![0-9]+]]
// CHECK1-NEXT: ret void, !dbg [[DBG216:![0-9]+]]
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void, !dbg [[DBG212]]
diff --git a/clang/test/OpenMP/target_parallel_for_debug_codegen.cpp b/clang/test/OpenMP/target_parallel_for_debug_codegen.cpp
index 12dbf4229f5bab..9eeef1d46a55bc 100644
--- a/clang/test/OpenMP/target_parallel_for_debug_codegen.cpp
+++ b/clang/test/OpenMP/target_parallel_for_debug_codegen.cpp
@@ -89,7 +89,7 @@ int main() {
// CHECK1-NEXT: [[TMP6:%.*]] = addrspacecast ptr addrspace(1) [[TMP5]] to ptr, !dbg [[DBG41]]
// CHECK1-NEXT: store ptr [[TMP6]], ptr [[_TMP2]], align 8, !dbg [[DBG41]]
// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG41]]
-// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l13_kernel_environment), !dbg [[DBG41]]
+// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false), !dbg [[DBG41]]
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP8]], -1, !dbg [[DBG41]]
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG41]]
// CHECK1: user_code.entry:
@@ -110,7 +110,7 @@ int main() {
// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP17]] to i1, !dbg [[DBG43]]
// CHECK1-NEXT: [[TMP18:%.*]] = zext i1 [[TOBOOL]] to i32, !dbg [[DBG42]]
// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB6]], i32 [[TMP9]], i32 [[TMP18]], i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l13_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG42]]
-// CHECK1-NEXT: call void @__kmpc_target_deinit(), !dbg [[DBG45:![0-9]+]]
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB8:[0-9]+]], i8 2), !dbg [[DBG45:![0-9]+]]
// CHECK1-NEXT: ret void, !dbg [[DBG46:![0-9]+]]
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void, !dbg [[DBG41]]
@@ -383,11 +383,11 @@ int main() {
// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(1) [[TMP6]] to ptr, !dbg [[DBG146]]
// CHECK1-NEXT: store ptr [[TMP7]], ptr [[_TMP2]], align 8, !dbg [[DBG146]]
// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG146]]
-// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27_kernel_environment), !dbg [[DBG146]]
+// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB10:[0-9]+]], i8 2, i1 false), !dbg [[DBG146]]
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP9]], -1, !dbg [[DBG146]]
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG146]]
// CHECK1: user_code.entry:
-// CHECK1-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB13:[0-9]+]])
+// CHECK1-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB15:[0-9]+]])
// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG147:![0-9]+]]
// CHECK1-NEXT: store i32 [[TMP11]], ptr [[A_CASTED]], align 4, !dbg [[DBG147]]
// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[A_CASTED]], align 8, !dbg [[DBG147]]
@@ -400,8 +400,8 @@ int main() {
// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP16]], align 8, !dbg [[DBG147]]
// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG147]]
// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP17]], align 8, !dbg [[DBG147]]
-// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB13]], i32 [[TMP10]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG147]]
-// CHECK1-NEXT: call void @__kmpc_target_deinit(), !dbg [[DBG148:![0-9]+]]
+// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB15]], i32 [[TMP10]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG147]]
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB17:[0-9]+]], i8 2), !dbg [[DBG148:![0-9]+]]
// CHECK1-NEXT: ret void, !dbg [[DBG150:![0-9]+]]
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void, !dbg [[DBG146]]
@@ -466,7 +466,7 @@ int main() {
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[I]], metadata [[META172:![0-9]+]], metadata !DIExpression()), !dbg [[DBG155]]
// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG165]]
// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !dbg [[DBG165]]
-// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB10:[0-9]+]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG173:![0-9]+]]
+// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB12:[0-9]+]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG173:![0-9]+]]
// CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg [[DBG165]]
// CHECK1: omp.dispatch.cond:
// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG168]]
@@ -559,7 +559,7 @@ int main() {
// CHECK1-NEXT: store i32 [[ADD25]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG165]]
// CHECK1-NEXT: br label [[OMP_DISPATCH_COND]], !dbg [[DBG173]], !llvm.loop [[LOOP203:![0-9]+]]
// CHECK1: omp.dispatch.end:
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB12:[0-9]+]], i32 [[TMP10]]), !dbg [[DBG202:![0-9]+]]
+// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB14:[0-9]+]], i32 [[TMP10]]), !dbg [[DBG202:![0-9]+]]
// CHECK1-NEXT: ret void, !dbg [[DBG204:![0-9]+]]
//
//
@@ -665,11 +665,11 @@ int main() {
// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr addrspace(1) [[TMP9]] to ptr, !dbg [[DBG236]]
// CHECK1-NEXT: store ptr [[TMP10]], ptr [[_TMP3]], align 8, !dbg [[DBG236]]
// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP3]], align 8, !dbg [[DBG236]]
-// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_kernel_environment), !dbg [[DBG236]]
+// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB19:[0-9]+]], i8 2, i1 false), !dbg [[DBG236]]
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP12]], -1, !dbg [[DBG236]]
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG236]]
// CHECK1: user_code.entry:
-// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB20:[0-9]+]])
+// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB24:[0-9]+]])
// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0, !dbg [[DBG237:![0-9]+]]
// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 8, !dbg [[DBG237]]
// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1, !dbg [[DBG237]]
@@ -678,8 +678,8 @@ int main() {
// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP16]], align 8, !dbg [[DBG237]]
// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG237]]
// CHECK1-NEXT: store ptr [[TMP11]], ptr [[TMP17]], align 8, !dbg [[DBG237]]
-// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB20]], i32 [[TMP13]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG237]]
-// CHECK1-NEXT: call void @__kmpc_target_deinit(), !dbg [[DBG238:![0-9]+]]
+// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB24]], i32 [[TMP13]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG237]]
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB26:[0-9]+]], i8 2), !dbg [[DBG238:![0-9]+]]
// CHECK1-NEXT: ret void, !dbg [[DBG240:![0-9]+]]
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void, !dbg [[DBG236]]
@@ -749,7 +749,7 @@ int main() {
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[I]], metadata [[META262:![0-9]+]], metadata !DIExpression()), !dbg [[DBG245]]
// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG255]]
// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4, !dbg [[DBG255]]
-// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB17:[0-9]+]], i32 [[TMP13]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG263:![0-9]+]]
+// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB21:[0-9]+]], i32 [[TMP13]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG263:![0-9]+]]
// CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg [[DBG255]]
// CHECK1: omp.dispatch.cond:
// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG258]]
@@ -846,7 +846,7 @@ int main() {
// CHECK1-NEXT: store i32 [[ADD29]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG255]]
// CHECK1-NEXT: br label [[OMP_DISPATCH_COND]], !dbg [[DBG263]], !llvm.loop [[LOOP294:![0-9]+]]
// CHECK1: omp.dispatch.end:
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB19:[0-9]+]], i32 [[TMP13]]), !dbg [[DBG293:![0-9]+]]
+// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB23:[0-9]+]], i32 [[TMP13]]), !dbg [[DBG293:![0-9]+]]
// CHECK1-NEXT: ret void, !dbg [[DBG295:![0-9]+]]
//
//
diff --git a/clang/test/OpenMP/target_parallel_generic_loop_codegen-3.cpp b/clang/test/OpenMP/target_parallel_generic_loop_codegen-3.cpp
index b92d173e13d63d..0709ae53da28f9 100644
--- a/clang/test/OpenMP/target_parallel_generic_loop_codegen-3.cpp
+++ b/clang/test/OpenMP/target_parallel_generic_loop_codegen-3.cpp
@@ -89,7 +89,7 @@ int main() {
// CHECK1-NEXT: [[TMP6:%.*]] = addrspacecast ptr addrspace(1) [[TMP5]] to ptr, !dbg [[DBG41]]
// CHECK1-NEXT: store ptr [[TMP6]], ptr [[_TMP2]], align 8, !dbg [[DBG41]]
// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG41]]
-// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l13_kernel_environment), !dbg [[DBG41]]
+// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false), !dbg [[DBG41]]
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP8]], -1, !dbg [[DBG41]]
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG41]]
// CHECK1: user_code.entry:
@@ -110,7 +110,7 @@ int main() {
// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP17]] to i1, !dbg [[DBG43]]
// CHECK1-NEXT: [[TMP18:%.*]] = zext i1 [[TOBOOL]] to i32, !dbg [[DBG42]]
// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB6]], i32 [[TMP9]], i32 [[TMP18]], i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l13_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG42]]
-// CHECK1-NEXT: call void @__kmpc_target_deinit(), !dbg [[DBG45:![0-9]+]]
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB8:[0-9]+]], i8 2), !dbg [[DBG45:![0-9]+]]
// CHECK1-NEXT: ret void, !dbg [[DBG46:![0-9]+]]
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void, !dbg [[DBG41]]
@@ -383,11 +383,11 @@ int main() {
// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(1) [[TMP6]] to ptr, !dbg [[DBG146]]
// CHECK1-NEXT: store ptr [[TMP7]], ptr [[_TMP2]], align 8, !dbg [[DBG146]]
// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG146]]
-// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27_kernel_environment), !dbg [[DBG146]]
+// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB10:[0-9]+]], i8 2, i1 false), !dbg [[DBG146]]
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP9]], -1, !dbg [[DBG146]]
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG146]]
// CHECK1: user_code.entry:
-// CHECK1-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB13:[0-9]+]])
+// CHECK1-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB15:[0-9]+]])
// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG147:![0-9]+]]
// CHECK1-NEXT: store i32 [[TMP11]], ptr [[A_CASTED]], align 4, !dbg [[DBG147]]
// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[A_CASTED]], align 8, !dbg [[DBG147]]
@@ -400,8 +400,8 @@ int main() {
// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP16]], align 8, !dbg [[DBG147]]
// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG147]]
// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP17]], align 8, !dbg [[DBG147]]
-// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB13]], i32 [[TMP10]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG147]]
-// CHECK1-NEXT: call void @__kmpc_target_deinit(), !dbg [[DBG148:![0-9]+]]
+// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB15]], i32 [[TMP10]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG147]]
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB17:[0-9]+]], i8 2), !dbg [[DBG148:![0-9]+]]
// CHECK1-NEXT: ret void, !dbg [[DBG150:![0-9]+]]
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void, !dbg [[DBG146]]
@@ -466,7 +466,7 @@ int main() {
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[I]], metadata [[META172:![0-9]+]], metadata !DIExpression()), !dbg [[DBG155]]
// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG165]]
// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !dbg [[DBG165]]
-// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB10:[0-9]+]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG173:![0-9]+]]
+// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB12:[0-9]+]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG173:![0-9]+]]
// CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg [[DBG165]]
// CHECK1: omp.dispatch.cond:
// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG168]]
@@ -559,7 +559,7 @@ int main() {
// CHECK1-NEXT: store i32 [[ADD25]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG165]]
// CHECK1-NEXT: br label [[OMP_DISPATCH_COND]], !dbg [[DBG173]], !llvm.loop [[LOOP203:![0-9]+]]
// CHECK1: omp.dispatch.end:
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB12:[0-9]+]], i32 [[TMP10]]), !dbg [[DBG202:![0-9]+]]
+// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB14:[0-9]+]], i32 [[TMP10]]), !dbg [[DBG202:![0-9]+]]
// CHECK1-NEXT: ret void, !dbg [[DBG204:![0-9]+]]
//
//
@@ -665,11 +665,11 @@ int main() {
// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr addrspace(1) [[TMP9]] to ptr, !dbg [[DBG236]]
// CHECK1-NEXT: store ptr [[TMP10]], ptr [[_TMP3]], align 8, !dbg [[DBG236]]
// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP3]], align 8, !dbg [[DBG236]]
-// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_kernel_environment), !dbg [[DBG236]]
+// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB19:[0-9]+]], i8 2, i1 false), !dbg [[DBG236]]
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP12]], -1, !dbg [[DBG236]]
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG236]]
// CHECK1: user_code.entry:
-// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB20:[0-9]+]])
+// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB24:[0-9]+]])
// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0, !dbg [[DBG237:![0-9]+]]
// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 8, !dbg [[DBG237]]
// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1, !dbg [[DBG237]]
@@ -678,8 +678,8 @@ int main() {
// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP16]], align 8, !dbg [[DBG237]]
// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG237]]
// CHECK1-NEXT: store ptr [[TMP11]], ptr [[TMP17]], align 8, !dbg [[DBG237]]
-// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB20]], i32 [[TMP13]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG237]]
-// CHECK1-NEXT: call void @__kmpc_target_deinit(), !dbg [[DBG238:![0-9]+]]
+// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB24]], i32 [[TMP13]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG237]]
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB26:[0-9]+]], i8 2), !dbg [[DBG238:![0-9]+]]
// CHECK1-NEXT: ret void, !dbg [[DBG240:![0-9]+]]
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void, !dbg [[DBG236]]
@@ -749,7 +749,7 @@ int main() {
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[I]], metadata [[META262:![0-9]+]], metadata !DIExpression()), !dbg [[DBG245]]
// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG255]]
// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4, !dbg [[DBG255]]
-// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB17:[0-9]+]], i32 [[TMP13]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG263:![0-9]+]]
+// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB21:[0-9]+]], i32 [[TMP13]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG263:![0-9]+]]
// CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg [[DBG255]]
// CHECK1: omp.dispatch.cond:
// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG258]]
@@ -846,7 +846,7 @@ int main() {
// CHECK1-NEXT: store i32 [[ADD29]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG255]]
// CHECK1-NEXT: br label [[OMP_DISPATCH_COND]], !dbg [[DBG263]], !llvm.loop [[LOOP294:![0-9]+]]
// CHECK1: omp.dispatch.end:
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB19:[0-9]+]], i32 [[TMP13]]), !dbg [[DBG293:![0-9]+]]
+// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB23:[0-9]+]], i32 [[TMP13]]), !dbg [[DBG293:![0-9]+]]
// CHECK1-NEXT: ret void, !dbg [[DBG295:![0-9]+]]
//
//
diff --git a/clang/test/OpenMP/target_parallel_generic_loop_codegen.cpp b/clang/test/OpenMP/target_parallel_generic_loop_codegen.cpp
index bd889ccad7b14b..a00f3de01923c9 100644
--- a/clang/test/OpenMP/target_parallel_generic_loop_codegen.cpp
+++ b/clang/test/OpenMP/target_parallel_generic_loop_codegen.cpp
@@ -53,18 +53,18 @@ int main() {
// IR-GPU-NEXT: store ptr [[DEVICE_RESULT]], ptr [[DEVICE_RESULT_ADDR_ASCAST]], align 8
// IR-GPU-NEXT: store ptr [[OMP_PTEAM_MEM_ALLOC]], ptr [[OMP_PTEAM_MEM_ALLOC_ADDR_ASCAST]], align 8
// IR-GPU-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DEVICE_RESULT_ADDR_ASCAST]], align 8
-// IR-GPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37_kernel_environment to ptr))
+// IR-GPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @[[GLOB1:[0-9]+]] to ptr), i8 2, i1 false)
// IR-GPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// IR-GPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// IR-GPU: user_code.entry:
-// IR-GPU-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1:[0-9]+]] to ptr))
+// IR-GPU-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr))
// IR-GPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[OMP_PTEAM_MEM_ALLOC_ADDR_ASCAST]], align 8
// IR-GPU-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
// IR-GPU-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8
// IR-GPU-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
// IR-GPU-NEXT: store ptr [[TMP3]], ptr [[TMP5]], align 8
// IR-GPU-NEXT: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP2]], i32 1, i32 64, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 2)
-// IR-GPU-NEXT: call void @__kmpc_target_deinit()
+// IR-GPU-NEXT: call void @__kmpc_target_deinit(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i8 2)
// IR-GPU-NEXT: ret void
// IR-GPU: worker.exit:
// IR-GPU-NEXT: ret void
diff --git a/clang/test/OpenMP/target_teams_generic_loop_codegen.cpp b/clang/test/OpenMP/target_teams_generic_loop_codegen.cpp
index f32e7e9993e64b..70a1c54fee33f2 100644
--- a/clang/test/OpenMP/target_teams_generic_loop_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_generic_loop_codegen.cpp
@@ -1158,18 +1158,18 @@ int foo() {
// IR-GPU-NEXT: store i64 [[J]], ptr [[J_ADDR_ASCAST]], align 8
// IR-GPU-NEXT: store ptr [[SUM]], ptr [[SUM_ADDR_ASCAST]], align 8
// IR-GPU-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SUM_ADDR_ASCAST]], align 8
-// IR-GPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22_kernel_environment to ptr))
+// IR-GPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @[[GLOB1:[0-9]+]] to ptr), i8 2, i1 false)
// IR-GPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// IR-GPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// IR-GPU: user_code.entry:
-// IR-GPU-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1:[0-9]+]] to ptr))
+// IR-GPU-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr))
// IR-GPU-NEXT: [[TMP3:%.*]] = load i32, ptr [[J_ADDR_ASCAST]], align 4
// IR-GPU-NEXT: store i32 [[TMP3]], ptr [[J_CASTED_ASCAST]], align 4
// IR-GPU-NEXT: [[TMP4:%.*]] = load i64, ptr [[J_CASTED_ASCAST]], align 8
// IR-GPU-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
// IR-GPU-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
// IR-GPU-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR2:[0-9]+]]
-// IR-GPU-NEXT: call void @__kmpc_target_deinit()
+// IR-GPU-NEXT: call void @__kmpc_target_deinit(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i8 2)
// IR-GPU-NEXT: ret void
// IR-GPU: worker.exit:
// IR-GPU-NEXT: ret void
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index a477c80c14ec80..fc838765949c4d 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -1997,7 +1997,8 @@ class OpenMPIRBuilder {
/// Create a runtime call for kmpc_target_deinit
///
/// \param Loc The insert and source location description.
- void createTargetDeinit(const LocationDescription &Loc);
+ /// \param IsSPMD Flag to indicate if the kernel is an SPMD kernel or not.
+ void createTargetDeinit(const LocationDescription &Loc, bool IsSPMD);
///}
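
For orientation, a minimal sketch of how a frontend drives the restored interface
(not part of this patch; OMPBuilder, an llvm::OpenMPIRBuilder, and Builder, an
llvm::IRBuilder<>, are assumed to be set up and positioned in the kernel entry
block):

  // Generic-mode prologue/epilogue with the pre-kernel-environment API.
  llvm::OpenMPIRBuilder::InsertPointTy IP =
      OMPBuilder.createTargetInit(Builder, /*IsSPMD=*/false);
  Builder.restoreIP(IP);
  // ... emit the target region body here ...
  OMPBuilder.createTargetDeinit(Builder, /*IsSPMD=*/false);
  Builder.CreateRetVoid();
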
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
index 816c9a3f054e24..b2dfda490585b5 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
@@ -96,11 +96,6 @@ __OMP_STRUCT_TYPE(KernelArgs, __tgt_kernel_arguments, false, Int32, Int32, VoidP
Int64, Int64, Int32Arr3Ty, Int32Arr3Ty, Int32)
__OMP_STRUCT_TYPE(AsyncInfo, __tgt_async_info, false, Int8Ptr)
__OMP_STRUCT_TYPE(DependInfo, kmp_dep_info, false, SizeTy, SizeTy, Int8)
-__OMP_STRUCT_TYPE(ConfigurationEnvironment, ConfigurationEnvironmentTy, false,
- Int8, Int8, Int8)
-__OMP_STRUCT_TYPE(DynamicEnvironment, DynamicEnvironmentTy, false, Int16)
-__OMP_STRUCT_TYPE(KernelEnvironment, KernelEnvironmentTy, false,
- ConfigurationEnvironment, IdentPtr, DynamicEnvironmentPtr)
#undef __OMP_STRUCT_TYPE
#undef OMP_STRUCT_TYPE
@@ -457,8 +452,8 @@ __OMP_RTL(__kmpc_task_allow_completion_event, false, VoidPtr, IdentPtr,
/* Int */ Int32, /* kmp_task_t */ VoidPtr)
/// OpenMP Device runtime functions
-__OMP_RTL(__kmpc_target_init, false, Int32, KernelEnvironmentPtr)
-__OMP_RTL(__kmpc_target_deinit, false, Void,)
+__OMP_RTL(__kmpc_target_init, false, Int32, IdentPtr, Int8, Int1)
+__OMP_RTL(__kmpc_target_deinit, false, Void, IdentPtr, Int8)
__OMP_RTL(__kmpc_kernel_prepare_parallel, false, Void, VoidPtr)
__OMP_RTL(__kmpc_parallel_51, false, Void, IdentPtr, Int32, Int32, Int32, Int32,
VoidPtr, VoidPtr, VoidPtrPtr, SizeTy)
@@ -1025,9 +1020,9 @@ __OMP_RTL_ATTRS(__kmpc_task_allow_completion_event, DefaultAttrs,
ReturnPtrAttrs, ParamAttrs(ReadOnlyPtrAttrs, SExt))
__OMP_RTL_ATTRS(__kmpc_target_init, AttributeSet(), SExt,
- ParamAttrs(AttributeSet()))
+ ParamAttrs(AttributeSet(), SExt, SExt))
__OMP_RTL_ATTRS(__kmpc_target_deinit, AttributeSet(), AttributeSet(),
- ParamAttrs())
+ ParamAttrs(AttributeSet(), SExt))
__OMP_RTL_ATTRS(__kmpc_parallel_51, AlwaysInlineAttrs, AttributeSet(),
ParamAttrs(AttributeSet(), SExt, SExt, SExt, SExt,
AttributeSet(), AttributeSet(), AttributeSet(),
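
Spelled out as plain prototypes, the restored table rows above correspond to the
following declarations (a sketch only; parameter names are illustrative and the
device runtime headers use IdentTy * rather than void * for the first argument):

  #include <cstdint>

  extern "C" int32_t __kmpc_target_init(void *Ident, int8_t Mode,
                                        bool UseGenericStateMachine);
  extern "C" void __kmpc_target_deinit(void *Ident, int8_t Mode);
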
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 8cf57af88470e4..4c3696f9c342ab 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -24,7 +24,6 @@
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CFG.h"
-#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
@@ -4017,60 +4016,14 @@ OpenMPIRBuilder::createTargetInit(const LocationDescription &Loc, bool IsSPMD) {
ConstantInt *IsSPMDVal = ConstantInt::getSigned(
IntegerType::getInt8Ty(Int8->getContext()),
IsSPMD ? OMP_TGT_EXEC_MODE_SPMD : OMP_TGT_EXEC_MODE_GENERIC);
- ConstantInt *UseGenericStateMachineVal = ConstantInt::getSigned(
- IntegerType::getInt8Ty(Int8->getContext()), !IsSPMD);
- ConstantInt *MayUseNestedParallelismVal =
- ConstantInt::getSigned(IntegerType::getInt8Ty(Int8->getContext()), true);
- ConstantInt *DebugIndentionLevelVal =
- ConstantInt::getSigned(IntegerType::getInt16Ty(Int8->getContext()), 0);
-
- // We need to strip the debug prefix to get the correct kernel name.
- Function *Kernel = Builder.GetInsertBlock()->getParent();
- StringRef KernelName = Kernel->getName();
- const std::string DebugPrefix = "_debug__";
- if (KernelName.ends_with(DebugPrefix))
- KernelName = KernelName.drop_back(DebugPrefix.length());
+ ConstantInt *UseGenericStateMachine =
+ ConstantInt::getBool(Int32->getContext(), !IsSPMD);
Function *Fn = getOrCreateRuntimeFunctionPtr(
omp::RuntimeFunction::OMPRTL___kmpc_target_init);
- const DataLayout &DL = Fn->getParent()->getDataLayout();
-
- Twine DynamicEnvironmentName = KernelName + "_dynamic_environment";
- Constant *DynamicEnvironmentInitializer =
- ConstantStruct::get(DynamicEnvironment, {DebugIndentionLevelVal});
- Constant *DynamicEnvironmentGV = new GlobalVariable(
- M, DynamicEnvironment, /* IsConstant */ false,
- GlobalValue::InternalLinkage, DynamicEnvironmentInitializer,
- DynamicEnvironmentName,
- /* InsertBefore */ nullptr, llvm::GlobalValue::NotThreadLocal,
- DL.getDefaultGlobalsAddressSpace());
- if (DynamicEnvironmentGV->getType() != DynamicEnvironmentPtr)
- DynamicEnvironmentGV = ConstantExpr::getAddrSpaceCast(
- DynamicEnvironmentGV, DynamicEnvironmentPtr);
-
- Constant *ConfigurationEnvironmentInitializer = ConstantStruct::get(
- ConfigurationEnvironment, {
- UseGenericStateMachineVal,
- MayUseNestedParallelismVal,
- IsSPMDVal,
- });
- Constant *KernelEnvironmentInitializer = ConstantStruct::get(
- KernelEnvironment, {
- ConfigurationEnvironmentInitializer,
- Ident,
- DynamicEnvironmentGV,
- });
- Twine KernelEnvironmentName = KernelName + "_kernel_environment";
- Constant *KernelEnvironmentGV = new GlobalVariable(
- M, KernelEnvironment, /* IsConstant */ true, GlobalValue::ExternalLinkage,
- KernelEnvironmentInitializer, KernelEnvironmentName,
- /* InsertBefore */ nullptr, llvm::GlobalValue::NotThreadLocal,
- DL.getDefaultGlobalsAddressSpace());
- if (KernelEnvironmentGV->getType() != KernelEnvironmentPtr)
- KernelEnvironmentGV = ConstantExpr::getAddrSpaceCast(KernelEnvironmentGV,
- KernelEnvironmentPtr);
-
- CallInst *ThreadKind = Builder.CreateCall(Fn, {KernelEnvironmentGV});
+
+ CallInst *ThreadKind = Builder.CreateCall(
+ Fn, {Ident, IsSPMDVal, UseGenericStateMachine});
Value *ExecUserCode = Builder.CreateICmpEQ(
ThreadKind, ConstantInt::get(ThreadKind->getType(), -1),
@@ -4103,14 +4056,22 @@ OpenMPIRBuilder::createTargetInit(const LocationDescription &Loc, bool IsSPMD) {
return InsertPointTy(UserCodeEntryBB, UserCodeEntryBB->getFirstInsertionPt());
}
-void OpenMPIRBuilder::createTargetDeinit(const LocationDescription &Loc) {
+void OpenMPIRBuilder::createTargetDeinit(const LocationDescription &Loc,
+ bool IsSPMD) {
if (!updateToLocation(Loc))
return;
+ uint32_t SrcLocStrSize;
+ Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
+ Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
+ ConstantInt *IsSPMDVal = ConstantInt::getSigned(
+ IntegerType::getInt8Ty(Int8->getContext()),
+ IsSPMD ? OMP_TGT_EXEC_MODE_SPMD : OMP_TGT_EXEC_MODE_GENERIC);
+
Function *Fn = getOrCreateRuntimeFunctionPtr(
omp::RuntimeFunction::OMPRTL___kmpc_target_deinit);
- Builder.CreateCall(Fn, {});
+ Builder.CreateCall(Fn, {Ident, IsSPMDVal});
}
void OpenMPIRBuilder::setOutlinedTargetRegionFunctionAttributes(
@@ -4360,7 +4321,7 @@ createOutlinedFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder,
// Insert target deinit call in the device compilation pass.
if (OMPBuilder.Config.isTargetDevice())
- OMPBuilder.createTargetDeinit(Builder);
+ OMPBuilder.createTargetDeinit(Builder, /*IsSPMD*/ false);
// Insert return instruction.
Builder.CreateRetVoid();
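
With the init call again carrying the execution mode as its second operand,
analyses can read the mode straight off the call site; the OpenMPOpt changes
below return to exactly this pattern. A small sketch (CB is assumed to be the
llvm::CallBase * of the __kmpc_target_init call):

  constexpr int InitModeArgNo = 1;
  bool IsSPMDKernel = false;
  if (auto *ModeCI = llvm::dyn_cast<llvm::ConstantInt>(
          CB->getArgOperand(InitModeArgNo)))
    IsSPMDKernel =
        (ModeCI->getSExtValue() & llvm::omp::OMP_TGT_EXEC_MODE_SPMD) != 0;
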
diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
index 0a3832e2585de4..6aba57d49af8f3 100644
--- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -33,7 +33,6 @@
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Frontend/OpenMP/OMPConstants.h"
-#include "llvm/Frontend/OpenMP/OMPDeviceConstants.h"
#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
#include "llvm/IR/Assumptions.h"
#include "llvm/IR/BasicBlock.h"
@@ -182,80 +181,6 @@ STATISTIC(NumBarriersEliminated, "Number of redundant barriers eliminated");
static constexpr auto TAG = "[" DEBUG_TYPE "]";
#endif
-namespace KernelInfo {
-
-// struct ConfigurationEnvironmentTy {
-// uint8_t UseGenericStateMachine;
-// uint8_t MayUseNestedParallelism;
-// llvm::omp::OMPTgtExecModeFlags ExecMode;
-// };
-
-// struct DynamicEnvironmentTy {
-// uint16_t DebugIndentionLevel;
-// };
-
-// struct KernelEnvironmentTy {
-// ConfigurationEnvironmentTy Configuration;
-// IdentTy *Ident;
-// DynamicEnvironmentTy *DynamicEnv;
-// };
-
-#define KERNEL_ENVIRONMENT_IDX(MEMBER, IDX) \
- constexpr const unsigned MEMBER##Idx = IDX;
-
-KERNEL_ENVIRONMENT_IDX(Configuration, 0)
-KERNEL_ENVIRONMENT_IDX(Ident, 1)
-
-#undef KERNEL_ENVIRONMENT_IDX
-
-#define KERNEL_ENVIRONMENT_CONFIGURATION_IDX(MEMBER, IDX) \
- constexpr const unsigned MEMBER##Idx = IDX;
-
-KERNEL_ENVIRONMENT_CONFIGURATION_IDX(UseGenericStateMachine, 0)
-KERNEL_ENVIRONMENT_CONFIGURATION_IDX(MayUseNestedParallelism, 1)
-KERNEL_ENVIRONMENT_CONFIGURATION_IDX(ExecMode, 2)
-
-#undef KERNEL_ENVIRONMENT_CONFIGURATION_IDX
-
-#define KERNEL_ENVIRONMENT_GETTER(MEMBER, RETURNTYPE) \
- RETURNTYPE *get##MEMBER##FromKernelEnvironment(ConstantStruct *KernelEnvC) { \
- return cast<RETURNTYPE>(KernelEnvC->getAggregateElement(MEMBER##Idx)); \
- }
-
-KERNEL_ENVIRONMENT_GETTER(Ident, Constant)
-KERNEL_ENVIRONMENT_GETTER(Configuration, ConstantStruct)
-
-#undef KERNEL_ENVIRONMENT_GETTER
-
-#define KERNEL_ENVIRONMENT_CONFIGURATION_GETTER(MEMBER) \
- ConstantInt *get##MEMBER##FromKernelEnvironment( \
- ConstantStruct *KernelEnvC) { \
- ConstantStruct *ConfigC = \
- getConfigurationFromKernelEnvironment(KernelEnvC); \
- return dyn_cast<ConstantInt>(ConfigC->getAggregateElement(MEMBER##Idx)); \
- }
-
-KERNEL_ENVIRONMENT_CONFIGURATION_GETTER(UseGenericStateMachine)
-KERNEL_ENVIRONMENT_CONFIGURATION_GETTER(MayUseNestedParallelism)
-KERNEL_ENVIRONMENT_CONFIGURATION_GETTER(ExecMode)
-
-#undef KERNEL_ENVIRONMENT_CONFIGURATION_GETTER
-
-GlobalVariable *
-getKernelEnvironementGVFromKernelInitCB(CallBase *KernelInitCB) {
- constexpr const int InitKernelEnvironmentArgNo = 0;
- return cast<GlobalVariable>(
- KernelInitCB->getArgOperand(InitKernelEnvironmentArgNo)
- ->stripPointerCasts());
-}
-
-ConstantStruct *getKernelEnvironementFromKernelInitCB(CallBase *KernelInitCB) {
- GlobalVariable *KernelEnvGV =
- getKernelEnvironementGVFromKernelInitCB(KernelInitCB);
- return cast<ConstantStruct>(KernelEnvGV->getInitializer());
-}
-} // namespace KernelInfo
-
namespace {
struct AAHeapToShared;
@@ -685,10 +610,6 @@ struct KernelInfoState : AbstractState {
/// one we abort as the kernel is malformed.
CallBase *KernelInitCB = nullptr;
- /// The constant kernel environement as taken from and passed to
- /// __kmpc_target_init.
- ConstantStruct *KernelEnvC = nullptr;
-
/// The __kmpc_target_deinit call in this kernel, if any. If we find more than
/// one we abort as the kernel is malformed.
CallBase *KernelDeinitCB = nullptr;
@@ -793,12 +714,6 @@ struct KernelInfoState : AbstractState {
"assumptions.");
KernelDeinitCB = KIS.KernelDeinitCB;
}
- if (KIS.KernelEnvC) {
- if (KernelEnvC && KernelEnvC != KIS.KernelEnvC)
- llvm_unreachable("Kernel that calls another kernel violates OpenMP-Opt "
- "assumptions.");
- KernelEnvC = KIS.KernelEnvC;
- }
SPMDCompatibilityTracker ^= KIS.SPMDCompatibilityTracker;
ReachedKnownParallelRegions ^= KIS.ReachedKnownParallelRegions;
ReachedUnknownParallelRegions ^= KIS.ReachedUnknownParallelRegions;
@@ -2864,11 +2779,9 @@ struct AAExecutionDomainFunction : public AAExecutionDomain {
CB = CB ? OpenMPOpt::getCallIfRegularCall(*CB, &RFI) : nullptr;
if (!CB)
return false;
- ConstantStruct *KernelEnvC =
- KernelInfo::getKernelEnvironementFromKernelInitCB(CB);
- ConstantInt *ExecModeC =
- KernelInfo::getExecModeFromKernelEnvironment(KernelEnvC);
- return ExecModeC->getSExtValue() & OMP_TGT_EXEC_MODE_GENERIC;
+ const int InitModeArgNo = 1;
+ auto *ModeCI = dyn_cast<ConstantInt>(CB->getOperand(InitModeArgNo));
+ return ModeCI && (ModeCI->getSExtValue() & OMP_TGT_EXEC_MODE_GENERIC);
}
if (C->isZero()) {
@@ -3587,29 +3500,6 @@ struct AAKernelInfoFunction : AAKernelInfo {
return GuardedInstructions;
}
- void setConfigurationOfKernelEnvironment(ConstantStruct *ConfigC) {
- Constant *NewKernelEnvC = ConstantFoldInsertValueInstruction(
- KernelEnvC, ConfigC, {KernelInfo::ConfigurationIdx});
- assert(NewKernelEnvC && "Failed to create new kernel environment");
- KernelEnvC = cast<ConstantStruct>(NewKernelEnvC);
- }
-
-#define KERNEL_ENVIRONMENT_CONFIGURATION_SETTER(MEMBER) \
- void set##MEMBER##OfKernelEnvironment(ConstantInt *NewVal) { \
- ConstantStruct *ConfigC = \
- KernelInfo::getConfigurationFromKernelEnvironment(KernelEnvC); \
- Constant *NewConfigC = ConstantFoldInsertValueInstruction( \
- ConfigC, NewVal, {KernelInfo::MEMBER##Idx}); \
- assert(NewConfigC && "Failed to create new configuration environment"); \
- setConfigurationOfKernelEnvironment(cast<ConstantStruct>(NewConfigC)); \
- }
-
- KERNEL_ENVIRONMENT_CONFIGURATION_SETTER(UseGenericStateMachine)
- KERNEL_ENVIRONMENT_CONFIGURATION_SETTER(MayUseNestedParallelism)
- KERNEL_ENVIRONMENT_CONFIGURATION_SETTER(ExecMode)
-
-#undef KERNEL_ENVIRONMENT_CONFIGURATION_SETTER
-
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
// This is a high-level transform that might change the constant arguments
@@ -3658,52 +3548,61 @@ struct AAKernelInfoFunction : AAKernelInfo {
ReachingKernelEntries.insert(Fn);
IsKernelEntry = true;
- KernelEnvC =
- KernelInfo::getKernelEnvironementFromKernelInitCB(KernelInitCB);
- GlobalVariable *KernelEnvGV =
- KernelInfo::getKernelEnvironementGVFromKernelInitCB(KernelInitCB);
+ // For kernels we might need to initialize/finalize the IsSPMD state and
+ // we need to register a simplification callback so that the Attributor
+ // knows the constant arguments to __kmpc_target_init and
+ // __kmpc_target_deinit might actually change.
- Attributor::GlobalVariableSimplifictionCallbackTy
- KernelConfigurationSimplifyCB =
- [&](const GlobalVariable &GV, const AbstractAttribute *AA,
- bool &UsedAssumedInformation) -> std::optional<Constant *> {
- return KernelEnvC;
+ Attributor::SimplifictionCallbackTy StateMachineSimplifyCB =
+ [&](const IRPosition &IRP, const AbstractAttribute *AA,
+ bool &UsedAssumedInformation) -> std::optional<Value *> {
+ return nullptr;
+ };
+
+ Attributor::SimplifictionCallbackTy ModeSimplifyCB =
+ [&](const IRPosition &IRP, const AbstractAttribute *AA,
+ bool &UsedAssumedInformation) -> std::optional<Value *> {
+ // IRP represents the "SPMDCompatibilityTracker" argument of an
+ // __kmpc_target_init or
+ // __kmpc_target_deinit call. We will answer this one with the internal
+ // state.
+ if (!SPMDCompatibilityTracker.isValidState())
+ return nullptr;
+ if (!SPMDCompatibilityTracker.isAtFixpoint()) {
+ if (AA)
+ A.recordDependence(*this, *AA, DepClassTy::OPTIONAL);
+ UsedAssumedInformation = true;
+ } else {
+ UsedAssumedInformation = false;
+ }
+ auto *Val = ConstantInt::getSigned(
+ IntegerType::getInt8Ty(IRP.getAnchorValue().getContext()),
+ SPMDCompatibilityTracker.isAssumed() ? OMP_TGT_EXEC_MODE_SPMD
+ : OMP_TGT_EXEC_MODE_GENERIC);
+ return Val;
};
- A.registerGlobalVariableSimplificationCallback(
- *KernelEnvGV, KernelConfigurationSimplifyCB);
+ constexpr const int InitModeArgNo = 1;
+ constexpr const int DeinitModeArgNo = 1;
+ constexpr const int InitUseStateMachineArgNo = 2;
+ A.registerSimplificationCallback(
+ IRPosition::callsite_argument(*KernelInitCB, InitUseStateMachineArgNo),
+ StateMachineSimplifyCB);
+ A.registerSimplificationCallback(
+ IRPosition::callsite_argument(*KernelInitCB, InitModeArgNo),
+ ModeSimplifyCB);
+ A.registerSimplificationCallback(
+ IRPosition::callsite_argument(*KernelDeinitCB, DeinitModeArgNo),
+ ModeSimplifyCB);
// Check if we know we are in SPMD-mode already.
- ConstantInt *ExecModeC =
- KernelInfo::getExecModeFromKernelEnvironment(KernelEnvC);
- ConstantInt *AssumedExecModeC = ConstantInt::get(
- ExecModeC->getType(),
- ExecModeC->getSExtValue() | OMP_TGT_EXEC_MODE_GENERIC_SPMD);
- if (ExecModeC->getSExtValue() & OMP_TGT_EXEC_MODE_SPMD)
+ ConstantInt *ModeArg =
+ dyn_cast<ConstantInt>(KernelInitCB->getArgOperand(InitModeArgNo));
+ if (ModeArg && (ModeArg->getSExtValue() & OMP_TGT_EXEC_MODE_SPMD))
SPMDCompatibilityTracker.indicateOptimisticFixpoint();
+ // This is a generic region but SPMDization is disabled so stop tracking.
else if (DisableOpenMPOptSPMDization)
- // This is a generic region but SPMDization is disabled so stop
- // tracking.
SPMDCompatibilityTracker.indicatePessimisticFixpoint();
- else
- setExecModeOfKernelEnvironment(AssumedExecModeC);
-
- ConstantInt *MayUseNestedParallelismC =
- KernelInfo::getMayUseNestedParallelismFromKernelEnvironment(KernelEnvC);
- ConstantInt *AssumedMayUseNestedParallelismC = ConstantInt::get(
- MayUseNestedParallelismC->getType(), NestedParallelism);
- setMayUseNestedParallelismOfKernelEnvironment(
- AssumedMayUseNestedParallelismC);
-
- if (!DisableOpenMPOptStateMachineRewrite) {
- ConstantInt *UseGenericStateMachineC =
- KernelInfo::getUseGenericStateMachineFromKernelEnvironment(
- KernelEnvC);
- ConstantInt *AssumedUseGenericStateMachineC =
- ConstantInt::get(UseGenericStateMachineC->getType(), false);
- setUseGenericStateMachineOfKernelEnvironment(
- AssumedUseGenericStateMachineC);
- }
// Register virtual uses of functions we might need to preserve.
auto RegisterVirtualUse = [&](RuntimeFunction RFKind,
@@ -3804,21 +3703,21 @@ struct AAKernelInfoFunction : AAKernelInfo {
if (!KernelInitCB || !KernelDeinitCB)
return ChangeStatus::UNCHANGED;
- ChangeStatus Changed = ChangeStatus::UNCHANGED;
+ /// Insert nested Parallelism global variable
+ Function *Kernel = getAnchorScope();
+ Module &M = *Kernel->getParent();
+ Type *Int8Ty = Type::getInt8Ty(M.getContext());
+ new GlobalVariable(M, Int8Ty, /* isConstant */ true,
+ GlobalValue::WeakAnyLinkage,
+ ConstantInt::get(Int8Ty, NestedParallelism ? 1 : 0),
+ Kernel->getName() + "_nested_parallelism");
// If we can we change the execution mode to SPMD-mode otherwise we build a
// custom state machine.
+ ChangeStatus Changed = ChangeStatus::UNCHANGED;
if (!changeToSPMDMode(A, Changed)) {
if (!KernelInitCB->getCalledFunction()->isDeclaration())
- Changed |= buildCustomStateMachine(A);
- }
-
- // At last, update the KernelEnvc
- GlobalVariable *KernelEnvGV =
- KernelInfo::getKernelEnvironementGVFromKernelInitCB(KernelInitCB);
- if (KernelEnvGV->getInitializer() != KernelEnvC) {
- KernelEnvGV->setInitializer(KernelEnvC);
- Changed = ChangeStatus::CHANGED;
+ return buildCustomStateMachine(A);
}
return Changed;
@@ -3888,14 +3787,14 @@ struct AAKernelInfoFunction : AAKernelInfo {
// Find escaping outputs from the guarded region to outside users and
// broadcast their values to them.
for (Instruction &I : *RegionStartBB) {
- SmallVector<Use *, 4> OutsideUses;
- for (Use &U : I.uses()) {
- Instruction &UsrI = *cast<Instruction>(U.getUser());
+ SmallPtrSet<Instruction *, 4> OutsideUsers;
+ for (User *Usr : I.users()) {
+ Instruction &UsrI = *cast<Instruction>(Usr);
if (UsrI.getParent() != RegionStartBB)
- OutsideUses.push_back(&U);
+ OutsideUsers.insert(&UsrI);
}
- if (OutsideUses.empty())
+ if (OutsideUsers.empty())
continue;
HasBroadcastValues = true;
@@ -3918,8 +3817,8 @@ struct AAKernelInfoFunction : AAKernelInfo {
RegionBarrierBB->getTerminator());
// Emit a load instruction and replace uses of the output value.
- for (Use *U : OutsideUses)
- A.changeUseAfterManifest(*U, *LoadI);
+ for (Instruction *UsrI : OutsideUsers)
+ UsrI->replaceUsesOfWith(&I, LoadI);
}
auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
@@ -4146,11 +4045,16 @@ struct AAKernelInfoFunction : AAKernelInfo {
assert(omp::isKernel(*Kernel) && "Expected kernel function!");
// Check if the kernel is already in SPMD mode, if so, return success.
- ConstantStruct *ExistingKernelEnvC =
- KernelInfo::getKernelEnvironementFromKernelInitCB(KernelInitCB);
- auto *ExecModeC =
- KernelInfo::getExecModeFromKernelEnvironment(ExistingKernelEnvC);
- const int8_t ExecModeVal = ExecModeC->getSExtValue();
+ GlobalVariable *ExecMode = Kernel->getParent()->getGlobalVariable(
+ (Kernel->getName() + "_exec_mode").str());
+ assert(ExecMode && "Kernel without exec mode?");
+ assert(ExecMode->getInitializer() && "ExecMode doesn't have initializer!");
+
+ // Set the global exec mode flag to indicate SPMD-Generic mode.
+ assert(isa<ConstantInt>(ExecMode->getInitializer()) &&
+ "ExecMode is not an integer!");
+ const int8_t ExecModeVal =
+ cast<ConstantInt>(ExecMode->getInitializer())->getSExtValue();
if (ExecModeVal != OMP_TGT_EXEC_MODE_GENERIC)
return true;
@@ -4168,8 +4072,27 @@ struct AAKernelInfoFunction : AAKernelInfo {
// kernel is executed in.
assert(ExecModeVal == OMP_TGT_EXEC_MODE_GENERIC &&
"Initially non-SPMD kernel has SPMD exec mode!");
- setExecModeOfKernelEnvironment(ConstantInt::get(
- ExecModeC->getType(), ExecModeVal | OMP_TGT_EXEC_MODE_GENERIC_SPMD));
+ ExecMode->setInitializer(
+ ConstantInt::get(ExecMode->getInitializer()->getType(),
+ ExecModeVal | OMP_TGT_EXEC_MODE_GENERIC_SPMD));
+
+ // Next rewrite the init and deinit calls to indicate we use SPMD-mode now.
+ const int InitModeArgNo = 1;
+ const int DeinitModeArgNo = 1;
+ const int InitUseStateMachineArgNo = 2;
+
+ auto &Ctx = getAnchorValue().getContext();
+ A.changeUseAfterManifest(
+ KernelInitCB->getArgOperandUse(InitModeArgNo),
+ *ConstantInt::getSigned(IntegerType::getInt8Ty(Ctx),
+ OMP_TGT_EXEC_MODE_SPMD));
+ A.changeUseAfterManifest(
+ KernelInitCB->getArgOperandUse(InitUseStateMachineArgNo),
+ *ConstantInt::getBool(Ctx, false));
+ A.changeUseAfterManifest(
+ KernelDeinitCB->getArgOperandUse(DeinitModeArgNo),
+ *ConstantInt::getSigned(IntegerType::getInt8Ty(Ctx),
+ OMP_TGT_EXEC_MODE_SPMD));
++NumOpenMPTargetRegionKernelsSPMD;
@@ -4196,29 +4119,30 @@ struct AAKernelInfoFunction : AAKernelInfo {
OMPRTL___kmpc_kernel_parallel, OMPRTL___kmpc_kernel_end_parallel}))
return ChangeStatus::UNCHANGED;
- ConstantStruct *ExistingKernelEnvC =
- KernelInfo::getKernelEnvironementFromKernelInitCB(KernelInitCB);
+ const int InitModeArgNo = 1;
+ const int InitUseStateMachineArgNo = 2;
// Check if the current configuration is non-SPMD and generic state machine.
// If we already have SPMD mode or a custom state machine we do not need to
// go any further. If it is anything but a constant something is weird and
// we give up.
- ConstantInt *UseStateMachineC =
- KernelInfo::getUseGenericStateMachineFromKernelEnvironment(
- ExistingKernelEnvC);
- ConstantInt *ModeC =
- KernelInfo::getExecModeFromKernelEnvironment(ExistingKernelEnvC);
+ ConstantInt *UseStateMachine = dyn_cast<ConstantInt>(
+ KernelInitCB->getArgOperand(InitUseStateMachineArgNo));
+ ConstantInt *Mode =
+ dyn_cast<ConstantInt>(KernelInitCB->getArgOperand(InitModeArgNo));
// If we are stuck with generic mode, try to create a custom device (=GPU)
// state machine which is specialized for the parallel regions that are
// reachable by the kernel.
- if (UseStateMachineC->isZero() ||
- (ModeC->getSExtValue() & OMP_TGT_EXEC_MODE_SPMD))
+ if (!UseStateMachine || UseStateMachine->isZero() || !Mode ||
+ (Mode->getSExtValue() & OMP_TGT_EXEC_MODE_SPMD))
return ChangeStatus::UNCHANGED;
// If not SPMD mode, indicate we use a custom state machine now.
- setUseGenericStateMachineOfKernelEnvironment(
- ConstantInt::get(UseStateMachineC->getType(), false));
+ auto &Ctx = getAnchorValue().getContext();
+ auto *FalseVal = ConstantInt::getBool(Ctx, false);
+ A.changeUseAfterManifest(
+ KernelInitCB->getArgOperandUse(InitUseStateMachineArgNo), *FalseVal);
// If we don't actually need a state machine we are done here. This can
// happen if there simply are no parallel regions. In the resulting kernel
@@ -4297,7 +4221,6 @@ struct AAKernelInfoFunction : AAKernelInfo {
// UserCodeEntryBB: // user code
// __kmpc_target_deinit(...)
//
- auto &Ctx = getAnchorValue().getContext();
Function *Kernel = getAssociatedFunction();
assert(Kernel && "Expected an associated function!");
@@ -4380,7 +4303,7 @@ struct AAKernelInfoFunction : AAKernelInfo {
StateMachineBeginBB->end()),
DLoc));
- Value *Ident = KernelInfo::getIdentFromKernelEnvironment(KernelEnvC);
+ Value *Ident = KernelInitCB->getArgOperand(0);
Value *GTid = KernelInitCB;
FunctionCallee BarrierFn =
@@ -4507,46 +4430,6 @@ struct AAKernelInfoFunction : AAKernelInfo {
ChangeStatus updateImpl(Attributor &A) override {
KernelInfoState StateBefore = getState();
- // When we leave this function this RAII will make sure the member
- // KernelEnvC is updated properly depending on the state. That member is
- // used for simplification of values and needs to be up to date at all
- // times.
- struct UpdateKernelEnvCRAII {
- AAKernelInfoFunction &AA;
-
- UpdateKernelEnvCRAII(AAKernelInfoFunction &AA) : AA(AA) {}
-
- ~UpdateKernelEnvCRAII() {
- if (!AA.KernelEnvC)
- return;
-
- ConstantStruct *ExistingKernelEnvC =
- KernelInfo::getKernelEnvironementFromKernelInitCB(AA.KernelInitCB);
-
- if (!AA.isValidState()) {
- AA.KernelEnvC = ExistingKernelEnvC;
- return;
- }
-
- if (!AA.ReachedKnownParallelRegions.isValidState())
- AA.setUseGenericStateMachineOfKernelEnvironment(
- KernelInfo::getUseGenericStateMachineFromKernelEnvironment(
- ExistingKernelEnvC));
-
- if (!AA.SPMDCompatibilityTracker.isValidState())
- AA.setExecModeOfKernelEnvironment(
- KernelInfo::getExecModeFromKernelEnvironment(ExistingKernelEnvC));
-
- ConstantInt *MayUseNestedParallelismC =
- KernelInfo::getMayUseNestedParallelismFromKernelEnvironment(
- AA.KernelEnvC);
- ConstantInt *NewMayUseNestedParallelismC = ConstantInt::get(
- MayUseNestedParallelismC->getType(), AA.NestedParallelism);
- AA.setMayUseNestedParallelismOfKernelEnvironment(
- NewMayUseNestedParallelismC);
- }
- } RAII(*this);
-
// Callback to check a read/write instruction.
auto CheckRWInst = [&](Instruction &I) {
// We handle calls later.
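The hunks above restore the pre-kernel-environment scheme, in which per-kernel configuration lives in standalone weak i8 globals (e.g. "<kernel>_exec_mode", "<kernel>_nested_parallelism") rather than in a KernelEnvironmentTy struct. A minimal C++ sketch of emitting such a flag with the LLVM API, mirroring the GlobalVariable call reinstated above; the helper name emitKernelFlag is invented for illustration and is not part of the patch:

#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"

using namespace llvm;

// Emit a weak i8 flag global named "<kernel><Suffix>", e.g.
// "<kernel>_nested_parallelism", initialized to 0 or 1.
static GlobalVariable *emitKernelFlag(Function &Kernel, const char *Suffix,
                                      bool Value) {
  Module &M = *Kernel.getParent();
  Type *Int8Ty = Type::getInt8Ty(M.getContext());
  return new GlobalVariable(M, Int8Ty, /* isConstant */ true,
                            GlobalValue::WeakAnyLinkage,
                            ConstantInt::get(Int8Ty, Value ? 1 : 0),
                            Kernel.getName() + Suffix);
}
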
diff --git a/llvm/test/Transforms/Attributor/reduced/aa_execution_domain_wrong_fn.ll b/llvm/test/Transforms/Attributor/reduced/aa_execution_domain_wrong_fn.ll
index 8baee3f74d30f3..33ddafb651962e 100644
--- a/llvm/test/Transforms/Attributor/reduced/aa_execution_domain_wrong_fn.ll
+++ b/llvm/test/Transforms/Attributor/reduced/aa_execution_domain_wrong_fn.ll
@@ -7,13 +7,13 @@
@_ZN4ompx5state9TeamStateE = internal addrspace(3) global %"struct.ompx::state::TeamStateTy" undef
define weak_odr amdgpu_kernel void @__omp_offloading_16_1d1156__Z38test_target_teams_distribute__parallelv_l16() {
- %1 = tail call i32 @__kmpc_target_init(ptr null)
+ %1 = tail call i32 @__kmpc_target_init(ptr null, i8 0, i1 false)
ret void
}
-define internal i32 @__kmpc_target_init(ptr %0) {
+define internal i32 @__kmpc_target_init(ptr %0, i8 %1, i1 %2) {
store <2 x i32> zeroinitializer, ptr addrspace(3) @_ZN4ompx5state9TeamStateE, align 16
- %2 = call i1 @__kmpc_kernel_parallel()
+ %4 = call i1 @__kmpc_kernel_parallel()
ret i32 0
}
@@ -29,14 +29,14 @@ define internal i1 @__kmpc_kernel_parallel() {
; CHECK: @[[_ZN4OMPX5STATE9TEAMSTATEE:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global %"struct.ompx::state::TeamStateTy" undef
;.
; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_16_1d1156__Z38test_target_teams_distribute__parallelv_l16() {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @__kmpc_target_init(ptr null)
+; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @__kmpc_target_init(ptr null, i8 0, i1 false)
; CHECK-NEXT: ret void
;
;
; CHECK-LABEL: define {{[^@]+}}@__kmpc_target_init
-; CHECK-SAME: (ptr [[TMP0:%.*]]) {
+; CHECK-SAME: (ptr [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) {
; CHECK-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(3) @_ZN4ompx5state9TeamStateE, align 16
-; CHECK-NEXT: [[TMP2:%.*]] = call i1 @__kmpc_kernel_parallel()
+; CHECK-NEXT: [[TMP4:%.*]] = call i1 @__kmpc_kernel_parallel()
; CHECK-NEXT: ret i32 0
;
;
diff --git a/llvm/test/Transforms/Attributor/value-simplify-local-remote.ll b/llvm/test/Transforms/Attributor/value-simplify-local-remote.ll
index 2c1fdee58f287a..d7f8d23ac8689a 100644
--- a/llvm/test/Transforms/Attributor/value-simplify-local-remote.ll
+++ b/llvm/test/Transforms/Attributor/value-simplify-local-remote.ll
@@ -310,7 +310,7 @@ entry:
define weak_odr void @t3() {
; CHECK-LABEL: define {{[^@]+}}@t3() {
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr noundef align 4294967296 null)
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr noundef align 4294967296 null, i8 noundef 0, i1 noundef false, i1 noundef false)
; CHECK-NEXT: br label [[USER_CODE_ENTRY:%.*]]
; CHECK: user_code.entry:
; CHECK-NEXT: br label [[FOR_COND:%.*]]
@@ -321,7 +321,7 @@ define weak_odr void @t3() {
; CHECK-NEXT: ret void
;
entry:
- %0 = call i32 @__kmpc_target_init(ptr null)
+ %0 = call i32 @__kmpc_target_init(ptr null, i8 0, i1 false, i1 false)
br label %user_code.entry
user_code.entry: ; preds = %entry
@@ -335,7 +335,7 @@ for.body: ; preds = %for.cond
ret void
}
-declare i32 @__kmpc_target_init(ptr)
+declare i32 @__kmpc_target_init(ptr, i8, i1, i1)
define %S.2 @t3.helper() {
; CHECK-LABEL: define {{[^@]+}}@t3.helper() {
@@ -380,22 +380,22 @@ define dso_local void @spam() {
; TUNIT-NEXT: store i32 [[X]], ptr [[TMP]], align 4
; TUNIT-NEXT: br label [[BB16:%.*]]
; TUNIT: bb16:
-; TUNIT-NEXT: [[TMP18:%.*]] = icmp eq i32 [[X]], 0
-; TUNIT-NEXT: br i1 [[TMP18]], label [[BB35:%.*]], label [[BB19:%.*]]
+; TUNIT-NEXT: [[TRUETMP18:%.*]] = icmp eq i32 [[X]], 0
+; TUNIT-NEXT: br i1 [[TRUETMP18]], label [[BB35:%.*]], label [[BB19:%.*]]
; TUNIT: bb19:
-; TUNIT-NEXT: [[TMP21:%.*]] = load float, ptr [[TMP]], align 4
-; TUNIT-NEXT: [[TMP22:%.*]] = fadd fast float [[TMP21]], 0.000000e+00
+; TUNIT-NEXT: [[TRUETMP21:%.*]] = load float, ptr [[TMP]], align 4
+; TUNIT-NEXT: [[TRUETMP22:%.*]] = fadd fast float [[TRUETMP21]], 0.000000e+00
; TUNIT-NEXT: br label [[BB23:%.*]]
; TUNIT: bb23:
-; TUNIT-NEXT: [[TMP24:%.*]] = phi <2 x float> [ undef, [[BB19]] ], [ [[TMP26:%.*]], [[BB34:%.*]] ]
+; TUNIT-NEXT: [[TRUETMP24:%.*]] = phi <2 x float> [ undef, [[BB19]] ], [ [[TRUETMP26:%.*]], [[BB34:%.*]] ]
; TUNIT-NEXT: br label [[BB25:%.*]]
; TUNIT: bb25:
-; TUNIT-NEXT: [[TMP26]] = phi <2 x float> [ [[TMP30:%.*]], [[BB28:%.*]] ], [ [[TMP24]], [[BB23]] ]
-; TUNIT-NEXT: [[TMP27:%.*]] = icmp ult i32 undef, 8
-; TUNIT-NEXT: br i1 [[TMP27]], label [[BB28]], label [[BB34]]
+; TUNIT-NEXT: [[TRUETMP26]] = phi <2 x float> [ [[TRUETMP30:%.*]], [[BB28:%.*]] ], [ [[TRUETMP24]], [[BB23]] ]
+; TUNIT-NEXT: [[TRUETMP27:%.*]] = icmp ult i32 undef, 8
+; TUNIT-NEXT: br i1 [[TRUETMP27]], label [[BB28]], label [[BB34]]
; TUNIT: bb28:
-; TUNIT-NEXT: [[TMP29:%.*]] = insertelement <2 x float> [[TMP26]], float undef, i32 0
-; TUNIT-NEXT: [[TMP30]] = insertelement <2 x float> [[TMP29]], float [[TMP22]], i32 1
+; TUNIT-NEXT: [[TRUETMP29:%.*]] = insertelement <2 x float> [[TRUETMP26]], float undef, i32 0
+; TUNIT-NEXT: [[TRUETMP30]] = insertelement <2 x float> [[TRUETMP29]], float [[TRUETMP22]], i32 1
; TUNIT-NEXT: br label [[BB25]]
; TUNIT: bb34:
; TUNIT-NEXT: br label [[BB23]]
@@ -411,22 +411,22 @@ define dso_local void @spam() {
; CGSCC-NEXT: store i32 [[X]], ptr [[TMP]], align 4
; CGSCC-NEXT: br label [[BB16:%.*]]
; CGSCC: bb16:
-; CGSCC-NEXT: [[TMP18:%.*]] = icmp eq i32 [[X]], 0
-; CGSCC-NEXT: br i1 [[TMP18]], label [[BB35:%.*]], label [[BB19:%.*]]
+; CGSCC-NEXT: [[TRUETMP18:%.*]] = icmp eq i32 [[X]], 0
+; CGSCC-NEXT: br i1 [[TRUETMP18]], label [[BB35:%.*]], label [[BB19:%.*]]
; CGSCC: bb19:
-; CGSCC-NEXT: [[TMP21:%.*]] = load float, ptr [[TMP]], align 4
-; CGSCC-NEXT: [[TMP22:%.*]] = fadd fast float [[TMP21]], 0.000000e+00
+; CGSCC-NEXT: [[TRUETMP21:%.*]] = load float, ptr [[TMP]], align 4
+; CGSCC-NEXT: [[TRUETMP22:%.*]] = fadd fast float [[TRUETMP21]], 0.000000e+00
; CGSCC-NEXT: br label [[BB23:%.*]]
; CGSCC: bb23:
-; CGSCC-NEXT: [[TMP24:%.*]] = phi <2 x float> [ undef, [[BB19]] ], [ [[TMP26:%.*]], [[BB34:%.*]] ]
+; CGSCC-NEXT: [[TRUETMP24:%.*]] = phi <2 x float> [ undef, [[BB19]] ], [ [[TRUETMP26:%.*]], [[BB34:%.*]] ]
; CGSCC-NEXT: br label [[BB25:%.*]]
; CGSCC: bb25:
-; CGSCC-NEXT: [[TMP26]] = phi <2 x float> [ [[TMP30:%.*]], [[BB28:%.*]] ], [ [[TMP24]], [[BB23]] ]
-; CGSCC-NEXT: [[TMP27:%.*]] = icmp ult i32 undef, 8
-; CGSCC-NEXT: br i1 [[TMP27]], label [[BB28]], label [[BB34]]
+; CGSCC-NEXT: [[TRUETMP26]] = phi <2 x float> [ [[TRUETMP30:%.*]], [[BB28:%.*]] ], [ [[TRUETMP24]], [[BB23]] ]
+; CGSCC-NEXT: [[TRUETMP27:%.*]] = icmp ult i32 undef, 8
+; CGSCC-NEXT: br i1 [[TRUETMP27]], label [[BB28]], label [[BB34]]
; CGSCC: bb28:
-; CGSCC-NEXT: [[TMP29:%.*]] = insertelement <2 x float> [[TMP26]], float undef, i32 0
-; CGSCC-NEXT: [[TMP30]] = insertelement <2 x float> [[TMP29]], float [[TMP22]], i32 1
+; CGSCC-NEXT: [[TRUETMP29:%.*]] = insertelement <2 x float> [[TRUETMP26]], float undef, i32 0
+; CGSCC-NEXT: [[TRUETMP30]] = insertelement <2 x float> [[TRUETMP29]], float [[TRUETMP22]], i32 1
; CGSCC-NEXT: br label [[BB25]]
; CGSCC: bb34:
; CGSCC-NEXT: br label [[BB23]]
diff --git a/llvm/test/Transforms/OpenMP/add_attributes.ll b/llvm/test/Transforms/OpenMP/add_attributes.ll
index 3573355169f284..4bebf55b1d0de3 100644
--- a/llvm/test/Transforms/OpenMP/add_attributes.ll
+++ b/llvm/test/Transforms/OpenMP/add_attributes.ll
@@ -736,9 +736,9 @@ declare i32 @__kmpc_shuffle_int32(i32, i16, i16);
declare i64 @__kmpc_shuffle_int64(i64, i16, i16);
-declare void @__kmpc_target_deinit();
+declare void @__kmpc_target_deinit(ptr, i8);
-declare i32 @__kmpc_target_init(ptr);
+declare i32 @__kmpc_target_init(ptr, i8, i1);
declare void @__tgt_interop_destroy(ptr, i32, ptr, i32, i32, ptr, i32);
@@ -1389,10 +1389,10 @@ declare i32 @__tgt_target_kernel_nowait(ptr, i64, i32, i32, ptr, ptr, i32, ptr,
; CHECK: declare i64 @__kmpc_shuffle_int64(i64, i16, i16)
; CHECK-NOT: Function Attrs
-; CHECK: declare void @__kmpc_target_deinit()
+; CHECK: declare void @__kmpc_target_deinit(ptr, i8)
; CHECK-NOT: Function Attrs
-; CHECK: declare i32 @__kmpc_target_init(ptr)
+; CHECK: declare i32 @__kmpc_target_init(ptr, i8, i1)
; CHECK-NOT: Function Attrs
; CHECK: declare void @__tgt_interop_destroy(ptr, i32, ptr, i32, i32, ptr, i32)
@@ -2037,10 +2037,10 @@ declare i32 @__tgt_target_kernel_nowait(ptr, i64, i32, i32, ptr, ptr, i32, ptr,
; OPTIMISTIC: declare i64 @__kmpc_shuffle_int64(i64, i16, i16)
; OPTIMISTIC-NOT: Function Attrs
-; OPTIMISTIC: declare void @__kmpc_target_deinit()
+; OPTIMISTIC: declare void @__kmpc_target_deinit(ptr, i8)
; OPTIMISTIC-NOT: Function Attrs
-; OPTIMISTIC: declare i32 @__kmpc_target_init(ptr)
+; OPTIMISTIC: declare i32 @__kmpc_target_init(ptr, i8, i1)
; OPTIMISTIC-NOT: Function Attrs
; OPTIMISTIC: declare void @__tgt_interop_destroy(ptr, i32, ptr, i32, i32, ptr, i32)
@@ -2698,10 +2698,10 @@ declare i32 @__tgt_target_kernel_nowait(ptr, i64, i32, i32, ptr, ptr, i32, ptr,
; EXT: declare i64 @__kmpc_shuffle_int64(i64, i16 signext, i16 signext)
; EXT-NOT: Function Attrs
-; EXT: declare void @__kmpc_target_deinit()
+; EXT: declare void @__kmpc_target_deinit(ptr, i8 signext)
; EXT-NOT: Function Attrs
-; EXT: declare signext i32 @__kmpc_target_init(ptr)
+; EXT: declare signext i32 @__kmpc_target_init(ptr, i8 signext, i1 signext)
; EXT-NOT: Function Attrs
; EXT: declare void @__tgt_interop_destroy(ptr, i32 signext, ptr, i32 signext, i32 signext, ptr, i32 signext)
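
The declarations above are the pre-kernel-environment runtime entry points this revert reinstates: __kmpc_target_init takes the ident, the exec mode, and whether to use the generic state machine; __kmpc_target_deinit takes the ident and the exec mode. As rough C++ prototypes matching the restored declare lines (parameter names are assumptions, not taken from the runtime sources):

#include <cstdint>

extern "C" int32_t __kmpc_target_init(void *Ident, int8_t Mode,
                                      bool UseGenericStateMachine);
extern "C" void __kmpc_target_deinit(void *Ident, int8_t Mode);
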
diff --git a/llvm/test/Transforms/OpenMP/always_inline_device.ll b/llvm/test/Transforms/OpenMP/always_inline_device.ll
index 5478924cf828b6..3aa1101a08b2c2 100644
--- a/llvm/test/Transforms/OpenMP/always_inline_device.ll
+++ b/llvm/test/Transforms/OpenMP/always_inline_device.ll
@@ -1,27 +1,19 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes
; RUN: opt < %s -S -passes=openmp-opt -openmp-opt-inline-device | FileCheck %s
%struct.ident_t = type { i32, i32, i32, i32, ptr }
-%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr }
-%struct.ConfigurationEnvironmentTy = type { i8, i8, i8 }
@0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8
+@__omp_offloading_fd02_c0934fc2_foo_l4_exec_mode = weak constant i8 1
+@llvm.compiler.used = appending global [1 x ptr] [ptr @__omp_offloading_fd02_c0934fc2_foo_l4_exec_mode], section "llvm.metadata"
@G = external global i8
-@kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr @1, ptr null }
-
-; Function Attrs: convergent norecurse nounwind
-;.
-; CHECK: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
-; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8
-; CHECK: @[[G:[a-zA-Z0-9_$"\\.-]+]] = external global i8
-; CHECK: @[[KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 3 }, ptr @[[GLOB1]], ptr null }
-;.
+; Function Attrs: norecurse nounwind
define weak void @__omp_offloading_fd02_c0934fc2_foo_l4() #0 {
; CHECK: Function Attrs: norecurse nounwind
; CHECK-LABEL: @__omp_offloading_fd02_c0934fc2_foo_l4(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @kernel_environment)
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
; CHECK-NEXT: [[THREAD_ID_IN_BLOCK:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
; CHECK-NEXT: [[THREAD_IS_MAIN:%.*]] = icmp ne i32 [[THREAD_ID_IN_BLOCK]], 0
; CHECK-NEXT: br i1 [[THREAD_IS_MAIN]], label [[EXIT_THREADS:%.*]], label [[MAIN_THREAD_USER_CODE:%.*]]
@@ -32,13 +24,13 @@ define weak void @__omp_offloading_fd02_c0934fc2_foo_l4() #0 {
; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; CHECK: user_code.entry:
; CHECK-NEXT: store i8 1, ptr @G, align 1
-; CHECK-NEXT: call void @__kmpc_target_deinit()
+; CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
; CHECK-NEXT: ret void
; CHECK: worker.exit:
; CHECK-NEXT: ret void
;
entry:
- %0 = call i32 @__kmpc_target_init(ptr @kernel_environment)
+ %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %worker.exit
@@ -50,7 +42,7 @@ user_code.entry: ; preds = %entry
%isSPMD = call i8 @__kmpc_is_spmd_exec_mode()
store i8 %isSPMD, ptr @G
call void @bar() #2
- call void @__kmpc_target_deinit()
+ call void @__kmpc_target_deinit(ptr @1, i8 1)
ret void
worker.exit: ; preds = %entry
@@ -59,9 +51,9 @@ worker.exit: ; preds = %entry
declare i8 @__kmpc_is_spmd_exec_mode()
-declare i32 @__kmpc_target_init(ptr)
+declare i32 @__kmpc_target_init(ptr, i8, i1)
-declare void @__kmpc_target_deinit()
+declare void @__kmpc_target_deinit(ptr, i8)
; Function Attrs: convergent nounwind
define hidden void @bar() #1 {
@@ -91,17 +83,3 @@ attributes #2 = { convergent }
!5 = !{i32 7, !"PIC Level", i32 2}
!6 = !{i32 7, !"frame-pointer", i32 2}
!7 = !{!"clang version 14.0.0"}
-;.
-; CHECK: attributes #[[ATTR0:[0-9]+]] = { norecurse nounwind "frame-pointer"="all" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_70" "target-features"="+ptx32,+sm_70" }
-; CHECK: attributes #[[ATTR1:[0-9]+]] = { alwaysinline convergent nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_70" "target-features"="+ptx32,+sm_70" }
-; CHECK: attributes #[[ATTR2:[0-9]+]] = { nounwind }
-;.
-; CHECK: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 -1064087614, !"foo", i32 4, i32 0}
-; CHECK: [[META1:![0-9]+]] = !{ptr @__omp_offloading_fd02_c0934fc2_foo_l4, !"kernel", i32 1}
-; CHECK: [[META2:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
-; CHECK: [[META3:![0-9]+]] = !{i32 7, !"openmp", i32 50}
-; CHECK: [[META4:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
-; CHECK: [[META5:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
-; CHECK: [[META6:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
-; CHECK: [[META7:![0-9]+]] = !{!"clang version 14.0.0"}
-;.
diff --git a/llvm/test/Transforms/OpenMP/custom_state_machines.ll b/llvm/test/Transforms/OpenMP/custom_state_machines.ll
index 248fcff9022807..30f4b45ac42575 100644
--- a/llvm/test/Transforms/OpenMP/custom_state_machines.ll
+++ b/llvm/test/Transforms/OpenMP/custom_state_machines.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals --include-generated-funcs
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --include-generated-funcs
; RUN: opt --mtriple=amdgcn-amd-amdhsa --data-layout=A5 -S -passes=openmp-opt < %s | FileCheck %s --check-prefixes=AMDGPU
; RUN: opt --mtriple=nvptx64-- -S -passes=openmp-opt < %s | FileCheck %s --check-prefixes=NVPTX
; RUN: opt --mtriple=amdgcn-amd-amdhsa --data-layout=A5 -openmp-opt-disable-state-machine-rewrite -S -passes=openmp-opt < %s | FileCheck %s --check-prefixes=AMDGPU-DISABLED
@@ -120,30 +120,28 @@
;; }
%struct.ident_t = type { i32, i32, i32, i32, ptr }
-%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr }
-%struct.ConfigurationEnvironmentTy = type { i8, i8, i8 }
@0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8
+@__omp_offloading_14_a36502b_no_state_machine_needed_l14_exec_mode = weak constant i8 1
+@__omp_offloading_14_a36502b_simple_state_machine_l22_exec_mode = weak constant i8 1
+@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_exec_mode = weak constant i8 1
+@__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_exec_mode = weak constant i8 1
+@__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_exec_mode = weak constant i8 1
+@__omp_offloading_14_a36502b_simple_state_machine_pure_l77_exec_mode = weak constant i8 1
+@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_exec_mode = weak constant i8 1
+@__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_exec_mode = weak constant i8 1
@2 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 2, i32 0, ptr @0 }, align 8
@G = external global i32, align 4
@3 = private unnamed_addr constant %struct.ident_t { i32 0, i32 322, i32 2, i32 0, ptr @0 }, align 8
-
-@__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr @1, ptr null }
-@__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr @1, ptr null }
-@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr @1, ptr null }
-@__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr @1, ptr null }
-@__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr @1, ptr null }
-@__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr @1, ptr null }
-@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr @1, ptr null }
-@__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr @1, ptr null }
+@llvm.compiler.used = appending global [8 x ptr] [ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_exec_mode, ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_exec_mode, ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_exec_mode, ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_exec_mode, ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_exec_mode, ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_exec_mode, ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_exec_mode, ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_exec_mode], section "llvm.metadata"
define weak void @__omp_offloading_14_a36502b_no_state_machine_needed_l14() #0 {
entry:
%.zero.addr = alloca i32, align 4
%.threadid_temp. = alloca i32, align 4
store i32 0, ptr %.zero.addr, align 4
- %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment)
+ %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %worker.exit
@@ -151,7 +149,7 @@ user_code.entry: ; preds = %entry
%1 = call i32 @__kmpc_global_thread_num(ptr @1)
store i32 %1, ptr %.threadid_temp., align 4
call void @__omp_outlined__(ptr %.threadid_temp., ptr %.zero.addr) #3
- call void @__kmpc_target_deinit()
+ call void @__kmpc_target_deinit(ptr @1, i8 1)
ret void
worker.exit: ; preds = %entry
@@ -159,7 +157,7 @@ worker.exit: ; preds = %entry
}
; Make it a weak definition so we will apply custom state machine rewriting but can't use the body in the reasoning.
-define weak i32 @__kmpc_target_init(ptr) {
+define weak i32 @__kmpc_target_init(ptr, i8, i1) {
ret i32 0
}
@@ -195,14 +193,14 @@ declare void @unknown_no_openmp() #2
declare i32 @__kmpc_global_thread_num(ptr) #3
-declare void @__kmpc_target_deinit()
+declare void @__kmpc_target_deinit(ptr, i8)
define weak void @__omp_offloading_14_a36502b_simple_state_machine_l22() #0 {
entry:
%.zero.addr = alloca i32, align 4
%.threadid_temp. = alloca i32, align 4
store i32 0, ptr %.zero.addr, align 4
- %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment)
+ %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %worker.exit
@@ -210,7 +208,7 @@ user_code.entry: ; preds = %entry
%1 = call i32 @__kmpc_global_thread_num(ptr @1)
store i32 %1, ptr %.threadid_temp., align 4
call void @__omp_outlined__1(ptr %.threadid_temp., ptr %.zero.addr) #3
- call void @__kmpc_target_deinit()
+ call void @__kmpc_target_deinit(ptr @1, i8 1)
ret void
worker.exit: ; preds = %entry
@@ -295,7 +293,7 @@ entry:
%.zero.addr = alloca i32, align 4
%.threadid_temp. = alloca i32, align 4
store i32 0, ptr %.zero.addr, align 4
- %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment)
+ %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %worker.exit
@@ -303,7 +301,7 @@ user_code.entry: ; preds = %entry
%1 = call i32 @__kmpc_global_thread_num(ptr @1)
store i32 %1, ptr %.threadid_temp., align 4
call void @__omp_outlined__4(ptr %.threadid_temp., ptr %.zero.addr) #3
- call void @__kmpc_target_deinit()
+ call void @__kmpc_target_deinit(ptr @1, i8 1)
ret void
worker.exit: ; preds = %entry
@@ -372,7 +370,7 @@ entry:
%.zero.addr = alloca i32, align 4
%.threadid_temp. = alloca i32, align 4
store i32 0, ptr %.zero.addr, align 4
- %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment)
+ %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %worker.exit
@@ -380,7 +378,7 @@ user_code.entry: ; preds = %entry
%1 = call i32 @__kmpc_global_thread_num(ptr @1)
store i32 %1, ptr %.threadid_temp., align 4
call void @__omp_outlined__6(ptr %.threadid_temp., ptr %.zero.addr) #3
- call void @__kmpc_target_deinit()
+ call void @__kmpc_target_deinit(ptr @1, i8 1)
ret void
worker.exit: ; preds = %entry
@@ -458,7 +456,7 @@ entry:
%.zero.addr = alloca i32, align 4
%.threadid_temp. = alloca i32, align 4
store i32 0, ptr %.zero.addr, align 4
- %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment)
+ %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %worker.exit
@@ -466,7 +464,7 @@ user_code.entry: ; preds = %entry
%1 = call i32 @__kmpc_global_thread_num(ptr @1)
store i32 %1, ptr %.threadid_temp., align 4
call void @__omp_outlined__9(ptr %.threadid_temp., ptr %.zero.addr) #3
- call void @__kmpc_target_deinit()
+ call void @__kmpc_target_deinit(ptr @1, i8 1)
ret void
worker.exit: ; preds = %entry
@@ -542,7 +540,7 @@ entry:
%.zero.addr = alloca i32, align 4
%.threadid_temp. = alloca i32, align 4
store i32 0, ptr %.zero.addr, align 4
- %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment)
+ %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %worker.exit
@@ -550,7 +548,7 @@ user_code.entry: ; preds = %entry
%1 = call i32 @__kmpc_global_thread_num(ptr @1)
store i32 %1, ptr %.threadid_temp., align 4
call void @__omp_outlined__12(ptr %.threadid_temp., ptr %.zero.addr) #3
- call void @__kmpc_target_deinit()
+ call void @__kmpc_target_deinit(ptr @1, i8 1)
ret void
worker.exit: ; preds = %entry
@@ -629,7 +627,7 @@ entry:
%.zero.addr = alloca i32, align 4
%.threadid_temp. = alloca i32, align 4
store i32 0, ptr %.zero.addr, align 4
- %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment)
+ %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %worker.exit
@@ -637,7 +635,7 @@ user_code.entry: ; preds = %entry
%1 = call i32 @__kmpc_global_thread_num(ptr @1)
store i32 %1, ptr %.threadid_temp., align 4
call void @__omp_outlined__15(ptr %.threadid_temp., ptr %.zero.addr) #3
- call void @__kmpc_target_deinit()
+ call void @__kmpc_target_deinit(ptr @1, i8 1)
ret void
worker.exit: ; preds = %entry
@@ -684,7 +682,7 @@ entry:
%.zero.addr = alloca i32, align 4
%.threadid_temp. = alloca i32, align 4
store i32 0, ptr %.zero.addr, align 4
- %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment)
+ %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %worker.exit
@@ -692,7 +690,7 @@ user_code.entry: ; preds = %entry
%1 = call i32 @__kmpc_global_thread_num(ptr @1)
store i32 %1, ptr %.threadid_temp., align 4
call void @__omp_outlined__16(ptr %.threadid_temp., ptr %.zero.addr) #3
- call void @__kmpc_target_deinit()
+ call void @__kmpc_target_deinit(ptr @1, i8 1)
ret void
worker.exit: ; preds = %entry
@@ -834,101 +832,26 @@ attributes #9 = { convergent nounwind readonly willreturn }
!16 = !{i32 1, !"wchar_size", i32 4}
!17 = !{i32 7, !"openmp", i32 50}
!18 = !{i32 7, !"openmp-device", i32 50}
-;.
-; AMDGPU: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
-; AMDGPU: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8
-; AMDGPU: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 2, i32 0, ptr @[[GLOB0]] }, align 8
-; AMDGPU: @[[G:[a-zA-Z0-9_$"\\.-]+]] = external global i32, align 4
-; AMDGPU: @[[GLOB3:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 322, i32 2, i32 0, ptr @[[GLOB0]] }, align 8
-; AMDGPU: @[[__OMP_OFFLOADING_14_A36502B_NO_STATE_MACHINE_NEEDED_L14_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null }
-; AMDGPU: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_L22_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null }
-; AMDGPU: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_INTERPROCEDURAL_L39_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null }
-; AMDGPU: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_WITH_FALLBACK_L55_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null }
-; AMDGPU: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_NO_OPENMP_ATTR_L66_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null }
-; AMDGPU: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_PURE_L77_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null }
-; AMDGPU: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_INTERPROCEDURAL_NESTED_RECURSIVE_L92_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null }
-; AMDGPU: @[[__OMP_OFFLOADING_14_A36502B_NO_STATE_MACHINE_WEAK_CALLEE_L112_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null }
-; AMDGPU: @[[__OMP_OUTLINED__2_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
-; AMDGPU: @[[__OMP_OUTLINED__3_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
-; AMDGPU: @[[__OMP_OUTLINED__5_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
-; AMDGPU: @[[__OMP_OUTLINED__7_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
-; AMDGPU: @[[__OMP_OUTLINED__8_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
-; AMDGPU: @[[__OMP_OUTLINED__10_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
-; AMDGPU: @[[__OMP_OUTLINED__11_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
-; AMDGPU: @[[__OMP_OUTLINED__13_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
-; AMDGPU: @[[__OMP_OUTLINED__14_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
-;.
-; NVPTX: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
-; NVPTX: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8
-; NVPTX: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 2, i32 0, ptr @[[GLOB0]] }, align 8
-; NVPTX: @[[G:[a-zA-Z0-9_$"\\.-]+]] = external global i32, align 4
-; NVPTX: @[[GLOB3:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 322, i32 2, i32 0, ptr @[[GLOB0]] }, align 8
-; NVPTX: @[[__OMP_OFFLOADING_14_A36502B_NO_STATE_MACHINE_NEEDED_L14_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null }
-; NVPTX: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_L22_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null }
-; NVPTX: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_INTERPROCEDURAL_L39_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null }
-; NVPTX: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_WITH_FALLBACK_L55_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null }
-; NVPTX: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_NO_OPENMP_ATTR_L66_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null }
-; NVPTX: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_PURE_L77_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null }
-; NVPTX: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_INTERPROCEDURAL_NESTED_RECURSIVE_L92_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null }
-; NVPTX: @[[__OMP_OFFLOADING_14_A36502B_NO_STATE_MACHINE_WEAK_CALLEE_L112_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null }
-; NVPTX: @[[__OMP_OUTLINED__2_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
-; NVPTX: @[[__OMP_OUTLINED__3_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
-; NVPTX: @[[__OMP_OUTLINED__5_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
-; NVPTX: @[[__OMP_OUTLINED__7_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
-; NVPTX: @[[__OMP_OUTLINED__8_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
-; NVPTX: @[[__OMP_OUTLINED__10_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
-; NVPTX: @[[__OMP_OUTLINED__11_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
-; NVPTX: @[[__OMP_OUTLINED__13_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
-; NVPTX: @[[__OMP_OUTLINED__14_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
-;.
-; AMDGPU-DISABLED: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
-; AMDGPU-DISABLED: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8
-; AMDGPU-DISABLED: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 2, i32 0, ptr @[[GLOB0]] }, align 8
-; AMDGPU-DISABLED: @[[G:[a-zA-Z0-9_$"\\.-]+]] = external global i32, align 4
-; AMDGPU-DISABLED: @[[GLOB3:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 322, i32 2, i32 0, ptr @[[GLOB0]] }, align 8
-; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_14_A36502B_NO_STATE_MACHINE_NEEDED_L14_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 1, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null }
-; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_L22_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 1, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null }
-; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_INTERPROCEDURAL_L39_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 1, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null }
-; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_WITH_FALLBACK_L55_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 1, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null }
-; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_NO_OPENMP_ATTR_L66_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 1, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null }
-; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_PURE_L77_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 1, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null }
-; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_INTERPROCEDURAL_NESTED_RECURSIVE_L92_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 1, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null }
-; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_14_A36502B_NO_STATE_MACHINE_WEAK_CALLEE_L112_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 1, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null }
-;.
-; NVPTX-DISABLED: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
-; NVPTX-DISABLED: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8
-; NVPTX-DISABLED: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 2, i32 0, ptr @[[GLOB0]] }, align 8
-; NVPTX-DISABLED: @[[G:[a-zA-Z0-9_$"\\.-]+]] = external global i32, align 4
-; NVPTX-DISABLED: @[[GLOB3:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 322, i32 2, i32 0, ptr @[[GLOB0]] }, align 8
-; NVPTX-DISABLED: @[[__OMP_OFFLOADING_14_A36502B_NO_STATE_MACHINE_NEEDED_L14_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 1, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null }
-; NVPTX-DISABLED: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_L22_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 1, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null }
-; NVPTX-DISABLED: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_INTERPROCEDURAL_L39_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 1, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null }
-; NVPTX-DISABLED: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_WITH_FALLBACK_L55_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 1, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null }
-; NVPTX-DISABLED: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_NO_OPENMP_ATTR_L66_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 1, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null }
-; NVPTX-DISABLED: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_PURE_L77_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 1, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null }
-; NVPTX-DISABLED: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_INTERPROCEDURAL_NESTED_RECURSIVE_L92_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 1, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null }
-; NVPTX-DISABLED: @[[__OMP_OFFLOADING_14_A36502B_NO_STATE_MACHINE_WEAK_CALLEE_L112_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 1, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null }
-;.
; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_needed_l14
; AMDGPU-SAME: () #[[ATTR0:[0-9]+]] {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment)
+; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 false)
; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU: user_code.entry:
; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]]
; AMDGPU-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU-NEXT: call void @__kmpc_target_deinit()
+; AMDGPU-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
; AMDGPU-NEXT: ret void
; AMDGPU: worker.exit:
; AMDGPU-NEXT: ret void
;
;
; AMDGPU-LABEL: define {{[^@]+}}@__kmpc_target_init
-; AMDGPU-SAME: (ptr [[TMP0:%.*]]) {
+; AMDGPU-SAME: (ptr [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) {
; AMDGPU-NEXT: ret i32 0
;
;
@@ -947,7 +870,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-LABEL: define {{[^@]+}}@no_parallel_region_in_here.internalized
; AMDGPU-SAME: () #[[ATTR1:[0-9]+]] {
; AMDGPU-NEXT: entry:
-; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
+; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) #[[ATTR3]]
; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]]
; AMDGPU-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
; AMDGPU-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
@@ -956,7 +879,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]]
; AMDGPU-NEXT: br label [[OMP_IF_END]]
; AMDGPU: omp_if.end:
-; AMDGPU-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) #[[ATTR3]]
+; AMDGPU-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP0]]) #[[ATTR3]]
; AMDGPU-NEXT: ret void
;
;
@@ -984,7 +907,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment)
+; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false)
; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; AMDGPU: is_worker_check:
@@ -1030,7 +953,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU: user_code.entry:
; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
; AMDGPU-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU-NEXT: call void @__kmpc_target_deinit()
+; AMDGPU-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
; AMDGPU-NEXT: ret void
; AMDGPU: worker.exit:
; AMDGPU-NEXT: ret void
@@ -1103,7 +1026,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment)
+; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false)
; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; AMDGPU: is_worker_check:
@@ -1155,7 +1078,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU: user_code.entry:
; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
; AMDGPU-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU-NEXT: call void @__kmpc_target_deinit()
+; AMDGPU-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
; AMDGPU-NEXT: ret void
; AMDGPU: worker.exit:
; AMDGPU-NEXT: ret void
@@ -1245,7 +1168,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment)
+; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false)
; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; AMDGPU: is_worker_check:
@@ -1293,7 +1216,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU: user_code.entry:
; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
; AMDGPU-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU-NEXT: call void @__kmpc_target_deinit()
+; AMDGPU-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
; AMDGPU-NEXT: ret void
; AMDGPU: worker.exit:
; AMDGPU-NEXT: ret void
@@ -1365,7 +1288,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment)
+; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false)
; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; AMDGPU: is_worker_check:
@@ -1411,7 +1334,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU: user_code.entry:
; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
; AMDGPU-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU-NEXT: call void @__kmpc_target_deinit()
+; AMDGPU-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
; AMDGPU-NEXT: ret void
; AMDGPU: worker.exit:
; AMDGPU-NEXT: ret void
@@ -1483,7 +1406,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment)
+; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false)
; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; AMDGPU: is_worker_check:
@@ -1529,7 +1452,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU: user_code.entry:
; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
; AMDGPU-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU-NEXT: call void @__kmpc_target_deinit()
+; AMDGPU-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
; AMDGPU-NEXT: ret void
; AMDGPU: worker.exit:
; AMDGPU-NEXT: ret void
@@ -1601,7 +1524,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment)
+; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false)
; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; AMDGPU: is_worker_check:
@@ -1641,7 +1564,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU: user_code.entry:
; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
; AMDGPU-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU-NEXT: call void @__kmpc_target_deinit()
+; AMDGPU-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
; AMDGPU-NEXT: ret void
; AMDGPU: worker.exit:
; AMDGPU-NEXT: ret void
@@ -1707,7 +1630,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment)
+; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false)
; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; AMDGPU: is_worker_check:
@@ -1743,7 +1666,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU: user_code.entry:
; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
; AMDGPU-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU-NEXT: call void @__kmpc_target_deinit()
+; AMDGPU-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
; AMDGPU-NEXT: ret void
; AMDGPU: worker.exit:
; AMDGPU-NEXT: ret void
@@ -1861,20 +1784,20 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment)
+; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 false)
; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX: user_code.entry:
; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]]
; NVPTX-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX-NEXT: call void @__kmpc_target_deinit()
+; NVPTX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
; NVPTX-NEXT: ret void
; NVPTX: worker.exit:
; NVPTX-NEXT: ret void
;
;
; NVPTX-LABEL: define {{[^@]+}}@__kmpc_target_init
-; NVPTX-SAME: (ptr [[TMP0:%.*]]) {
+; NVPTX-SAME: (ptr [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) {
; NVPTX-NEXT: ret i32 0
;
;
@@ -1893,7 +1816,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-LABEL: define {{[^@]+}}@no_parallel_region_in_here.internalized
; NVPTX-SAME: () #[[ATTR1:[0-9]+]] {
; NVPTX-NEXT: entry:
-; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
+; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) #[[ATTR3]]
; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]]
; NVPTX-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
; NVPTX-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
@@ -1902,7 +1825,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]]
; NVPTX-NEXT: br label [[OMP_IF_END]]
; NVPTX: omp_if.end:
-; NVPTX-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) #[[ATTR3]]
+; NVPTX-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP0]]) #[[ATTR3]]
; NVPTX-NEXT: ret void
;
;
@@ -1930,7 +1853,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment)
+; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false)
; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; NVPTX: is_worker_check:
@@ -1975,7 +1898,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX: user_code.entry:
; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
; NVPTX-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX-NEXT: call void @__kmpc_target_deinit()
+; NVPTX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
; NVPTX-NEXT: ret void
; NVPTX: worker.exit:
; NVPTX-NEXT: ret void
@@ -2048,7 +1971,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment)
+; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false)
; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; NVPTX: is_worker_check:
@@ -2099,7 +2022,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX: user_code.entry:
; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
; NVPTX-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX-NEXT: call void @__kmpc_target_deinit()
+; NVPTX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
; NVPTX-NEXT: ret void
; NVPTX: worker.exit:
; NVPTX-NEXT: ret void
@@ -2189,7 +2112,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment)
+; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false)
; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; NVPTX: is_worker_check:
@@ -2236,7 +2159,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX: user_code.entry:
; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
; NVPTX-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX-NEXT: call void @__kmpc_target_deinit()
+; NVPTX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
; NVPTX-NEXT: ret void
; NVPTX: worker.exit:
; NVPTX-NEXT: ret void
@@ -2308,7 +2231,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment)
+; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false)
; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; NVPTX: is_worker_check:
@@ -2353,7 +2276,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX: user_code.entry:
; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
; NVPTX-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX-NEXT: call void @__kmpc_target_deinit()
+; NVPTX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
; NVPTX-NEXT: ret void
; NVPTX: worker.exit:
; NVPTX-NEXT: ret void
@@ -2425,7 +2348,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment)
+; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false)
; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; NVPTX: is_worker_check:
@@ -2470,7 +2393,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX: user_code.entry:
; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
; NVPTX-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX-NEXT: call void @__kmpc_target_deinit()
+; NVPTX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
; NVPTX-NEXT: ret void
; NVPTX: worker.exit:
; NVPTX-NEXT: ret void
@@ -2542,7 +2465,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment)
+; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false)
; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; NVPTX: is_worker_check:
@@ -2581,7 +2504,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX: user_code.entry:
; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
; NVPTX-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX-NEXT: call void @__kmpc_target_deinit()
+; NVPTX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
; NVPTX-NEXT: ret void
; NVPTX: worker.exit:
; NVPTX-NEXT: ret void
@@ -2647,7 +2570,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment)
+; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false)
; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; NVPTX: is_worker_check:
@@ -2682,7 +2605,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX: user_code.entry:
; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
; NVPTX-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX-NEXT: call void @__kmpc_target_deinit()
+; NVPTX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
; NVPTX-NEXT: ret void
; NVPTX: worker.exit:
; NVPTX-NEXT: ret void
@@ -2800,20 +2723,20 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment)
+; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true)
; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU-DISABLED: user_code.entry:
; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]]
; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit()
+; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
; AMDGPU-DISABLED-NEXT: ret void
; AMDGPU-DISABLED: worker.exit:
; AMDGPU-DISABLED-NEXT: ret void
;
;
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__kmpc_target_init
-; AMDGPU-DISABLED-SAME: (ptr [[TMP0:%.*]]) {
+; AMDGPU-DISABLED-SAME: (ptr [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) {
; AMDGPU-DISABLED-NEXT: ret i32 0
;
;
@@ -2832,7 +2755,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@no_parallel_region_in_here.internalized
; AMDGPU-DISABLED-SAME: () #[[ATTR1:[0-9]+]] {
; AMDGPU-DISABLED-NEXT: entry:
-; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
+; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) #[[ATTR3]]
; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]]
; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
; AMDGPU-DISABLED-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
@@ -2841,7 +2764,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]]
; AMDGPU-DISABLED-NEXT: br label [[OMP_IF_END]]
; AMDGPU-DISABLED: omp_if.end:
-; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) #[[ATTR3]]
+; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP0]]) #[[ATTR3]]
; AMDGPU-DISABLED-NEXT: ret void
;
;
@@ -2868,13 +2791,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment)
+; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU-DISABLED: user_code.entry:
; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit()
+; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
; AMDGPU-DISABLED-NEXT: ret void
; AMDGPU-DISABLED: worker.exit:
; AMDGPU-DISABLED-NEXT: ret void
@@ -2946,13 +2869,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment)
+; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU-DISABLED: user_code.entry:
; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit()
+; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
; AMDGPU-DISABLED-NEXT: ret void
; AMDGPU-DISABLED: worker.exit:
; AMDGPU-DISABLED-NEXT: ret void
@@ -3041,13 +2964,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment)
+; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU-DISABLED: user_code.entry:
; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit()
+; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
; AMDGPU-DISABLED-NEXT: ret void
; AMDGPU-DISABLED: worker.exit:
; AMDGPU-DISABLED-NEXT: ret void
@@ -3118,13 +3041,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment)
+; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU-DISABLED: user_code.entry:
; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit()
+; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
; AMDGPU-DISABLED-NEXT: ret void
; AMDGPU-DISABLED: worker.exit:
; AMDGPU-DISABLED-NEXT: ret void
@@ -3195,13 +3118,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment)
+; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU-DISABLED: user_code.entry:
; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit()
+; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
; AMDGPU-DISABLED-NEXT: ret void
; AMDGPU-DISABLED: worker.exit:
; AMDGPU-DISABLED-NEXT: ret void
@@ -3272,13 +3195,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment)
+; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU-DISABLED: user_code.entry:
; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit()
+; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
; AMDGPU-DISABLED-NEXT: ret void
; AMDGPU-DISABLED: worker.exit:
; AMDGPU-DISABLED-NEXT: ret void
@@ -3343,13 +3266,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment)
+; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU-DISABLED: user_code.entry:
; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit()
+; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
; AMDGPU-DISABLED-NEXT: ret void
; AMDGPU-DISABLED: worker.exit:
; AMDGPU-DISABLED-NEXT: ret void
@@ -3467,20 +3390,20 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment)
+; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true)
; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX-DISABLED: user_code.entry:
; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]]
; NVPTX-DISABLED-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit()
+; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
; NVPTX-DISABLED-NEXT: ret void
; NVPTX-DISABLED: worker.exit:
; NVPTX-DISABLED-NEXT: ret void
;
;
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__kmpc_target_init
-; NVPTX-DISABLED-SAME: (ptr [[TMP0:%.*]]) {
+; NVPTX-DISABLED-SAME: (ptr [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) {
; NVPTX-DISABLED-NEXT: ret i32 0
;
;
@@ -3499,7 +3422,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@no_parallel_region_in_here.internalized
; NVPTX-DISABLED-SAME: () #[[ATTR1:[0-9]+]] {
; NVPTX-DISABLED-NEXT: entry:
-; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
+; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) #[[ATTR3]]
; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]]
; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
; NVPTX-DISABLED-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
@@ -3508,7 +3431,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]]
; NVPTX-DISABLED-NEXT: br label [[OMP_IF_END]]
; NVPTX-DISABLED: omp_if.end:
-; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) #[[ATTR3]]
+; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP0]]) #[[ATTR3]]
; NVPTX-DISABLED-NEXT: ret void
;
;
@@ -3535,13 +3458,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment)
+; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX-DISABLED: user_code.entry:
; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
; NVPTX-DISABLED-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit()
+; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
; NVPTX-DISABLED-NEXT: ret void
; NVPTX-DISABLED: worker.exit:
; NVPTX-DISABLED-NEXT: ret void
@@ -3613,13 +3536,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment)
+; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX-DISABLED: user_code.entry:
; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
; NVPTX-DISABLED-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit()
+; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
; NVPTX-DISABLED-NEXT: ret void
; NVPTX-DISABLED: worker.exit:
; NVPTX-DISABLED-NEXT: ret void
@@ -3708,13 +3631,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment)
+; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX-DISABLED: user_code.entry:
; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
; NVPTX-DISABLED-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit()
+; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
; NVPTX-DISABLED-NEXT: ret void
; NVPTX-DISABLED: worker.exit:
; NVPTX-DISABLED-NEXT: ret void
@@ -3785,13 +3708,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment)
+; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX-DISABLED: user_code.entry:
; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
; NVPTX-DISABLED-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit()
+; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
; NVPTX-DISABLED-NEXT: ret void
; NVPTX-DISABLED: worker.exit:
; NVPTX-DISABLED-NEXT: ret void
@@ -3862,13 +3785,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment)
+; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX-DISABLED: user_code.entry:
; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
; NVPTX-DISABLED-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit()
+; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
; NVPTX-DISABLED-NEXT: ret void
; NVPTX-DISABLED: worker.exit:
; NVPTX-DISABLED-NEXT: ret void
@@ -3939,13 +3862,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment)
+; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX-DISABLED: user_code.entry:
; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
; NVPTX-DISABLED-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit()
+; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
; NVPTX-DISABLED-NEXT: ret void
; NVPTX-DISABLED: worker.exit:
; NVPTX-DISABLED-NEXT: ret void
@@ -4010,13 +3933,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment)
+; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX-DISABLED: user_code.entry:
; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
; NVPTX-DISABLED-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit()
+; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
; NVPTX-DISABLED-NEXT: ret void
; NVPTX-DISABLED: worker.exit:
; NVPTX-DISABLED-NEXT: ret void
@@ -4127,136 +4050,3 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-NEXT: call void @__omp_outlined__19(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX-DISABLED-NEXT: ret void
;
-;.
-; AMDGPU: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; AMDGPU: attributes #[[ATTR1]] = { convergent noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; AMDGPU: attributes #[[ATTR2:[0-9]+]] = { convergent "frame-pointer"="none" "llvm.assume"="omp_no_openmp" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; AMDGPU: attributes #[[ATTR3]] = { nounwind }
-; AMDGPU: attributes #[[ATTR4:[0-9]+]] = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; AMDGPU: attributes #[[ATTR5:[0-9]+]] = { alwaysinline }
-; AMDGPU: attributes #[[ATTR6]] = { noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; AMDGPU: attributes #[[ATTR7:[0-9]+]] = { convergent nounwind willreturn memory(read) "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; AMDGPU: attributes #[[ATTR8:[0-9]+]] = { convergent nounwind "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; AMDGPU: attributes #[[ATTR9]] = { convergent nounwind }
-; AMDGPU: attributes #[[ATTR10]] = { convergent "llvm.assume"="omp_no_openmp" }
-; AMDGPU: attributes #[[ATTR11]] = { convergent }
-;.
-; NVPTX: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; NVPTX: attributes #[[ATTR1]] = { convergent noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; NVPTX: attributes #[[ATTR2:[0-9]+]] = { convergent "frame-pointer"="none" "llvm.assume"="omp_no_openmp" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; NVPTX: attributes #[[ATTR3]] = { nounwind }
-; NVPTX: attributes #[[ATTR4:[0-9]+]] = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; NVPTX: attributes #[[ATTR5:[0-9]+]] = { alwaysinline }
-; NVPTX: attributes #[[ATTR6]] = { noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; NVPTX: attributes #[[ATTR7:[0-9]+]] = { convergent nounwind willreturn memory(read) "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; NVPTX: attributes #[[ATTR8:[0-9]+]] = { convergent nounwind "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; NVPTX: attributes #[[ATTR9]] = { convergent nounwind }
-; NVPTX: attributes #[[ATTR10]] = { convergent "llvm.assume"="omp_no_openmp" }
-; NVPTX: attributes #[[ATTR11]] = { convergent }
-;.
-; AMDGPU-DISABLED: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; AMDGPU-DISABLED: attributes #[[ATTR1]] = { convergent noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; AMDGPU-DISABLED: attributes #[[ATTR2:[0-9]+]] = { convergent "frame-pointer"="none" "llvm.assume"="omp_no_openmp" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; AMDGPU-DISABLED: attributes #[[ATTR3]] = { nounwind }
-; AMDGPU-DISABLED: attributes #[[ATTR4:[0-9]+]] = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; AMDGPU-DISABLED: attributes #[[ATTR5:[0-9]+]] = { alwaysinline }
-; AMDGPU-DISABLED: attributes #[[ATTR6]] = { noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; AMDGPU-DISABLED: attributes #[[ATTR7:[0-9]+]] = { convergent nounwind willreturn memory(read) "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; AMDGPU-DISABLED: attributes #[[ATTR8:[0-9]+]] = { convergent nounwind "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; AMDGPU-DISABLED: attributes #[[ATTR9]] = { convergent nounwind }
-; AMDGPU-DISABLED: attributes #[[ATTR10]] = { convergent "llvm.assume"="omp_no_openmp" }
-; AMDGPU-DISABLED: attributes #[[ATTR11]] = { convergent }
-;.
-; NVPTX-DISABLED: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; NVPTX-DISABLED: attributes #[[ATTR1]] = { convergent noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; NVPTX-DISABLED: attributes #[[ATTR2:[0-9]+]] = { convergent "frame-pointer"="none" "llvm.assume"="omp_no_openmp" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; NVPTX-DISABLED: attributes #[[ATTR3]] = { nounwind }
-; NVPTX-DISABLED: attributes #[[ATTR4:[0-9]+]] = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; NVPTX-DISABLED: attributes #[[ATTR5:[0-9]+]] = { alwaysinline }
-; NVPTX-DISABLED: attributes #[[ATTR6]] = { noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; NVPTX-DISABLED: attributes #[[ATTR7:[0-9]+]] = { convergent nounwind willreturn memory(read) "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; NVPTX-DISABLED: attributes #[[ATTR8:[0-9]+]] = { convergent nounwind "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; NVPTX-DISABLED: attributes #[[ATTR9]] = { convergent nounwind }
-; NVPTX-DISABLED: attributes #[[ATTR10]] = { convergent "llvm.assume"="omp_no_openmp" }
-; NVPTX-DISABLED: attributes #[[ATTR11]] = { convergent }
-;.
-; AMDGPU: [[META0:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural", i32 39, i32 2}
-; AMDGPU: [[META1:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_no_openmp_attr", i32 66, i32 4}
-; AMDGPU: [[META2:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_needed", i32 14, i32 0}
-; AMDGPU: [[META3:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_with_fallback", i32 55, i32 3}
-; AMDGPU: [[META4:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_pure", i32 77, i32 5}
-; AMDGPU: [[META5:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural_nested_recursive", i32 92, i32 6}
-; AMDGPU: [[META6:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_weak_callee", i32 112, i32 7}
-; AMDGPU: [[META7:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine", i32 22, i32 1}
-; AMDGPU: [[META8:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14, !"kernel", i32 1}
-; AMDGPU: [[META9:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_l22, !"kernel", i32 1}
-; AMDGPU: [[META10:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39, !"kernel", i32 1}
-; AMDGPU: [[META11:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55, !"kernel", i32 1}
-; AMDGPU: [[META12:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66, !"kernel", i32 1}
-; AMDGPU: [[META13:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77, !"kernel", i32 1}
-; AMDGPU: [[META14:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92, !"kernel", i32 1}
-; AMDGPU: [[META15:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112, !"kernel", i32 1}
-; AMDGPU: [[META16:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
-; AMDGPU: [[META17:![0-9]+]] = !{i32 7, !"openmp", i32 50}
-; AMDGPU: [[META18:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
-;.
-; NVPTX: [[META0:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural", i32 39, i32 2}
-; NVPTX: [[META1:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_no_openmp_attr", i32 66, i32 4}
-; NVPTX: [[META2:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_needed", i32 14, i32 0}
-; NVPTX: [[META3:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_with_fallback", i32 55, i32 3}
-; NVPTX: [[META4:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_pure", i32 77, i32 5}
-; NVPTX: [[META5:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural_nested_recursive", i32 92, i32 6}
-; NVPTX: [[META6:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_weak_callee", i32 112, i32 7}
-; NVPTX: [[META7:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine", i32 22, i32 1}
-; NVPTX: [[META8:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14, !"kernel", i32 1}
-; NVPTX: [[META9:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_l22, !"kernel", i32 1}
-; NVPTX: [[META10:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39, !"kernel", i32 1}
-; NVPTX: [[META11:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55, !"kernel", i32 1}
-; NVPTX: [[META12:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66, !"kernel", i32 1}
-; NVPTX: [[META13:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77, !"kernel", i32 1}
-; NVPTX: [[META14:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92, !"kernel", i32 1}
-; NVPTX: [[META15:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112, !"kernel", i32 1}
-; NVPTX: [[META16:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
-; NVPTX: [[META17:![0-9]+]] = !{i32 7, !"openmp", i32 50}
-; NVPTX: [[META18:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
-;.
-; AMDGPU-DISABLED: [[META0:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural", i32 39, i32 2}
-; AMDGPU-DISABLED: [[META1:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_no_openmp_attr", i32 66, i32 4}
-; AMDGPU-DISABLED: [[META2:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_needed", i32 14, i32 0}
-; AMDGPU-DISABLED: [[META3:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_with_fallback", i32 55, i32 3}
-; AMDGPU-DISABLED: [[META4:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_pure", i32 77, i32 5}
-; AMDGPU-DISABLED: [[META5:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural_nested_recursive", i32 92, i32 6}
-; AMDGPU-DISABLED: [[META6:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_weak_callee", i32 112, i32 7}
-; AMDGPU-DISABLED: [[META7:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine", i32 22, i32 1}
-; AMDGPU-DISABLED: [[META8:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14, !"kernel", i32 1}
-; AMDGPU-DISABLED: [[META9:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_l22, !"kernel", i32 1}
-; AMDGPU-DISABLED: [[META10:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39, !"kernel", i32 1}
-; AMDGPU-DISABLED: [[META11:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55, !"kernel", i32 1}
-; AMDGPU-DISABLED: [[META12:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66, !"kernel", i32 1}
-; AMDGPU-DISABLED: [[META13:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77, !"kernel", i32 1}
-; AMDGPU-DISABLED: [[META14:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92, !"kernel", i32 1}
-; AMDGPU-DISABLED: [[META15:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112, !"kernel", i32 1}
-; AMDGPU-DISABLED: [[META16:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
-; AMDGPU-DISABLED: [[META17:![0-9]+]] = !{i32 7, !"openmp", i32 50}
-; AMDGPU-DISABLED: [[META18:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
-;.
-; NVPTX-DISABLED: [[META0:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural", i32 39, i32 2}
-; NVPTX-DISABLED: [[META1:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_no_openmp_attr", i32 66, i32 4}
-; NVPTX-DISABLED: [[META2:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_needed", i32 14, i32 0}
-; NVPTX-DISABLED: [[META3:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_with_fallback", i32 55, i32 3}
-; NVPTX-DISABLED: [[META4:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_pure", i32 77, i32 5}
-; NVPTX-DISABLED: [[META5:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural_nested_recursive", i32 92, i32 6}
-; NVPTX-DISABLED: [[META6:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_weak_callee", i32 112, i32 7}
-; NVPTX-DISABLED: [[META7:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine", i32 22, i32 1}
-; NVPTX-DISABLED: [[META8:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14, !"kernel", i32 1}
-; NVPTX-DISABLED: [[META9:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_l22, !"kernel", i32 1}
-; NVPTX-DISABLED: [[META10:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39, !"kernel", i32 1}
-; NVPTX-DISABLED: [[META11:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55, !"kernel", i32 1}
-; NVPTX-DISABLED: [[META12:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66, !"kernel", i32 1}
-; NVPTX-DISABLED: [[META13:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77, !"kernel", i32 1}
-; NVPTX-DISABLED: [[META14:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92, !"kernel", i32 1}
-; NVPTX-DISABLED: [[META15:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112, !"kernel", i32 1}
-; NVPTX-DISABLED: [[META16:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
-; NVPTX-DISABLED: [[META17:![0-9]+]] = !{i32 7, !"openmp", i32 50}
-; NVPTX-DISABLED: [[META18:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
-;.
diff --git a/llvm/test/Transforms/OpenMP/custom_state_machines_pre_lto.ll b/llvm/test/Transforms/OpenMP/custom_state_machines_pre_lto.ll
index dffb2de31cc5e7..002ae7b078b2b4 100644
--- a/llvm/test/Transforms/OpenMP/custom_state_machines_pre_lto.ll
+++ b/llvm/test/Transforms/OpenMP/custom_state_machines_pre_lto.ll
@@ -1,10 +1,10 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals --include-generated-funcs
-; RUN: opt --mtriple=amdgcn-amd-amdhsa --data-layout=A5 -S -passes=openmp-opt < %s | FileCheck %s --check-prefix=AMDGPU1
-; RUN: opt --mtriple=nvptx64-- -S -passes=openmp-opt < %s | FileCheck %s --check-prefix=NVPTX1
-; RUN: opt --mtriple=amdgcn-amd-amdhsa --data-layout=A5 -openmp-opt-disable-state-machine-rewrite -S -passes=openmp-opt < %s | FileCheck %s --check-prefix=AMDGPU2
-; RUN: opt --mtriple=amdgcn-amd-amdhsa --data-layout=A5 -S -passes=openmp-opt-postlink < %s | FileCheck %s --check-prefix=AMDGPU3
-; RUN: opt --mtriple=nvptx64-- -openmp-opt-disable-state-machine-rewrite -S -passes=openmp-opt < %s | FileCheck %s --check-prefix=NVPTX2
-; RUN: opt --mtriple=nvptx64-- -S -passes=openmp-opt-postlink < %s | FileCheck %s --check-prefix=NVPTX3
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --include-generated-funcs
+; RUN: opt --mtriple=amdgcn-amd-amdhsa --data-layout=A5 -S -passes=openmp-opt < %s | FileCheck %s --check-prefix=AMDGPU
+; RUN: opt --mtriple=nvptx64-- -S -passes=openmp-opt < %s | FileCheck %s --check-prefix=NVPTX
+; RUN: opt --mtriple=amdgcn-amd-amdhsa --data-layout=A5 -openmp-opt-disable-state-machine-rewrite -S -passes=openmp-opt < %s | FileCheck %s --check-prefix=AMDGPU
+; RUN: opt --mtriple=amdgcn-amd-amdhsa --data-layout=A5 -S -passes=openmp-opt-postlink < %s | FileCheck %s --check-prefix=AMDGPU
+; RUN: opt --mtriple=nvptx64-- -openmp-opt-disable-state-machine-rewrite -S -passes=openmp-opt < %s | FileCheck %s --check-prefix=NVPTX
+; RUN: opt --mtriple=nvptx64-- -S -passes=openmp-opt-postlink < %s | FileCheck %s --check-prefix=NVPTX
;; void p0(void);
;; void p1(void);
@@ -122,29 +122,28 @@
;; }
%struct.ident_t = type { i32, i32, i32, i32, ptr }
-%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr }
-%struct.ConfigurationEnvironmentTy = type { i8, i8, i8 }
@0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8
+@__omp_offloading_14_a36502b_no_state_machine_needed_l14_exec_mode = weak constant i8 1
+@__omp_offloading_14_a36502b_simple_state_machine_l22_exec_mode = weak constant i8 1
+@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_exec_mode = weak constant i8 1
+@__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_exec_mode = weak constant i8 1
+@__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_exec_mode = weak constant i8 1
+@__omp_offloading_14_a36502b_simple_state_machine_pure_l77_exec_mode = weak constant i8 1
+@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_exec_mode = weak constant i8 1
+@__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_exec_mode = weak constant i8 1
@2 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 2, i32 0, ptr @0 }, align 8
@G = external global i32, align 4
@3 = private unnamed_addr constant %struct.ident_t { i32 0, i32 322, i32 2, i32 0, ptr @0 }, align 8
-@__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr @1, ptr null }
-@__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr @1, ptr null }
-@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr @1, ptr null }
-@__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr @1, ptr null }
-@__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr @1, ptr null }
-@__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr @1, ptr null }
-@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr @1, ptr null }
-@__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr @1, ptr null }
+@llvm.compiler.used = appending global [8 x ptr] [ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_exec_mode, ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_exec_mode, ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_exec_mode, ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_exec_mode, ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_exec_mode, ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_exec_mode, ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_exec_mode, ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_exec_mode], section "llvm.metadata"
define weak void @__omp_offloading_14_a36502b_no_state_machine_needed_l14() #0 {
entry:
%.zero.addr = alloca i32, align 4
%.threadid_temp. = alloca i32, align 4
store i32 0, ptr %.zero.addr, align 4
- %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment)
+ %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %worker.exit
@@ -152,7 +151,7 @@ user_code.entry: ; preds = %entry
%1 = call i32 @__kmpc_global_thread_num(ptr @1)
store i32 %1, ptr %.threadid_temp., align 4
call void @__omp_outlined__(ptr %.threadid_temp., ptr %.zero.addr) #3
- call void @__kmpc_target_deinit()
+ call void @__kmpc_target_deinit(ptr @1, i8 1)
ret void
worker.exit: ; preds = %entry
@@ -160,7 +159,7 @@ worker.exit: ; preds = %entry
}
; Make it a declaration so we will *not* apply custom state machine rewriting and wait for LTO.
-declare i32 @__kmpc_target_init(ptr);
+declare i32 @__kmpc_target_init(ptr, i8, i1);
define internal void @__omp_outlined__(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
entry:
@@ -194,14 +193,14 @@ declare void @unknown_no_openmp() #2
declare i32 @__kmpc_global_thread_num(ptr) #3
-declare void @__kmpc_target_deinit()
+declare void @__kmpc_target_deinit(ptr, i8)
define weak void @__omp_offloading_14_a36502b_simple_state_machine_l22() #0 {
entry:
%.zero.addr = alloca i32, align 4
%.threadid_temp. = alloca i32, align 4
store i32 0, ptr %.zero.addr, align 4
- %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment)
+ %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %worker.exit
@@ -209,7 +208,7 @@ user_code.entry: ; preds = %entry
%1 = call i32 @__kmpc_global_thread_num(ptr @1)
store i32 %1, ptr %.threadid_temp., align 4
call void @__omp_outlined__1(ptr %.threadid_temp., ptr %.zero.addr) #3
- call void @__kmpc_target_deinit()
+ call void @__kmpc_target_deinit(ptr @1, i8 1)
ret void
worker.exit: ; preds = %entry
@@ -294,7 +293,7 @@ entry:
%.zero.addr = alloca i32, align 4
%.threadid_temp. = alloca i32, align 4
store i32 0, ptr %.zero.addr, align 4
- %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment)
+ %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %worker.exit
@@ -302,7 +301,7 @@ user_code.entry: ; preds = %entry
%1 = call i32 @__kmpc_global_thread_num(ptr @1)
store i32 %1, ptr %.threadid_temp., align 4
call void @__omp_outlined__4(ptr %.threadid_temp., ptr %.zero.addr) #3
- call void @__kmpc_target_deinit()
+ call void @__kmpc_target_deinit(ptr @1, i8 1)
ret void
worker.exit: ; preds = %entry
@@ -371,7 +370,7 @@ entry:
%.zero.addr = alloca i32, align 4
%.threadid_temp. = alloca i32, align 4
store i32 0, ptr %.zero.addr, align 4
- %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment)
+ %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %worker.exit
@@ -379,7 +378,7 @@ user_code.entry: ; preds = %entry
%1 = call i32 @__kmpc_global_thread_num(ptr @1)
store i32 %1, ptr %.threadid_temp., align 4
call void @__omp_outlined__6(ptr %.threadid_temp., ptr %.zero.addr) #3
- call void @__kmpc_target_deinit()
+ call void @__kmpc_target_deinit(ptr @1, i8 1)
ret void
worker.exit: ; preds = %entry
@@ -457,7 +456,7 @@ entry:
%.zero.addr = alloca i32, align 4
%.threadid_temp. = alloca i32, align 4
store i32 0, ptr %.zero.addr, align 4
- %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment)
+ %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %worker.exit
@@ -465,7 +464,7 @@ user_code.entry: ; preds = %entry
%1 = call i32 @__kmpc_global_thread_num(ptr @1)
store i32 %1, ptr %.threadid_temp., align 4
call void @__omp_outlined__9(ptr %.threadid_temp., ptr %.zero.addr) #3
- call void @__kmpc_target_deinit()
+ call void @__kmpc_target_deinit(ptr @1, i8 1)
ret void
worker.exit: ; preds = %entry
@@ -541,7 +540,7 @@ entry:
%.zero.addr = alloca i32, align 4
%.threadid_temp. = alloca i32, align 4
store i32 0, ptr %.zero.addr, align 4
- %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment)
+ %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %worker.exit
@@ -549,7 +548,7 @@ user_code.entry: ; preds = %entry
%1 = call i32 @__kmpc_global_thread_num(ptr @1)
store i32 %1, ptr %.threadid_temp., align 4
call void @__omp_outlined__12(ptr %.threadid_temp., ptr %.zero.addr) #3
- call void @__kmpc_target_deinit()
+ call void @__kmpc_target_deinit(ptr @1, i8 1)
ret void
worker.exit: ; preds = %entry
@@ -628,7 +627,7 @@ entry:
%.zero.addr = alloca i32, align 4
%.threadid_temp. = alloca i32, align 4
store i32 0, ptr %.zero.addr, align 4
- %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment)
+ %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %worker.exit
@@ -636,7 +635,7 @@ user_code.entry: ; preds = %entry
%1 = call i32 @__kmpc_global_thread_num(ptr @1)
store i32 %1, ptr %.threadid_temp., align 4
call void @__omp_outlined__15(ptr %.threadid_temp., ptr %.zero.addr) #3
- call void @__kmpc_target_deinit()
+ call void @__kmpc_target_deinit(ptr @1, i8 1)
ret void
worker.exit: ; preds = %entry
@@ -683,7 +682,7 @@ entry:
%.zero.addr = alloca i32, align 4
%.threadid_temp. = alloca i32, align 4
store i32 0, ptr %.zero.addr, align 4
- %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment)
+ %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %worker.exit
@@ -691,7 +690,7 @@ user_code.entry: ; preds = %entry
%1 = call i32 @__kmpc_global_thread_num(ptr @1)
store i32 %1, ptr %.threadid_temp., align 4
call void @__omp_outlined__16(ptr %.threadid_temp., ptr %.zero.addr) #3
- call void @__kmpc_target_deinit()
+ call void @__kmpc_target_deinit(ptr @1, i8 1)
ret void
worker.exit: ; preds = %entry
@@ -833,4258 +832,1326 @@ attributes #9 = { convergent nounwind readonly willreturn }
!16 = !{i32 1, !"wchar_size", i32 4}
!17 = !{i32 7, !"openmp", i32 50}
!18 = !{i32 7, !"openmp-device", i32 50}
-;.
-; AMDGPU1: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
-; AMDGPU1: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8
-; AMDGPU1: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 2, i32 0, ptr @[[GLOB0]] }, align 8
-; AMDGPU1: @[[G:[a-zA-Z0-9_$"\\.-]+]] = external global i32, align 4
-; AMDGPU1: @[[GLOB3:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 322, i32 2, i32 0, ptr @[[GLOB0]] }, align 8
-; AMDGPU1: @[[__OMP_OFFLOADING_14_A36502B_NO_STATE_MACHINE_NEEDED_L14_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null }
-; AMDGPU1: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_L22_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null }
-; AMDGPU1: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_INTERPROCEDURAL_L39_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null }
-; AMDGPU1: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_WITH_FALLBACK_L55_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null }
-; AMDGPU1: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_NO_OPENMP_ATTR_L66_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null }
-; AMDGPU1: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_PURE_L77_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null }
-; AMDGPU1: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_INTERPROCEDURAL_NESTED_RECURSIVE_L92_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null }
-; AMDGPU1: @[[__OMP_OFFLOADING_14_A36502B_NO_STATE_MACHINE_WEAK_CALLEE_L112_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null }
-;.
-; NVPTX1: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
-; NVPTX1: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8
-; NVPTX1: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 2, i32 0, ptr @[[GLOB0]] }, align 8
-; NVPTX1: @[[G:[a-zA-Z0-9_$"\\.-]+]] = external global i32, align 4
-; NVPTX1: @[[GLOB3:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 322, i32 2, i32 0, ptr @[[GLOB0]] }, align 8
-; NVPTX1: @[[__OMP_OFFLOADING_14_A36502B_NO_STATE_MACHINE_NEEDED_L14_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null }
-; NVPTX1: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_L22_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null }
-; NVPTX1: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_INTERPROCEDURAL_L39_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null }
-; NVPTX1: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_WITH_FALLBACK_L55_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null }
-; NVPTX1: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_NO_OPENMP_ATTR_L66_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null }
-; NVPTX1: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_PURE_L77_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null }
-; NVPTX1: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_INTERPROCEDURAL_NESTED_RECURSIVE_L92_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null }
-; NVPTX1: @[[__OMP_OFFLOADING_14_A36502B_NO_STATE_MACHINE_WEAK_CALLEE_L112_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null }
-;.
-; AMDGPU2: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
-; AMDGPU2: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8
-; AMDGPU2: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 2, i32 0, ptr @[[GLOB0]] }, align 8
-; AMDGPU2: @[[G:[a-zA-Z0-9_$"\\.-]+]] = external global i32, align 4
-; AMDGPU2: @[[GLOB3:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 322, i32 2, i32 0, ptr @[[GLOB0]] }, align 8
-; AMDGPU2: @[[__OMP_OFFLOADING_14_A36502B_NO_STATE_MACHINE_NEEDED_L14_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 1, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null }
-; AMDGPU2: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_L22_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 1, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null }
-; AMDGPU2: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_INTERPROCEDURAL_L39_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 1, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null }
-; AMDGPU2: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_WITH_FALLBACK_L55_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 1, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null }
-; AMDGPU2: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_NO_OPENMP_ATTR_L66_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 1, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null }
-; AMDGPU2: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_PURE_L77_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 1, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null }
-; AMDGPU2: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_INTERPROCEDURAL_NESTED_RECURSIVE_L92_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 1, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null }
-; AMDGPU2: @[[__OMP_OFFLOADING_14_A36502B_NO_STATE_MACHINE_WEAK_CALLEE_L112_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 1, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null }
-;.
-; AMDGPU3: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
-; AMDGPU3: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8
-; AMDGPU3: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 2, i32 0, ptr @[[GLOB0]] }, align 8
-; AMDGPU3: @[[G:[a-zA-Z0-9_$"\\.-]+]] = external global i32, align 4
-; AMDGPU3: @[[GLOB3:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 322, i32 2, i32 0, ptr @[[GLOB0]] }, align 8
-; AMDGPU3: @[[__OMP_OFFLOADING_14_A36502B_NO_STATE_MACHINE_NEEDED_L14_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null }
-; AMDGPU3: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_L22_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null }
-; AMDGPU3: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_INTERPROCEDURAL_L39_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null }
-; AMDGPU3: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_WITH_FALLBACK_L55_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null }
-; AMDGPU3: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_NO_OPENMP_ATTR_L66_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null }
-; AMDGPU3: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_PURE_L77_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null }
-; AMDGPU3: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_INTERPROCEDURAL_NESTED_RECURSIVE_L92_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null }
-; AMDGPU3: @[[__OMP_OFFLOADING_14_A36502B_NO_STATE_MACHINE_WEAK_CALLEE_L112_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null }
-;.
-; NVPTX2: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
-; NVPTX2: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8
-; NVPTX2: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 2, i32 0, ptr @[[GLOB0]] }, align 8
-; NVPTX2: @[[G:[a-zA-Z0-9_$"\\.-]+]] = external global i32, align 4
-; NVPTX2: @[[GLOB3:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 322, i32 2, i32 0, ptr @[[GLOB0]] }, align 8
-; NVPTX2: @[[__OMP_OFFLOADING_14_A36502B_NO_STATE_MACHINE_NEEDED_L14_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 1, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null }
-; NVPTX2: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_L22_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 1, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null }
-; NVPTX2: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_INTERPROCEDURAL_L39_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 1, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null }
-; NVPTX2: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_WITH_FALLBACK_L55_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 1, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null }
-; NVPTX2: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_NO_OPENMP_ATTR_L66_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 1, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null }
-; NVPTX2: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_PURE_L77_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 1, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null }
-; NVPTX2: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_INTERPROCEDURAL_NESTED_RECURSIVE_L92_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 1, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null }
-; NVPTX2: @[[__OMP_OFFLOADING_14_A36502B_NO_STATE_MACHINE_WEAK_CALLEE_L112_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 1, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null }
-;.
-; NVPTX3: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
-; NVPTX3: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8
-; NVPTX3: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 2, i32 0, ptr @[[GLOB0]] }, align 8
-; NVPTX3: @[[G:[a-zA-Z0-9_$"\\.-]+]] = external global i32, align 4
-; NVPTX3: @[[GLOB3:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 322, i32 2, i32 0, ptr @[[GLOB0]] }, align 8
-; NVPTX3: @[[__OMP_OFFLOADING_14_A36502B_NO_STATE_MACHINE_NEEDED_L14_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null }
-; NVPTX3: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_L22_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null }
-; NVPTX3: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_INTERPROCEDURAL_L39_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null }
-; NVPTX3: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_WITH_FALLBACK_L55_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null }
-; NVPTX3: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_NO_OPENMP_ATTR_L66_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null }
-; NVPTX3: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_PURE_L77_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null }
-; NVPTX3: @[[__OMP_OFFLOADING_14_A36502B_SIMPLE_STATE_MACHINE_INTERPROCEDURAL_NESTED_RECURSIVE_L92_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null }
-; NVPTX3: @[[__OMP_OFFLOADING_14_A36502B_NO_STATE_MACHINE_WEAK_CALLEE_L112_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null }
-;.
-; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU1-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_needed_l14
-; AMDGPU1-SAME: () #[[ATTR0:[0-9]+]] {
-; AMDGPU1-NEXT: entry:
-; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment)
-; AMDGPU1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; AMDGPU1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
-; AMDGPU1: user_code.entry:
-; AMDGPU1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]]
-; AMDGPU1-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU1-NEXT: call void @__kmpc_target_deinit()
-; AMDGPU1-NEXT: ret void
-; AMDGPU1: worker.exit:
-; AMDGPU1-NEXT: ret void
-;
-;
-; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__
-; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; AMDGPU1-NEXT: entry:
-; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU1-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9:[0-9]+]]
-; AMDGPU1-NEXT: call void @unknown_no_openmp() #[[ATTR10:[0-9]+]]
-; AMDGPU1-NEXT: ret void
-;
-;
-; AMDGPU1: Function Attrs: convergent noinline nounwind
-; AMDGPU1-LABEL: define {{[^@]+}}@no_parallel_region_in_here.internalized
-; AMDGPU1-SAME: () #[[ATTR1:[0-9]+]] {
-; AMDGPU1-NEXT: entry:
-; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
-; AMDGPU1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]]
-; AMDGPU1-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
-; AMDGPU1-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
-; AMDGPU1: omp_if.then:
-; AMDGPU1-NEXT: store i32 0, ptr @G, align 4
-; AMDGPU1-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]]
-; AMDGPU1-NEXT: br label [[OMP_IF_END]]
-; AMDGPU1: omp_if.end:
-; AMDGPU1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) #[[ATTR3]]
-; AMDGPU1-NEXT: ret void
-;
-;
-; AMDGPU1: Function Attrs: convergent noinline nounwind
-; AMDGPU1-LABEL: define {{[^@]+}}@no_parallel_region_in_here
-; AMDGPU1-SAME: () #[[ATTR1]] {
-; AMDGPU1-NEXT: entry:
-; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
-; AMDGPU1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]])
-; AMDGPU1-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
-; AMDGPU1-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
-; AMDGPU1: omp_if.then:
-; AMDGPU1-NEXT: store i32 0, ptr @G, align 4
-; AMDGPU1-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]])
-; AMDGPU1-NEXT: br label [[OMP_IF_END]]
-; AMDGPU1: omp_if.end:
-; AMDGPU1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]])
-; AMDGPU1-NEXT: ret void
-;
-;
-; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU1-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_l22
-; AMDGPU1-SAME: () #[[ATTR0]] {
-; AMDGPU1-NEXT: entry:
-; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment)
-; AMDGPU1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; AMDGPU1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
-; AMDGPU1: user_code.entry:
-; AMDGPU1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU1-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU1-NEXT: call void @__kmpc_target_deinit()
-; AMDGPU1-NEXT: ret void
-; AMDGPU1: worker.exit:
-; AMDGPU1-NEXT: ret void
-;
-;
-; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__1
-; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; AMDGPU1-NEXT: entry:
-; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU1-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
-; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; AMDGPU1-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]]
-; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
-; AMDGPU1-NEXT: ret void
-;
-;
-; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__2
-; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; AMDGPU1-NEXT: entry:
-; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU1-NEXT: call void @p0() #[[ATTR11:[0-9]+]]
-; AMDGPU1-NEXT: ret void
-;
-;
-; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__2_wrapper
-; AMDGPU1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
-; AMDGPU1-NEXT: entry:
-; AMDGPU1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU1-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU1-NEXT: ret void
-;
-;
-; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__3
-; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; AMDGPU1-NEXT: entry:
-; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU1-NEXT: call void @p1() #[[ATTR11]]
-; AMDGPU1-NEXT: ret void
-;
-;
-; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper
-; AMDGPU1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
-; AMDGPU1-NEXT: entry:
-; AMDGPU1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU1-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU1-NEXT: ret void
-;
-;
-; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU1-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39
-; AMDGPU1-SAME: () #[[ATTR0]] {
-; AMDGPU1-NEXT: entry:
-; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment)
-; AMDGPU1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; AMDGPU1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
-; AMDGPU1: user_code.entry:
-; AMDGPU1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU1-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU1-NEXT: call void @__kmpc_target_deinit()
-; AMDGPU1-NEXT: ret void
-; AMDGPU1: worker.exit:
-; AMDGPU1-NEXT: ret void
-;
-;
-; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__4
-; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; AMDGPU1-NEXT: entry:
-; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU1-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
-; AMDGPU1-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR9]]
-; AMDGPU1-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]]
-; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; AMDGPU1-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR9]]
-; AMDGPU1-NEXT: ret void
-;
-;
-; AMDGPU1: Function Attrs: noinline nounwind
-; AMDGPU1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before.internalized
-; AMDGPU1-SAME: () #[[ATTR6:[0-9]+]] {
-; AMDGPU1-NEXT: entry:
-; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
-; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; AMDGPU1-NEXT: ret void
-;
-;
-; AMDGPU1: Function Attrs: convergent noinline nounwind
-; AMDGPU1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before
-; AMDGPU1-SAME: () #[[ATTR1]] {
-; AMDGPU1-NEXT: entry:
-; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
-; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; AMDGPU1-NEXT: ret void
-;
-;
-; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__5
-; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; AMDGPU1-NEXT: entry:
-; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU1-NEXT: call void @p1() #[[ATTR11]]
-; AMDGPU1-NEXT: ret void
-;
-;
-; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
-; AMDGPU1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
-; AMDGPU1-NEXT: entry:
-; AMDGPU1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU1-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU1-NEXT: ret void
-;
-;
-; AMDGPU1: Function Attrs: noinline nounwind
-; AMDGPU1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after.internalized
-; AMDGPU1-SAME: () #[[ATTR6]] {
-; AMDGPU1-NEXT: entry:
-; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
-; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; AMDGPU1-NEXT: ret void
-;
-;
-; AMDGPU1: Function Attrs: convergent noinline nounwind
-; AMDGPU1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after
-; AMDGPU1-SAME: () #[[ATTR1]] {
-; AMDGPU1-NEXT: entry:
-; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
-; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; AMDGPU1-NEXT: ret void
-;
-;
-; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU1-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55
-; AMDGPU1-SAME: () #[[ATTR0]] {
-; AMDGPU1-NEXT: entry:
-; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment)
-; AMDGPU1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; AMDGPU1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
-; AMDGPU1: user_code.entry:
-; AMDGPU1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU1-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU1-NEXT: call void @__kmpc_target_deinit()
-; AMDGPU1-NEXT: ret void
-; AMDGPU1: worker.exit:
-; AMDGPU1-NEXT: ret void
-;
-;
-; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__6
-; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; AMDGPU1-NEXT: entry:
-; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; AMDGPU1-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR11]]
-; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
-; AMDGPU1-NEXT: ret void
-;
-;
-; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__7
-; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; AMDGPU1-NEXT: entry:
-; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU1-NEXT: call void @p0() #[[ATTR11]]
-; AMDGPU1-NEXT: ret void
-;
-;
-; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
-; AMDGPU1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
-; AMDGPU1-NEXT: entry:
-; AMDGPU1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU1-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU1-NEXT: ret void
-;
-;
-; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__8
-; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; AMDGPU1-NEXT: entry:
-; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU1-NEXT: call void @p1() #[[ATTR11]]
-; AMDGPU1-NEXT: ret void
-;
-;
-; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__8_wrapper
-; AMDGPU1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
-; AMDGPU1-NEXT: entry:
-; AMDGPU1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU1-NEXT: call void @__omp_outlined__8(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU1-NEXT: ret void
-;
-;
-; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU1-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66
-; AMDGPU1-SAME: () #[[ATTR0]] {
-; AMDGPU1-NEXT: entry:
-; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment)
-; AMDGPU1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; AMDGPU1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
-; AMDGPU1: user_code.entry:
-; AMDGPU1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU1-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU1-NEXT: call void @__kmpc_target_deinit()
-; AMDGPU1-NEXT: ret void
-; AMDGPU1: worker.exit:
-; AMDGPU1-NEXT: ret void
-;
-;
-; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__9
-; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; AMDGPU1-NEXT: entry:
-; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; AMDGPU1-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
-; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
-; AMDGPU1-NEXT: ret void
-;
-;
-; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__10
-; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; AMDGPU1-NEXT: entry:
-; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU1-NEXT: call void @p0() #[[ATTR11]]
-; AMDGPU1-NEXT: ret void
-;
-;
-; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__10_wrapper
-; AMDGPU1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
-; AMDGPU1-NEXT: entry:
-; AMDGPU1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU1-NEXT: call void @__omp_outlined__10(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU1-NEXT: ret void
-;
-;
-; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__11
-; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; AMDGPU1-NEXT: entry:
-; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU1-NEXT: call void @p1() #[[ATTR11]]
-; AMDGPU1-NEXT: ret void
-;
-;
-; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__11_wrapper
-; AMDGPU1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
-; AMDGPU1-NEXT: entry:
-; AMDGPU1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU1-NEXT: call void @__omp_outlined__11(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU1-NEXT: ret void
-;
-;
-; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU1-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_pure_l77
-; AMDGPU1-SAME: () #[[ATTR0]] {
-; AMDGPU1-NEXT: entry:
-; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment)
-; AMDGPU1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; AMDGPU1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
-; AMDGPU1: user_code.entry:
-; AMDGPU1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU1-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU1-NEXT: call void @__kmpc_target_deinit()
-; AMDGPU1-NEXT: ret void
-; AMDGPU1: worker.exit:
-; AMDGPU1-NEXT: ret void
-;
-;
-; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__12
-; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; AMDGPU1-NEXT: entry:
-; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU1-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
-; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
-; AMDGPU1-NEXT: ret void
-;
-;
-; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__13
-; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; AMDGPU1-NEXT: entry:
-; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU1-NEXT: call void @p0() #[[ATTR11]]
-; AMDGPU1-NEXT: ret void
-;
-;
-; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__13_wrapper
-; AMDGPU1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
-; AMDGPU1-NEXT: entry:
-; AMDGPU1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU1-NEXT: call void @__omp_outlined__13(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU1-NEXT: ret void
-;
-;
-; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__14
-; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; AMDGPU1-NEXT: entry:
-; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU1-NEXT: call void @p1() #[[ATTR11]]
-; AMDGPU1-NEXT: ret void
-;
-;
-; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__14_wrapper
-; AMDGPU1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
-; AMDGPU1-NEXT: entry:
-; AMDGPU1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU1-NEXT: call void @__omp_outlined__14(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU1-NEXT: ret void
-;
-;
-; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU1-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92
-; AMDGPU1-SAME: () #[[ATTR0]] {
-; AMDGPU1-NEXT: entry:
-; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment)
-; AMDGPU1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; AMDGPU1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
-; AMDGPU1: user_code.entry:
-; AMDGPU1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU1-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU1-NEXT: call void @__kmpc_target_deinit()
-; AMDGPU1-NEXT: ret void
-; AMDGPU1: worker.exit:
-; AMDGPU1-NEXT: ret void
-;
-;
-; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__15
-; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; AMDGPU1-NEXT: entry:
-; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU1-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR9]]
-; AMDGPU1-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR9]]
-; AMDGPU1-NEXT: ret void
-;
-;
-; AMDGPU1: Function Attrs: noinline nounwind
-; AMDGPU1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after.internalized
-; AMDGPU1-SAME: (i32 [[A:%.*]]) #[[ATTR6]] {
-; AMDGPU1-NEXT: entry:
-; AMDGPU1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
-; AMDGPU1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
-; AMDGPU1-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0
-; AMDGPU1-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
-; AMDGPU1: if.then:
-; AMDGPU1-NEXT: br label [[RETURN:%.*]]
-; AMDGPU1: if.end:
-; AMDGPU1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
-; AMDGPU1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
-; AMDGPU1-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR9]]
-; AMDGPU1-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR9]]
-; AMDGPU1-NEXT: br label [[RETURN]]
-; AMDGPU1: return:
-; AMDGPU1-NEXT: ret void
-;
-;
-; AMDGPU1: Function Attrs: convergent noinline nounwind
-; AMDGPU1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after
-; AMDGPU1-SAME: (i32 [[A:%.*]]) #[[ATTR1]] {
-; AMDGPU1-NEXT: entry:
-; AMDGPU1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
-; AMDGPU1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
-; AMDGPU1-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0
-; AMDGPU1-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
-; AMDGPU1: if.then:
-; AMDGPU1-NEXT: br label [[RETURN:%.*]]
-; AMDGPU1: if.end:
-; AMDGPU1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
-; AMDGPU1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
-; AMDGPU1-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR11]]
-; AMDGPU1-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR11]]
-; AMDGPU1-NEXT: br label [[RETURN]]
-; AMDGPU1: return:
-; AMDGPU1-NEXT: ret void
-;
-;
-; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU1-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112
-; AMDGPU1-SAME: () #[[ATTR0]] {
-; AMDGPU1-NEXT: entry:
-; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment)
-; AMDGPU1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; AMDGPU1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
-; AMDGPU1: user_code.entry:
-; AMDGPU1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU1-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU1-NEXT: call void @__kmpc_target_deinit()
-; AMDGPU1-NEXT: ret void
-; AMDGPU1: worker.exit:
-; AMDGPU1-NEXT: ret void
-;
-;
-; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__16
-; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; AMDGPU1-NEXT: entry:
-; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU1-NEXT: call void @weak_callee_empty() #[[ATTR9]]
-; AMDGPU1-NEXT: ret void
-;
-;
-; AMDGPU1: Function Attrs: convergent noinline nounwind
-; AMDGPU1-LABEL: define {{[^@]+}}@weak_callee_empty
-; AMDGPU1-SAME: () #[[ATTR1]] {
-; AMDGPU1-NEXT: entry:
-; AMDGPU1-NEXT: ret void
-;
-;
-; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__17
-; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; AMDGPU1-NEXT: entry:
-; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU1-NEXT: call void @p0() #[[ATTR11]]
-; AMDGPU1-NEXT: ret void
-;
-;
-; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__17_wrapper
-; AMDGPU1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
-; AMDGPU1-NEXT: entry:
-; AMDGPU1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU1-NEXT: call void @__omp_outlined__17(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU1-NEXT: ret void
-;
-;
-; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__18
-; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; AMDGPU1-NEXT: entry:
-; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU1-NEXT: call void @p0() #[[ATTR11]]
-; AMDGPU1-NEXT: ret void
-;
-;
-; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__18_wrapper
-; AMDGPU1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
-; AMDGPU1-NEXT: entry:
-; AMDGPU1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU1-NEXT: call void @__omp_outlined__18(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU1-NEXT: ret void
-;
-;
-; AMDGPU1: Function Attrs: noinline nounwind
-; AMDGPU1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after.internalized
-; AMDGPU1-SAME: () #[[ATTR6]] {
-; AMDGPU1-NEXT: entry:
-; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
-; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; AMDGPU1-NEXT: ret void
-;
-;
-; AMDGPU1: Function Attrs: convergent noinline nounwind
-; AMDGPU1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after
-; AMDGPU1-SAME: () #[[ATTR1]] {
-; AMDGPU1-NEXT: entry:
-; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
-; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; AMDGPU1-NEXT: ret void
-;
-;
-; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__19
-; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; AMDGPU1-NEXT: entry:
-; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU1-NEXT: call void @p0() #[[ATTR11]]
-; AMDGPU1-NEXT: ret void
-;
-;
-; AMDGPU1: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__19_wrapper
-; AMDGPU1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
-; AMDGPU1-NEXT: entry:
-; AMDGPU1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU1-NEXT: call void @__omp_outlined__19(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU1-NEXT: ret void
-;
-;
-; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX1-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_needed_l14
-; NVPTX1-SAME: () #[[ATTR0:[0-9]+]] {
-; NVPTX1-NEXT: entry:
-; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment)
-; NVPTX1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; NVPTX1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
-; NVPTX1: user_code.entry:
-; NVPTX1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]]
-; NVPTX1-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX1-NEXT: call void @__kmpc_target_deinit()
-; NVPTX1-NEXT: ret void
-; NVPTX1: worker.exit:
-; NVPTX1-NEXT: ret void
-;
-;
-; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__
-; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; NVPTX1-NEXT: entry:
-; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX1-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9:[0-9]+]]
-; NVPTX1-NEXT: call void @unknown_no_openmp() #[[ATTR10:[0-9]+]]
-; NVPTX1-NEXT: ret void
-;
-;
-; NVPTX1: Function Attrs: convergent noinline nounwind
-; NVPTX1-LABEL: define {{[^@]+}}@no_parallel_region_in_here.internalized
-; NVPTX1-SAME: () #[[ATTR1:[0-9]+]] {
-; NVPTX1-NEXT: entry:
-; NVPTX1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
-; NVPTX1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]]
-; NVPTX1-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
-; NVPTX1-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
-; NVPTX1: omp_if.then:
-; NVPTX1-NEXT: store i32 0, ptr @G, align 4
-; NVPTX1-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]]
-; NVPTX1-NEXT: br label [[OMP_IF_END]]
-; NVPTX1: omp_if.end:
-; NVPTX1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) #[[ATTR3]]
-; NVPTX1-NEXT: ret void
-;
-;
-; NVPTX1: Function Attrs: convergent noinline nounwind
-; NVPTX1-LABEL: define {{[^@]+}}@no_parallel_region_in_here
-; NVPTX1-SAME: () #[[ATTR1]] {
-; NVPTX1-NEXT: entry:
-; NVPTX1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
-; NVPTX1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]])
-; NVPTX1-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
-; NVPTX1-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
-; NVPTX1: omp_if.then:
-; NVPTX1-NEXT: store i32 0, ptr @G, align 4
-; NVPTX1-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]])
-; NVPTX1-NEXT: br label [[OMP_IF_END]]
-; NVPTX1: omp_if.end:
-; NVPTX1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]])
-; NVPTX1-NEXT: ret void
-;
-;
-; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX1-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_l22
-; NVPTX1-SAME: () #[[ATTR0]] {
-; NVPTX1-NEXT: entry:
-; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment)
-; NVPTX1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; NVPTX1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
-; NVPTX1: user_code.entry:
-; NVPTX1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; NVPTX1-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX1-NEXT: call void @__kmpc_target_deinit()
-; NVPTX1-NEXT: ret void
-; NVPTX1: worker.exit:
-; NVPTX1-NEXT: ret void
-;
-;
-; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__1
-; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; NVPTX1-NEXT: entry:
-; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
-; NVPTX1-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
-; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; NVPTX1-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]]
-; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
-; NVPTX1-NEXT: ret void
-;
-;
-; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__2
-; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; NVPTX1-NEXT: entry:
-; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX1-NEXT: call void @p0() #[[ATTR11:[0-9]+]]
-; NVPTX1-NEXT: ret void
-;
-;
-; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__2_wrapper
-; NVPTX1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
-; NVPTX1-NEXT: entry:
-; NVPTX1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; NVPTX1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX1-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX1-NEXT: ret void
-;
-;
-; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__3
-; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; NVPTX1-NEXT: entry:
-; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX1-NEXT: call void @p1() #[[ATTR11]]
-; NVPTX1-NEXT: ret void
-;
-;
-; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper
-; NVPTX1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
-; NVPTX1-NEXT: entry:
-; NVPTX1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; NVPTX1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX1-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX1-NEXT: ret void
-;
-;
-; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX1-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39
-; NVPTX1-SAME: () #[[ATTR0]] {
-; NVPTX1-NEXT: entry:
-; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment)
-; NVPTX1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; NVPTX1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
-; NVPTX1: user_code.entry:
-; NVPTX1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; NVPTX1-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX1-NEXT: call void @__kmpc_target_deinit()
-; NVPTX1-NEXT: ret void
-; NVPTX1: worker.exit:
-; NVPTX1-NEXT: ret void
-;
-;
-; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__4
-; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; NVPTX1-NEXT: entry:
-; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; NVPTX1-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
-; NVPTX1-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR9]]
-; NVPTX1-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]]
-; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; NVPTX1-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR9]]
-; NVPTX1-NEXT: ret void
-;
-;
-; NVPTX1: Function Attrs: noinline nounwind
-; NVPTX1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before.internalized
-; NVPTX1-SAME: () #[[ATTR6:[0-9]+]] {
-; NVPTX1-NEXT: entry:
-; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; NVPTX1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
-; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; NVPTX1-NEXT: ret void
-;
-;
-; NVPTX1: Function Attrs: convergent noinline nounwind
-; NVPTX1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before
-; NVPTX1-SAME: () #[[ATTR1]] {
-; NVPTX1-NEXT: entry:
-; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; NVPTX1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
-; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; NVPTX1-NEXT: ret void
-;
-;
-; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__5
-; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; NVPTX1-NEXT: entry:
-; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX1-NEXT: call void @p1() #[[ATTR11]]
-; NVPTX1-NEXT: ret void
-;
-;
-; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
-; NVPTX1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
-; NVPTX1-NEXT: entry:
-; NVPTX1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; NVPTX1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX1-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX1-NEXT: ret void
-;
-;
-; NVPTX1: Function Attrs: noinline nounwind
-; NVPTX1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after.internalized
-; NVPTX1-SAME: () #[[ATTR6]] {
-; NVPTX1-NEXT: entry:
-; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; NVPTX1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
-; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; NVPTX1-NEXT: ret void
-;
-;
-; NVPTX1: Function Attrs: convergent noinline nounwind
-; NVPTX1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after
-; NVPTX1-SAME: () #[[ATTR1]] {
-; NVPTX1-NEXT: entry:
-; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; NVPTX1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
-; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; NVPTX1-NEXT: ret void
-;
-;
-; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX1-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55
-; NVPTX1-SAME: () #[[ATTR0]] {
-; NVPTX1-NEXT: entry:
-; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment)
-; NVPTX1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; NVPTX1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
-; NVPTX1: user_code.entry:
-; NVPTX1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; NVPTX1-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX1-NEXT: call void @__kmpc_target_deinit()
-; NVPTX1-NEXT: ret void
-; NVPTX1: worker.exit:
-; NVPTX1-NEXT: ret void
-;
-;
-; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__6
-; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; NVPTX1-NEXT: entry:
-; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
-; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; NVPTX1-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR11]]
-; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
-; NVPTX1-NEXT: ret void
-;
-;
-; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__7
-; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; NVPTX1-NEXT: entry:
-; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX1-NEXT: call void @p0() #[[ATTR11]]
-; NVPTX1-NEXT: ret void
-;
-;
-; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
-; NVPTX1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
-; NVPTX1-NEXT: entry:
-; NVPTX1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; NVPTX1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX1-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX1-NEXT: ret void
-;
-;
-; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__8
-; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; NVPTX1-NEXT: entry:
-; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX1-NEXT: call void @p1() #[[ATTR11]]
-; NVPTX1-NEXT: ret void
-;
-;
-; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__8_wrapper
-; NVPTX1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
-; NVPTX1-NEXT: entry:
-; NVPTX1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; NVPTX1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX1-NEXT: call void @__omp_outlined__8(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX1-NEXT: ret void
-;
-;
-; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX1-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66
-; NVPTX1-SAME: () #[[ATTR0]] {
-; NVPTX1-NEXT: entry:
-; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment)
-; NVPTX1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; NVPTX1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
-; NVPTX1: user_code.entry:
-; NVPTX1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; NVPTX1-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX1-NEXT: call void @__kmpc_target_deinit()
-; NVPTX1-NEXT: ret void
-; NVPTX1: worker.exit:
-; NVPTX1-NEXT: ret void
-;
-;
-; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__9
-; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; NVPTX1-NEXT: entry:
-; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
-; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; NVPTX1-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
-; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
-; NVPTX1-NEXT: ret void
-;
-;
-; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__10
-; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; NVPTX1-NEXT: entry:
-; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX1-NEXT: call void @p0() #[[ATTR11]]
-; NVPTX1-NEXT: ret void
-;
-;
-; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__10_wrapper
-; NVPTX1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
-; NVPTX1-NEXT: entry:
-; NVPTX1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; NVPTX1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX1-NEXT: call void @__omp_outlined__10(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX1-NEXT: ret void
-;
-;
-; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__11
-; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; NVPTX1-NEXT: entry:
-; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX1-NEXT: call void @p1() #[[ATTR11]]
-; NVPTX1-NEXT: ret void
-;
-;
-; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__11_wrapper
-; NVPTX1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
-; NVPTX1-NEXT: entry:
-; NVPTX1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; NVPTX1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX1-NEXT: call void @__omp_outlined__11(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX1-NEXT: ret void
-;
-;
-; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX1-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_pure_l77
-; NVPTX1-SAME: () #[[ATTR0]] {
-; NVPTX1-NEXT: entry:
-; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment)
-; NVPTX1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; NVPTX1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
-; NVPTX1: user_code.entry:
-; NVPTX1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; NVPTX1-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX1-NEXT: call void @__kmpc_target_deinit()
-; NVPTX1-NEXT: ret void
-; NVPTX1: worker.exit:
-; NVPTX1-NEXT: ret void
-;
-;
-; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__12
-; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; NVPTX1-NEXT: entry:
-; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
-; NVPTX1-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
-; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
-; NVPTX1-NEXT: ret void
-;
-;
-; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__13
-; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; NVPTX1-NEXT: entry:
-; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX1-NEXT: call void @p0() #[[ATTR11]]
-; NVPTX1-NEXT: ret void
-;
-;
-; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__13_wrapper
-; NVPTX1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
-; NVPTX1-NEXT: entry:
-; NVPTX1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; NVPTX1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX1-NEXT: call void @__omp_outlined__13(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX1-NEXT: ret void
-;
-;
-; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__14
-; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; NVPTX1-NEXT: entry:
-; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX1-NEXT: call void @p1() #[[ATTR11]]
-; NVPTX1-NEXT: ret void
-;
-;
-; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__14_wrapper
-; NVPTX1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
-; NVPTX1-NEXT: entry:
-; NVPTX1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; NVPTX1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX1-NEXT: call void @__omp_outlined__14(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX1-NEXT: ret void
-;
-;
-; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX1-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92
-; NVPTX1-SAME: () #[[ATTR0]] {
-; NVPTX1-NEXT: entry:
-; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment)
-; NVPTX1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; NVPTX1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
-; NVPTX1: user_code.entry:
-; NVPTX1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; NVPTX1-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX1-NEXT: call void @__kmpc_target_deinit()
-; NVPTX1-NEXT: ret void
-; NVPTX1: worker.exit:
-; NVPTX1-NEXT: ret void
-;
-;
-; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__15
-; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; NVPTX1-NEXT: entry:
-; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX1-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR9]]
-; NVPTX1-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR9]]
-; NVPTX1-NEXT: ret void
-;
-;
-; NVPTX1: Function Attrs: noinline nounwind
-; NVPTX1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after.internalized
-; NVPTX1-SAME: (i32 [[A:%.*]]) #[[ATTR6]] {
-; NVPTX1-NEXT: entry:
-; NVPTX1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
-; NVPTX1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
-; NVPTX1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
-; NVPTX1-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0
-; NVPTX1-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
-; NVPTX1: if.then:
-; NVPTX1-NEXT: br label [[RETURN:%.*]]
-; NVPTX1: if.end:
-; NVPTX1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
-; NVPTX1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
-; NVPTX1-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR9]]
-; NVPTX1-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR9]]
-; NVPTX1-NEXT: br label [[RETURN]]
-; NVPTX1: return:
-; NVPTX1-NEXT: ret void
-;
-;
-; NVPTX1: Function Attrs: convergent noinline nounwind
-; NVPTX1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after
-; NVPTX1-SAME: (i32 [[A:%.*]]) #[[ATTR1]] {
-; NVPTX1-NEXT: entry:
-; NVPTX1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
-; NVPTX1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
-; NVPTX1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
-; NVPTX1-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0
-; NVPTX1-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
-; NVPTX1: if.then:
-; NVPTX1-NEXT: br label [[RETURN:%.*]]
-; NVPTX1: if.end:
-; NVPTX1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
-; NVPTX1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
-; NVPTX1-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR11]]
-; NVPTX1-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR11]]
-; NVPTX1-NEXT: br label [[RETURN]]
-; NVPTX1: return:
-; NVPTX1-NEXT: ret void
-;
-;
-; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX1-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112
-; NVPTX1-SAME: () #[[ATTR0]] {
-; NVPTX1-NEXT: entry:
-; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment)
-; NVPTX1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; NVPTX1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
-; NVPTX1: user_code.entry:
-; NVPTX1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; NVPTX1-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX1-NEXT: call void @__kmpc_target_deinit()
-; NVPTX1-NEXT: ret void
-; NVPTX1: worker.exit:
-; NVPTX1-NEXT: ret void
-;
-;
-; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__16
-; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; NVPTX1-NEXT: entry:
-; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX1-NEXT: call void @weak_callee_empty() #[[ATTR9]]
-; NVPTX1-NEXT: ret void
-;
-;
-; NVPTX1: Function Attrs: convergent noinline nounwind
-; NVPTX1-LABEL: define {{[^@]+}}@weak_callee_empty
-; NVPTX1-SAME: () #[[ATTR1]] {
-; NVPTX1-NEXT: entry:
-; NVPTX1-NEXT: ret void
-;
-;
-; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__17
-; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; NVPTX1-NEXT: entry:
-; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX1-NEXT: call void @p0() #[[ATTR11]]
-; NVPTX1-NEXT: ret void
-;
-;
-; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__17_wrapper
-; NVPTX1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
-; NVPTX1-NEXT: entry:
-; NVPTX1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; NVPTX1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX1-NEXT: call void @__omp_outlined__17(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX1-NEXT: ret void
-;
-;
-; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__18
-; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; NVPTX1-NEXT: entry:
-; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX1-NEXT: call void @p0() #[[ATTR11]]
-; NVPTX1-NEXT: ret void
-;
-;
-; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__18_wrapper
-; NVPTX1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
-; NVPTX1-NEXT: entry:
-; NVPTX1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; NVPTX1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX1-NEXT: call void @__omp_outlined__18(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX1-NEXT: ret void
-;
-;
-; NVPTX1: Function Attrs: noinline nounwind
-; NVPTX1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after.internalized
-; NVPTX1-SAME: () #[[ATTR6]] {
-; NVPTX1-NEXT: entry:
-; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; NVPTX1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
-; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; NVPTX1-NEXT: ret void
-;
-;
-; NVPTX1: Function Attrs: convergent noinline nounwind
-; NVPTX1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after
-; NVPTX1-SAME: () #[[ATTR1]] {
-; NVPTX1-NEXT: entry:
-; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; NVPTX1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
-; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; NVPTX1-NEXT: ret void
-;
-;
-; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__19
-; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; NVPTX1-NEXT: entry:
-; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX1-NEXT: call void @p0() #[[ATTR11]]
-; NVPTX1-NEXT: ret void
-;
-;
-; NVPTX1: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__19_wrapper
-; NVPTX1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
-; NVPTX1-NEXT: entry:
-; NVPTX1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; NVPTX1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX1-NEXT: call void @__omp_outlined__19(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX1-NEXT: ret void
-;
-;
-; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU2-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_needed_l14
-; AMDGPU2-SAME: () #[[ATTR0:[0-9]+]] {
-; AMDGPU2-NEXT: entry:
-; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment)
-; AMDGPU2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; AMDGPU2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
-; AMDGPU2: user_code.entry:
-; AMDGPU2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]]
-; AMDGPU2-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU2-NEXT: call void @__kmpc_target_deinit()
-; AMDGPU2-NEXT: ret void
-; AMDGPU2: worker.exit:
-; AMDGPU2-NEXT: ret void
-;
-;
-; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__
-; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; AMDGPU2-NEXT: entry:
-; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU2-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9:[0-9]+]]
-; AMDGPU2-NEXT: call void @unknown_no_openmp() #[[ATTR10:[0-9]+]]
-; AMDGPU2-NEXT: ret void
-;
-;
-; AMDGPU2: Function Attrs: convergent noinline nounwind
-; AMDGPU2-LABEL: define {{[^@]+}}@no_parallel_region_in_here.internalized
-; AMDGPU2-SAME: () #[[ATTR1:[0-9]+]] {
-; AMDGPU2-NEXT: entry:
-; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
-; AMDGPU2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]]
-; AMDGPU2-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
-; AMDGPU2-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
-; AMDGPU2: omp_if.then:
-; AMDGPU2-NEXT: store i32 0, ptr @G, align 4
-; AMDGPU2-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]]
-; AMDGPU2-NEXT: br label [[OMP_IF_END]]
-; AMDGPU2: omp_if.end:
-; AMDGPU2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) #[[ATTR3]]
-; AMDGPU2-NEXT: ret void
-;
-;
-; AMDGPU2: Function Attrs: convergent noinline nounwind
-; AMDGPU2-LABEL: define {{[^@]+}}@no_parallel_region_in_here
-; AMDGPU2-SAME: () #[[ATTR1]] {
-; AMDGPU2-NEXT: entry:
-; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
-; AMDGPU2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]])
-; AMDGPU2-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
-; AMDGPU2-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
-; AMDGPU2: omp_if.then:
-; AMDGPU2-NEXT: store i32 0, ptr @G, align 4
-; AMDGPU2-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]])
-; AMDGPU2-NEXT: br label [[OMP_IF_END]]
-; AMDGPU2: omp_if.end:
-; AMDGPU2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]])
-; AMDGPU2-NEXT: ret void
-;
-;
-; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU2-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_l22
-; AMDGPU2-SAME: () #[[ATTR0]] {
-; AMDGPU2-NEXT: entry:
-; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment)
-; AMDGPU2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; AMDGPU2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
-; AMDGPU2: user_code.entry:
-; AMDGPU2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU2-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU2-NEXT: call void @__kmpc_target_deinit()
-; AMDGPU2-NEXT: ret void
-; AMDGPU2: worker.exit:
-; AMDGPU2-NEXT: ret void
-;
-;
-; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__1
-; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; AMDGPU2-NEXT: entry:
-; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU2-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
-; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; AMDGPU2-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]]
-; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
-; AMDGPU2-NEXT: ret void
-;
-;
-; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__2
-; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; AMDGPU2-NEXT: entry:
-; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU2-NEXT: call void @p0() #[[ATTR11:[0-9]+]]
-; AMDGPU2-NEXT: ret void
-;
-;
-; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__2_wrapper
-; AMDGPU2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
-; AMDGPU2-NEXT: entry:
-; AMDGPU2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU2-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU2-NEXT: ret void
-;
-;
-; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__3
-; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; AMDGPU2-NEXT: entry:
-; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU2-NEXT: call void @p1() #[[ATTR11]]
-; AMDGPU2-NEXT: ret void
-;
-;
-; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper
-; AMDGPU2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
-; AMDGPU2-NEXT: entry:
-; AMDGPU2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU2-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU2-NEXT: ret void
-;
-;
-; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU2-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39
-; AMDGPU2-SAME: () #[[ATTR0]] {
-; AMDGPU2-NEXT: entry:
-; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment)
-; AMDGPU2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; AMDGPU2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
-; AMDGPU2: user_code.entry:
-; AMDGPU2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU2-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU2-NEXT: call void @__kmpc_target_deinit()
-; AMDGPU2-NEXT: ret void
-; AMDGPU2: worker.exit:
-; AMDGPU2-NEXT: ret void
-;
-;
-; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__4
-; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; AMDGPU2-NEXT: entry:
-; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU2-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
-; AMDGPU2-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR9]]
-; AMDGPU2-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]]
-; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; AMDGPU2-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR9]]
-; AMDGPU2-NEXT: ret void
-;
-;
-; AMDGPU2: Function Attrs: noinline nounwind
-; AMDGPU2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before.internalized
-; AMDGPU2-SAME: () #[[ATTR6:[0-9]+]] {
-; AMDGPU2-NEXT: entry:
-; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
-; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; AMDGPU2-NEXT: ret void
-;
-;
-; AMDGPU2: Function Attrs: convergent noinline nounwind
-; AMDGPU2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before
-; AMDGPU2-SAME: () #[[ATTR1]] {
-; AMDGPU2-NEXT: entry:
-; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
-; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; AMDGPU2-NEXT: ret void
-;
-;
-; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__5
-; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; AMDGPU2-NEXT: entry:
-; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU2-NEXT: call void @p1() #[[ATTR11]]
-; AMDGPU2-NEXT: ret void
-;
-;
-; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
-; AMDGPU2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
-; AMDGPU2-NEXT: entry:
-; AMDGPU2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU2-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU2-NEXT: ret void
-;
-;
-; AMDGPU2: Function Attrs: noinline nounwind
-; AMDGPU2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after.internalized
-; AMDGPU2-SAME: () #[[ATTR6]] {
-; AMDGPU2-NEXT: entry:
-; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
-; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; AMDGPU2-NEXT: ret void
-;
-;
-; AMDGPU2: Function Attrs: convergent noinline nounwind
-; AMDGPU2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after
-; AMDGPU2-SAME: () #[[ATTR1]] {
-; AMDGPU2-NEXT: entry:
-; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
-; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; AMDGPU2-NEXT: ret void
-;
-;
-; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU2-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55
-; AMDGPU2-SAME: () #[[ATTR0]] {
-; AMDGPU2-NEXT: entry:
-; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment)
-; AMDGPU2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; AMDGPU2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
-; AMDGPU2: user_code.entry:
-; AMDGPU2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU2-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU2-NEXT: call void @__kmpc_target_deinit()
-; AMDGPU2-NEXT: ret void
-; AMDGPU2: worker.exit:
-; AMDGPU2-NEXT: ret void
-;
-;
-; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__6
-; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; AMDGPU2-NEXT: entry:
-; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; AMDGPU2-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR11]]
-; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
-; AMDGPU2-NEXT: ret void
-;
-;
-; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__7
-; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; AMDGPU2-NEXT: entry:
-; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU2-NEXT: call void @p0() #[[ATTR11]]
-; AMDGPU2-NEXT: ret void
-;
-;
-; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
-; AMDGPU2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
-; AMDGPU2-NEXT: entry:
-; AMDGPU2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU2-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU2-NEXT: ret void
-;
-;
-; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__8
-; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; AMDGPU2-NEXT: entry:
-; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU2-NEXT: call void @p1() #[[ATTR11]]
-; AMDGPU2-NEXT: ret void
-;
-;
-; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__8_wrapper
-; AMDGPU2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
-; AMDGPU2-NEXT: entry:
-; AMDGPU2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU2-NEXT: call void @__omp_outlined__8(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU2-NEXT: ret void
-;
-;
-; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU2-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66
-; AMDGPU2-SAME: () #[[ATTR0]] {
-; AMDGPU2-NEXT: entry:
-; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment)
-; AMDGPU2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; AMDGPU2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
-; AMDGPU2: user_code.entry:
-; AMDGPU2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU2-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU2-NEXT: call void @__kmpc_target_deinit()
-; AMDGPU2-NEXT: ret void
-; AMDGPU2: worker.exit:
-; AMDGPU2-NEXT: ret void
-;
-;
-; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__9
-; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; AMDGPU2-NEXT: entry:
-; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; AMDGPU2-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
-; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
-; AMDGPU2-NEXT: ret void
-;
-;
-; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__10
-; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; AMDGPU2-NEXT: entry:
-; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU2-NEXT: call void @p0() #[[ATTR11]]
-; AMDGPU2-NEXT: ret void
-;
-;
-; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__10_wrapper
-; AMDGPU2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
-; AMDGPU2-NEXT: entry:
-; AMDGPU2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU2-NEXT: call void @__omp_outlined__10(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU2-NEXT: ret void
-;
-;
-; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__11
-; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; AMDGPU2-NEXT: entry:
-; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU2-NEXT: call void @p1() #[[ATTR11]]
-; AMDGPU2-NEXT: ret void
-;
-;
-; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__11_wrapper
-; AMDGPU2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
-; AMDGPU2-NEXT: entry:
-; AMDGPU2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU2-NEXT: call void @__omp_outlined__11(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU2-NEXT: ret void
-;
-;
-; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU2-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_pure_l77
-; AMDGPU2-SAME: () #[[ATTR0]] {
-; AMDGPU2-NEXT: entry:
-; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment)
-; AMDGPU2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; AMDGPU2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
-; AMDGPU2: user_code.entry:
-; AMDGPU2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU2-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU2-NEXT: call void @__kmpc_target_deinit()
-; AMDGPU2-NEXT: ret void
-; AMDGPU2: worker.exit:
-; AMDGPU2-NEXT: ret void
-;
-;
-; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__12
-; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; AMDGPU2-NEXT: entry:
-; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU2-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
-; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
-; AMDGPU2-NEXT: ret void
-;
-;
-; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__13
-; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; AMDGPU2-NEXT: entry:
-; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU2-NEXT: call void @p0() #[[ATTR11]]
-; AMDGPU2-NEXT: ret void
-;
-;
-; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__13_wrapper
-; AMDGPU2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
-; AMDGPU2-NEXT: entry:
-; AMDGPU2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU2-NEXT: call void @__omp_outlined__13(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU2-NEXT: ret void
-;
-;
-; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__14
-; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; AMDGPU2-NEXT: entry:
-; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU2-NEXT: call void @p1() #[[ATTR11]]
-; AMDGPU2-NEXT: ret void
-;
-;
-; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__14_wrapper
-; AMDGPU2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
-; AMDGPU2-NEXT: entry:
-; AMDGPU2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU2-NEXT: call void @__omp_outlined__14(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU2-NEXT: ret void
-;
-;
-; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU2-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92
-; AMDGPU2-SAME: () #[[ATTR0]] {
-; AMDGPU2-NEXT: entry:
-; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment)
-; AMDGPU2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; AMDGPU2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
-; AMDGPU2: user_code.entry:
-; AMDGPU2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU2-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU2-NEXT: call void @__kmpc_target_deinit()
-; AMDGPU2-NEXT: ret void
-; AMDGPU2: worker.exit:
-; AMDGPU2-NEXT: ret void
-;
-;
-; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__15
-; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; AMDGPU2-NEXT: entry:
-; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU2-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR9]]
-; AMDGPU2-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR9]]
-; AMDGPU2-NEXT: ret void
-;
-;
-; AMDGPU2: Function Attrs: noinline nounwind
-; AMDGPU2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after.internalized
-; AMDGPU2-SAME: (i32 [[A:%.*]]) #[[ATTR6]] {
-; AMDGPU2-NEXT: entry:
-; AMDGPU2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
-; AMDGPU2-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
-; AMDGPU2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0
-; AMDGPU2-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
-; AMDGPU2: if.then:
-; AMDGPU2-NEXT: br label [[RETURN:%.*]]
-; AMDGPU2: if.end:
-; AMDGPU2-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
-; AMDGPU2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
-; AMDGPU2-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR9]]
-; AMDGPU2-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR9]]
-; AMDGPU2-NEXT: br label [[RETURN]]
-; AMDGPU2: return:
-; AMDGPU2-NEXT: ret void
-;
-;
-; AMDGPU2: Function Attrs: convergent noinline nounwind
-; AMDGPU2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after
-; AMDGPU2-SAME: (i32 [[A:%.*]]) #[[ATTR1]] {
-; AMDGPU2-NEXT: entry:
-; AMDGPU2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
-; AMDGPU2-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
-; AMDGPU2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0
-; AMDGPU2-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
-; AMDGPU2: if.then:
-; AMDGPU2-NEXT: br label [[RETURN:%.*]]
-; AMDGPU2: if.end:
-; AMDGPU2-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
-; AMDGPU2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
-; AMDGPU2-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR11]]
-; AMDGPU2-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR11]]
-; AMDGPU2-NEXT: br label [[RETURN]]
-; AMDGPU2: return:
-; AMDGPU2-NEXT: ret void
-;
-;
-; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU2-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112
-; AMDGPU2-SAME: () #[[ATTR0]] {
-; AMDGPU2-NEXT: entry:
-; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment)
-; AMDGPU2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; AMDGPU2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
-; AMDGPU2: user_code.entry:
-; AMDGPU2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU2-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU2-NEXT: call void @__kmpc_target_deinit()
-; AMDGPU2-NEXT: ret void
-; AMDGPU2: worker.exit:
-; AMDGPU2-NEXT: ret void
-;
-;
-; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__16
-; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; AMDGPU2-NEXT: entry:
-; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU2-NEXT: call void @weak_callee_empty() #[[ATTR9]]
-; AMDGPU2-NEXT: ret void
-;
-;
-; AMDGPU2: Function Attrs: convergent noinline nounwind
-; AMDGPU2-LABEL: define {{[^@]+}}@weak_callee_empty
-; AMDGPU2-SAME: () #[[ATTR1]] {
-; AMDGPU2-NEXT: entry:
-; AMDGPU2-NEXT: ret void
-;
-;
-; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__17
-; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; AMDGPU2-NEXT: entry:
-; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU2-NEXT: call void @p0() #[[ATTR11]]
-; AMDGPU2-NEXT: ret void
-;
-;
-; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__17_wrapper
-; AMDGPU2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
-; AMDGPU2-NEXT: entry:
-; AMDGPU2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU2-NEXT: call void @__omp_outlined__17(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU2-NEXT: ret void
-;
-;
-; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__18
-; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; AMDGPU2-NEXT: entry:
-; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU2-NEXT: call void @p0() #[[ATTR11]]
-; AMDGPU2-NEXT: ret void
-;
-;
-; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__18_wrapper
-; AMDGPU2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
-; AMDGPU2-NEXT: entry:
-; AMDGPU2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU2-NEXT: call void @__omp_outlined__18(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU2-NEXT: ret void
-;
-;
-; AMDGPU2: Function Attrs: noinline nounwind
-; AMDGPU2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after.internalized
-; AMDGPU2-SAME: () #[[ATTR6]] {
-; AMDGPU2-NEXT: entry:
-; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
-; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; AMDGPU2-NEXT: ret void
-;
-;
-; AMDGPU2: Function Attrs: convergent noinline nounwind
-; AMDGPU2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after
-; AMDGPU2-SAME: () #[[ATTR1]] {
-; AMDGPU2-NEXT: entry:
-; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
-; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; AMDGPU2-NEXT: ret void
-;
-;
-; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__19
-; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; AMDGPU2-NEXT: entry:
-; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU2-NEXT: call void @p0() #[[ATTR11]]
-; AMDGPU2-NEXT: ret void
-;
-;
-; AMDGPU2: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__19_wrapper
-; AMDGPU2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
-; AMDGPU2-NEXT: entry:
-; AMDGPU2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU2-NEXT: call void @__omp_outlined__19(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU2-NEXT: ret void
-;
-;
-; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU3-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_needed_l14
-; AMDGPU3-SAME: () #[[ATTR0:[0-9]+]] {
-; AMDGPU3-NEXT: entry:
-; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment)
-; AMDGPU3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; AMDGPU3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
-; AMDGPU3: user_code.entry:
-; AMDGPU3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]]
-; AMDGPU3-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU3-NEXT: call void @__kmpc_target_deinit()
-; AMDGPU3-NEXT: ret void
-; AMDGPU3: worker.exit:
-; AMDGPU3-NEXT: ret void
-;
-;
-; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__
-; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; AMDGPU3-NEXT: entry:
-; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU3-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9:[0-9]+]]
-; AMDGPU3-NEXT: call void @unknown_no_openmp() #[[ATTR10:[0-9]+]]
-; AMDGPU3-NEXT: ret void
-;
-;
-; AMDGPU3: Function Attrs: convergent noinline nounwind
-; AMDGPU3-LABEL: define {{[^@]+}}@no_parallel_region_in_here.internalized
-; AMDGPU3-SAME: () #[[ATTR1:[0-9]+]] {
-; AMDGPU3-NEXT: entry:
-; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
-; AMDGPU3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]]
-; AMDGPU3-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
-; AMDGPU3-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
-; AMDGPU3: omp_if.then:
-; AMDGPU3-NEXT: store i32 0, ptr @G, align 4
-; AMDGPU3-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]]
-; AMDGPU3-NEXT: br label [[OMP_IF_END]]
-; AMDGPU3: omp_if.end:
-; AMDGPU3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) #[[ATTR3]]
-; AMDGPU3-NEXT: ret void
-;
-;
-; AMDGPU3: Function Attrs: convergent noinline nounwind
-; AMDGPU3-LABEL: define {{[^@]+}}@no_parallel_region_in_here
-; AMDGPU3-SAME: () #[[ATTR1]] {
-; AMDGPU3-NEXT: entry:
-; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
-; AMDGPU3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]])
-; AMDGPU3-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
-; AMDGPU3-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
-; AMDGPU3: omp_if.then:
-; AMDGPU3-NEXT: store i32 0, ptr @G, align 4
-; AMDGPU3-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]])
-; AMDGPU3-NEXT: br label [[OMP_IF_END]]
-; AMDGPU3: omp_if.end:
-; AMDGPU3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]])
-; AMDGPU3-NEXT: ret void
-;
-;
-; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU3-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_l22
-; AMDGPU3-SAME: () #[[ATTR0]] {
-; AMDGPU3-NEXT: entry:
-; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment)
-; AMDGPU3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; AMDGPU3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
-; AMDGPU3: user_code.entry:
-; AMDGPU3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU3-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU3-NEXT: call void @__kmpc_target_deinit()
-; AMDGPU3-NEXT: ret void
-; AMDGPU3: worker.exit:
-; AMDGPU3-NEXT: ret void
-;
-;
-; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__1
-; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; AMDGPU3-NEXT: entry:
-; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU3-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
-; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; AMDGPU3-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]]
-; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
-; AMDGPU3-NEXT: ret void
-;
-;
-; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__2
-; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; AMDGPU3-NEXT: entry:
-; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU3-NEXT: call void @p0() #[[ATTR11:[0-9]+]]
-; AMDGPU3-NEXT: ret void
-;
-;
-; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__2_wrapper
-; AMDGPU3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
-; AMDGPU3-NEXT: entry:
-; AMDGPU3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU3-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU3-NEXT: ret void
-;
-;
-; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__3
-; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; AMDGPU3-NEXT: entry:
-; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU3-NEXT: call void @p1() #[[ATTR11]]
-; AMDGPU3-NEXT: ret void
-;
-;
-; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper
-; AMDGPU3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
-; AMDGPU3-NEXT: entry:
-; AMDGPU3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU3-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU3-NEXT: ret void
-;
-;
-; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU3-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39
-; AMDGPU3-SAME: () #[[ATTR0]] {
-; AMDGPU3-NEXT: entry:
-; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment)
-; AMDGPU3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; AMDGPU3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
-; AMDGPU3: user_code.entry:
-; AMDGPU3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU3-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU3-NEXT: call void @__kmpc_target_deinit()
-; AMDGPU3-NEXT: ret void
-; AMDGPU3: worker.exit:
-; AMDGPU3-NEXT: ret void
-;
-;
-; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__4
-; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; AMDGPU3-NEXT: entry:
-; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU3-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
-; AMDGPU3-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR9]]
-; AMDGPU3-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]]
-; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; AMDGPU3-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR9]]
-; AMDGPU3-NEXT: ret void
-;
-;
-; AMDGPU3: Function Attrs: noinline nounwind
-; AMDGPU3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before.internalized
-; AMDGPU3-SAME: () #[[ATTR6:[0-9]+]] {
-; AMDGPU3-NEXT: entry:
-; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
-; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; AMDGPU3-NEXT: ret void
-;
-;
-; AMDGPU3: Function Attrs: convergent noinline nounwind
-; AMDGPU3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before
-; AMDGPU3-SAME: () #[[ATTR1]] {
-; AMDGPU3-NEXT: entry:
-; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
-; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; AMDGPU3-NEXT: ret void
-;
-;
-; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__5
-; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; AMDGPU3-NEXT: entry:
-; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU3-NEXT: call void @p1() #[[ATTR11]]
-; AMDGPU3-NEXT: ret void
-;
-;
-; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
-; AMDGPU3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
-; AMDGPU3-NEXT: entry:
-; AMDGPU3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU3-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU3-NEXT: ret void
-;
-;
-; AMDGPU3: Function Attrs: noinline nounwind
-; AMDGPU3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after.internalized
-; AMDGPU3-SAME: () #[[ATTR6]] {
-; AMDGPU3-NEXT: entry:
-; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
-; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; AMDGPU3-NEXT: ret void
-;
-;
-; AMDGPU3: Function Attrs: convergent noinline nounwind
-; AMDGPU3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after
-; AMDGPU3-SAME: () #[[ATTR1]] {
-; AMDGPU3-NEXT: entry:
-; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
-; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; AMDGPU3-NEXT: ret void
-;
-;
-; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU3-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55
-; AMDGPU3-SAME: () #[[ATTR0]] {
-; AMDGPU3-NEXT: entry:
-; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment)
-; AMDGPU3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; AMDGPU3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
-; AMDGPU3: user_code.entry:
-; AMDGPU3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU3-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU3-NEXT: call void @__kmpc_target_deinit()
-; AMDGPU3-NEXT: ret void
-; AMDGPU3: worker.exit:
-; AMDGPU3-NEXT: ret void
-;
-;
-; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__6
-; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; AMDGPU3-NEXT: entry:
-; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; AMDGPU3-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR11]]
-; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
-; AMDGPU3-NEXT: ret void
-;
-;
-; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__7
-; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; AMDGPU3-NEXT: entry:
-; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU3-NEXT: call void @p0() #[[ATTR11]]
-; AMDGPU3-NEXT: ret void
-;
-;
-; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
-; AMDGPU3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
-; AMDGPU3-NEXT: entry:
-; AMDGPU3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU3-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU3-NEXT: ret void
-;
-;
-; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__8
-; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; AMDGPU3-NEXT: entry:
-; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU3-NEXT: call void @p1() #[[ATTR11]]
-; AMDGPU3-NEXT: ret void
-;
-;
-; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__8_wrapper
-; AMDGPU3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
-; AMDGPU3-NEXT: entry:
-; AMDGPU3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU3-NEXT: call void @__omp_outlined__8(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU3-NEXT: ret void
-;
-;
-; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU3-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66
-; AMDGPU3-SAME: () #[[ATTR0]] {
-; AMDGPU3-NEXT: entry:
-; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment)
-; AMDGPU3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; AMDGPU3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
-; AMDGPU3: user_code.entry:
-; AMDGPU3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU3-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU3-NEXT: call void @__kmpc_target_deinit()
-; AMDGPU3-NEXT: ret void
-; AMDGPU3: worker.exit:
-; AMDGPU3-NEXT: ret void
-;
-;
-; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__9
-; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; AMDGPU3-NEXT: entry:
-; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; AMDGPU3-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
-; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
-; AMDGPU3-NEXT: ret void
-;
-;
-; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__10
-; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; AMDGPU3-NEXT: entry:
-; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU3-NEXT: call void @p0() #[[ATTR11]]
-; AMDGPU3-NEXT: ret void
-;
-;
-; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__10_wrapper
-; AMDGPU3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
-; AMDGPU3-NEXT: entry:
-; AMDGPU3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU3-NEXT: call void @__omp_outlined__10(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU3-NEXT: ret void
-;
-;
-; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__11
-; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; AMDGPU3-NEXT: entry:
-; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU3-NEXT: call void @p1() #[[ATTR11]]
-; AMDGPU3-NEXT: ret void
-;
-;
-; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__11_wrapper
-; AMDGPU3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
-; AMDGPU3-NEXT: entry:
-; AMDGPU3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU3-NEXT: call void @__omp_outlined__11(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU3-NEXT: ret void
-;
-;
-; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU3-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_pure_l77
-; AMDGPU3-SAME: () #[[ATTR0]] {
-; AMDGPU3-NEXT: entry:
-; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment)
-; AMDGPU3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; AMDGPU3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
-; AMDGPU3: user_code.entry:
-; AMDGPU3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU3-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU3-NEXT: call void @__kmpc_target_deinit()
-; AMDGPU3-NEXT: ret void
-; AMDGPU3: worker.exit:
-; AMDGPU3-NEXT: ret void
-;
-;
-; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__12
-; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; AMDGPU3-NEXT: entry:
-; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU3-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
-; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
-; AMDGPU3-NEXT: ret void
-;
-;
-; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__13
-; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; AMDGPU3-NEXT: entry:
-; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU3-NEXT: call void @p0() #[[ATTR11]]
-; AMDGPU3-NEXT: ret void
-;
-;
-; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__13_wrapper
-; AMDGPU3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
-; AMDGPU3-NEXT: entry:
-; AMDGPU3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU3-NEXT: call void @__omp_outlined__13(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU3-NEXT: ret void
-;
-;
-; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__14
-; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; AMDGPU3-NEXT: entry:
-; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU3-NEXT: call void @p1() #[[ATTR11]]
-; AMDGPU3-NEXT: ret void
-;
-;
-; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__14_wrapper
-; AMDGPU3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
-; AMDGPU3-NEXT: entry:
-; AMDGPU3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU3-NEXT: call void @__omp_outlined__14(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU3-NEXT: ret void
-;
-;
-; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU3-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92
-; AMDGPU3-SAME: () #[[ATTR0]] {
-; AMDGPU3-NEXT: entry:
-; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment)
-; AMDGPU3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; AMDGPU3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
-; AMDGPU3: user_code.entry:
-; AMDGPU3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU3-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU3-NEXT: call void @__kmpc_target_deinit()
-; AMDGPU3-NEXT: ret void
-; AMDGPU3: worker.exit:
-; AMDGPU3-NEXT: ret void
-;
-;
-; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__15
-; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; AMDGPU3-NEXT: entry:
-; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU3-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR9]]
-; AMDGPU3-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR9]]
-; AMDGPU3-NEXT: ret void
-;
-;
-; AMDGPU3: Function Attrs: noinline nounwind
-; AMDGPU3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after.internalized
-; AMDGPU3-SAME: (i32 [[A:%.*]]) #[[ATTR6]] {
-; AMDGPU3-NEXT: entry:
-; AMDGPU3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
-; AMDGPU3-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
-; AMDGPU3-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0
-; AMDGPU3-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
-; AMDGPU3: if.then:
-; AMDGPU3-NEXT: br label [[RETURN:%.*]]
-; AMDGPU3: if.end:
-; AMDGPU3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
-; AMDGPU3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
-; AMDGPU3-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR9]]
-; AMDGPU3-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR9]]
-; AMDGPU3-NEXT: br label [[RETURN]]
-; AMDGPU3: return:
-; AMDGPU3-NEXT: ret void
-;
-;
-; AMDGPU3: Function Attrs: convergent noinline nounwind
-; AMDGPU3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after
-; AMDGPU3-SAME: (i32 [[A:%.*]]) #[[ATTR1]] {
-; AMDGPU3-NEXT: entry:
-; AMDGPU3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
-; AMDGPU3-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
-; AMDGPU3-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0
-; AMDGPU3-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
-; AMDGPU3: if.then:
-; AMDGPU3-NEXT: br label [[RETURN:%.*]]
-; AMDGPU3: if.end:
-; AMDGPU3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
-; AMDGPU3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
-; AMDGPU3-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR11]]
-; AMDGPU3-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR11]]
-; AMDGPU3-NEXT: br label [[RETURN]]
-; AMDGPU3: return:
-; AMDGPU3-NEXT: ret void
-;
-;
-; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU3-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112
-; AMDGPU3-SAME: () #[[ATTR0]] {
-; AMDGPU3-NEXT: entry:
-; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment)
-; AMDGPU3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; AMDGPU3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
-; AMDGPU3: user_code.entry:
-; AMDGPU3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU3-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU3-NEXT: call void @__kmpc_target_deinit()
-; AMDGPU3-NEXT: ret void
-; AMDGPU3: worker.exit:
-; AMDGPU3-NEXT: ret void
-;
-;
-; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__16
-; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; AMDGPU3-NEXT: entry:
-; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU3-NEXT: call void @weak_callee_empty() #[[ATTR9]]
-; AMDGPU3-NEXT: ret void
-;
-;
-; AMDGPU3: Function Attrs: convergent noinline nounwind
-; AMDGPU3-LABEL: define {{[^@]+}}@weak_callee_empty
-; AMDGPU3-SAME: () #[[ATTR1]] {
-; AMDGPU3-NEXT: entry:
-; AMDGPU3-NEXT: ret void
-;
-;
-; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__17
-; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; AMDGPU3-NEXT: entry:
-; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU3-NEXT: call void @p0() #[[ATTR11]]
-; AMDGPU3-NEXT: ret void
-;
-;
-; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__17_wrapper
-; AMDGPU3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
-; AMDGPU3-NEXT: entry:
-; AMDGPU3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU3-NEXT: call void @__omp_outlined__17(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU3-NEXT: ret void
-;
-;
-; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__18
-; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; AMDGPU3-NEXT: entry:
-; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU3-NEXT: call void @p0() #[[ATTR11]]
-; AMDGPU3-NEXT: ret void
-;
-;
-; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__18_wrapper
-; AMDGPU3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
-; AMDGPU3-NEXT: entry:
-; AMDGPU3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU3-NEXT: call void @__omp_outlined__18(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU3-NEXT: ret void
-;
-;
-; AMDGPU3: Function Attrs: noinline nounwind
-; AMDGPU3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after.internalized
-; AMDGPU3-SAME: () #[[ATTR6]] {
-; AMDGPU3-NEXT: entry:
-; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
-; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; AMDGPU3-NEXT: ret void
-;
-;
-; AMDGPU3: Function Attrs: convergent noinline nounwind
-; AMDGPU3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after
-; AMDGPU3-SAME: () #[[ATTR1]] {
-; AMDGPU3-NEXT: entry:
-; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
-; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; AMDGPU3-NEXT: ret void
-;
-;
-; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__19
-; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; AMDGPU3-NEXT: entry:
-; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU3-NEXT: call void @p0() #[[ATTR11]]
-; AMDGPU3-NEXT: ret void
-;
-;
-; AMDGPU3: Function Attrs: convergent noinline norecurse nounwind
-; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__19_wrapper
-; AMDGPU3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
-; AMDGPU3-NEXT: entry:
-; AMDGPU3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU3-NEXT: call void @__omp_outlined__19(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; AMDGPU3-NEXT: ret void
-;
-;
-; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX2-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_needed_l14
-; NVPTX2-SAME: () #[[ATTR0:[0-9]+]] {
-; NVPTX2-NEXT: entry:
-; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment)
-; NVPTX2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; NVPTX2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
-; NVPTX2: user_code.entry:
-; NVPTX2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]]
-; NVPTX2-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX2-NEXT: call void @__kmpc_target_deinit()
-; NVPTX2-NEXT: ret void
-; NVPTX2: worker.exit:
-; NVPTX2-NEXT: ret void
-;
-;
-; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__
-; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; NVPTX2-NEXT: entry:
-; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX2-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9:[0-9]+]]
-; NVPTX2-NEXT: call void @unknown_no_openmp() #[[ATTR10:[0-9]+]]
-; NVPTX2-NEXT: ret void
-;
-;
-; NVPTX2: Function Attrs: convergent noinline nounwind
-; NVPTX2-LABEL: define {{[^@]+}}@no_parallel_region_in_here.internalized
-; NVPTX2-SAME: () #[[ATTR1:[0-9]+]] {
-; NVPTX2-NEXT: entry:
-; NVPTX2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
-; NVPTX2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]]
-; NVPTX2-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
-; NVPTX2-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
-; NVPTX2: omp_if.then:
-; NVPTX2-NEXT: store i32 0, ptr @G, align 4
-; NVPTX2-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]]
-; NVPTX2-NEXT: br label [[OMP_IF_END]]
-; NVPTX2: omp_if.end:
-; NVPTX2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) #[[ATTR3]]
-; NVPTX2-NEXT: ret void
-;
-;
-; NVPTX2: Function Attrs: convergent noinline nounwind
-; NVPTX2-LABEL: define {{[^@]+}}@no_parallel_region_in_here
-; NVPTX2-SAME: () #[[ATTR1]] {
-; NVPTX2-NEXT: entry:
-; NVPTX2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
-; NVPTX2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]])
-; NVPTX2-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
-; NVPTX2-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
-; NVPTX2: omp_if.then:
-; NVPTX2-NEXT: store i32 0, ptr @G, align 4
-; NVPTX2-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]])
-; NVPTX2-NEXT: br label [[OMP_IF_END]]
-; NVPTX2: omp_if.end:
-; NVPTX2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]])
-; NVPTX2-NEXT: ret void
-;
-;
-; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX2-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_l22
-; NVPTX2-SAME: () #[[ATTR0]] {
-; NVPTX2-NEXT: entry:
-; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment)
-; NVPTX2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; NVPTX2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
-; NVPTX2: user_code.entry:
-; NVPTX2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; NVPTX2-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX2-NEXT: call void @__kmpc_target_deinit()
-; NVPTX2-NEXT: ret void
-; NVPTX2: worker.exit:
-; NVPTX2-NEXT: ret void
-;
-;
-; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__1
-; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; NVPTX2-NEXT: entry:
-; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
-; NVPTX2-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
-; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; NVPTX2-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]]
-; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
-; NVPTX2-NEXT: ret void
-;
-;
-; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__2
-; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; NVPTX2-NEXT: entry:
-; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX2-NEXT: call void @p0() #[[ATTR11:[0-9]+]]
-; NVPTX2-NEXT: ret void
-;
-;
-; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__2_wrapper
-; NVPTX2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
-; NVPTX2-NEXT: entry:
-; NVPTX2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; NVPTX2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX2-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX2-NEXT: ret void
-;
-;
-; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__3
-; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; NVPTX2-NEXT: entry:
-; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX2-NEXT: call void @p1() #[[ATTR11]]
-; NVPTX2-NEXT: ret void
-;
-;
-; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper
-; NVPTX2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
-; NVPTX2-NEXT: entry:
-; NVPTX2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; NVPTX2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX2-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX2-NEXT: ret void
-;
-;
-; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX2-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39
-; NVPTX2-SAME: () #[[ATTR0]] {
-; NVPTX2-NEXT: entry:
-; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment)
-; NVPTX2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; NVPTX2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
-; NVPTX2: user_code.entry:
-; NVPTX2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; NVPTX2-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX2-NEXT: call void @__kmpc_target_deinit()
-; NVPTX2-NEXT: ret void
-; NVPTX2: worker.exit:
-; NVPTX2-NEXT: ret void
-;
-;
-; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__4
-; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; NVPTX2-NEXT: entry:
-; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; NVPTX2-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
-; NVPTX2-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR9]]
-; NVPTX2-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]]
-; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; NVPTX2-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR9]]
-; NVPTX2-NEXT: ret void
-;
-;
-; NVPTX2: Function Attrs: noinline nounwind
-; NVPTX2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before.internalized
-; NVPTX2-SAME: () #[[ATTR6:[0-9]+]] {
-; NVPTX2-NEXT: entry:
-; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; NVPTX2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
-; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; NVPTX2-NEXT: ret void
-;
-;
-; NVPTX2: Function Attrs: convergent noinline nounwind
-; NVPTX2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before
-; NVPTX2-SAME: () #[[ATTR1]] {
-; NVPTX2-NEXT: entry:
-; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; NVPTX2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
-; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; NVPTX2-NEXT: ret void
-;
-;
-; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__5
-; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; NVPTX2-NEXT: entry:
-; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX2-NEXT: call void @p1() #[[ATTR11]]
-; NVPTX2-NEXT: ret void
-;
-;
-; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
-; NVPTX2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
-; NVPTX2-NEXT: entry:
-; NVPTX2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; NVPTX2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX2-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX2-NEXT: ret void
-;
-;
-; NVPTX2: Function Attrs: noinline nounwind
-; NVPTX2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after.internalized
-; NVPTX2-SAME: () #[[ATTR6]] {
-; NVPTX2-NEXT: entry:
-; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; NVPTX2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
-; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; NVPTX2-NEXT: ret void
-;
-;
-; NVPTX2: Function Attrs: convergent noinline nounwind
-; NVPTX2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after
-; NVPTX2-SAME: () #[[ATTR1]] {
-; NVPTX2-NEXT: entry:
-; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; NVPTX2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
-; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; NVPTX2-NEXT: ret void
-;
-;
-; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX2-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55
-; NVPTX2-SAME: () #[[ATTR0]] {
-; NVPTX2-NEXT: entry:
-; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment)
-; NVPTX2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; NVPTX2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
-; NVPTX2: user_code.entry:
-; NVPTX2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; NVPTX2-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX2-NEXT: call void @__kmpc_target_deinit()
-; NVPTX2-NEXT: ret void
-; NVPTX2: worker.exit:
-; NVPTX2-NEXT: ret void
-;
-;
-; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__6
-; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; NVPTX2-NEXT: entry:
-; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
-; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; NVPTX2-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR11]]
-; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
-; NVPTX2-NEXT: ret void
-;
-;
-; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__7
-; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; NVPTX2-NEXT: entry:
-; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX2-NEXT: call void @p0() #[[ATTR11]]
-; NVPTX2-NEXT: ret void
-;
-;
-; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
-; NVPTX2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
-; NVPTX2-NEXT: entry:
-; NVPTX2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; NVPTX2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX2-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX2-NEXT: ret void
-;
-;
-; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__8
-; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; NVPTX2-NEXT: entry:
-; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX2-NEXT: call void @p1() #[[ATTR11]]
-; NVPTX2-NEXT: ret void
-;
-;
-; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__8_wrapper
-; NVPTX2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
-; NVPTX2-NEXT: entry:
-; NVPTX2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; NVPTX2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX2-NEXT: call void @__omp_outlined__8(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX2-NEXT: ret void
-;
-;
-; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX2-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66
-; NVPTX2-SAME: () #[[ATTR0]] {
-; NVPTX2-NEXT: entry:
-; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment)
-; NVPTX2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; NVPTX2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
-; NVPTX2: user_code.entry:
-; NVPTX2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; NVPTX2-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX2-NEXT: call void @__kmpc_target_deinit()
-; NVPTX2-NEXT: ret void
-; NVPTX2: worker.exit:
-; NVPTX2-NEXT: ret void
-;
-;
-; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__9
-; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; NVPTX2-NEXT: entry:
-; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
-; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; NVPTX2-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
-; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
-; NVPTX2-NEXT: ret void
-;
-;
-; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__10
-; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; NVPTX2-NEXT: entry:
-; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX2-NEXT: call void @p0() #[[ATTR11]]
-; NVPTX2-NEXT: ret void
-;
-;
-; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__10_wrapper
-; NVPTX2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
-; NVPTX2-NEXT: entry:
-; NVPTX2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; NVPTX2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX2-NEXT: call void @__omp_outlined__10(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX2-NEXT: ret void
-;
-;
-; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__11
-; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; NVPTX2-NEXT: entry:
-; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX2-NEXT: call void @p1() #[[ATTR11]]
-; NVPTX2-NEXT: ret void
-;
-;
-; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__11_wrapper
-; NVPTX2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
-; NVPTX2-NEXT: entry:
-; NVPTX2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; NVPTX2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX2-NEXT: call void @__omp_outlined__11(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX2-NEXT: ret void
-;
-;
-; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX2-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_pure_l77
-; NVPTX2-SAME: () #[[ATTR0]] {
-; NVPTX2-NEXT: entry:
-; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment)
-; NVPTX2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; NVPTX2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
-; NVPTX2: user_code.entry:
-; NVPTX2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; NVPTX2-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX2-NEXT: call void @__kmpc_target_deinit()
-; NVPTX2-NEXT: ret void
-; NVPTX2: worker.exit:
-; NVPTX2-NEXT: ret void
-;
-;
-; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__12
-; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; NVPTX2-NEXT: entry:
-; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
-; NVPTX2-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
-; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
-; NVPTX2-NEXT: ret void
-;
-;
-; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__13
-; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; NVPTX2-NEXT: entry:
-; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX2-NEXT: call void @p0() #[[ATTR11]]
-; NVPTX2-NEXT: ret void
-;
-;
-; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__13_wrapper
-; NVPTX2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
-; NVPTX2-NEXT: entry:
-; NVPTX2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; NVPTX2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX2-NEXT: call void @__omp_outlined__13(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX2-NEXT: ret void
-;
-;
-; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__14
-; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; NVPTX2-NEXT: entry:
-; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX2-NEXT: call void @p1() #[[ATTR11]]
-; NVPTX2-NEXT: ret void
-;
-;
-; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__14_wrapper
-; NVPTX2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
-; NVPTX2-NEXT: entry:
-; NVPTX2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; NVPTX2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX2-NEXT: call void @__omp_outlined__14(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX2-NEXT: ret void
-;
-;
-; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX2-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92
-; NVPTX2-SAME: () #[[ATTR0]] {
-; NVPTX2-NEXT: entry:
-; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment)
-; NVPTX2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; NVPTX2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
-; NVPTX2: user_code.entry:
-; NVPTX2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; NVPTX2-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX2-NEXT: call void @__kmpc_target_deinit()
-; NVPTX2-NEXT: ret void
-; NVPTX2: worker.exit:
-; NVPTX2-NEXT: ret void
-;
-;
-; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__15
-; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; NVPTX2-NEXT: entry:
-; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX2-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR9]]
-; NVPTX2-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR9]]
-; NVPTX2-NEXT: ret void
-;
-;
-; NVPTX2: Function Attrs: noinline nounwind
-; NVPTX2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after.internalized
-; NVPTX2-SAME: (i32 [[A:%.*]]) #[[ATTR6]] {
-; NVPTX2-NEXT: entry:
-; NVPTX2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
-; NVPTX2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
-; NVPTX2-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
-; NVPTX2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0
-; NVPTX2-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
-; NVPTX2: if.then:
-; NVPTX2-NEXT: br label [[RETURN:%.*]]
-; NVPTX2: if.end:
-; NVPTX2-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
-; NVPTX2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
-; NVPTX2-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR9]]
-; NVPTX2-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR9]]
-; NVPTX2-NEXT: br label [[RETURN]]
-; NVPTX2: return:
-; NVPTX2-NEXT: ret void
-;
-;
-; NVPTX2: Function Attrs: convergent noinline nounwind
-; NVPTX2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after
-; NVPTX2-SAME: (i32 [[A:%.*]]) #[[ATTR1]] {
-; NVPTX2-NEXT: entry:
-; NVPTX2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
-; NVPTX2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
-; NVPTX2-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
-; NVPTX2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0
-; NVPTX2-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
-; NVPTX2: if.then:
-; NVPTX2-NEXT: br label [[RETURN:%.*]]
-; NVPTX2: if.end:
-; NVPTX2-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
-; NVPTX2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
-; NVPTX2-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR11]]
-; NVPTX2-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR11]]
-; NVPTX2-NEXT: br label [[RETURN]]
-; NVPTX2: return:
-; NVPTX2-NEXT: ret void
-;
-;
-; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX2-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112
-; NVPTX2-SAME: () #[[ATTR0]] {
-; NVPTX2-NEXT: entry:
-; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment)
-; NVPTX2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; NVPTX2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
-; NVPTX2: user_code.entry:
-; NVPTX2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; NVPTX2-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX2-NEXT: call void @__kmpc_target_deinit()
-; NVPTX2-NEXT: ret void
-; NVPTX2: worker.exit:
-; NVPTX2-NEXT: ret void
-;
-;
-; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__16
-; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; NVPTX2-NEXT: entry:
-; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX2-NEXT: call void @weak_callee_empty() #[[ATTR9]]
-; NVPTX2-NEXT: ret void
-;
-;
-; NVPTX2: Function Attrs: convergent noinline nounwind
-; NVPTX2-LABEL: define {{[^@]+}}@weak_callee_empty
-; NVPTX2-SAME: () #[[ATTR1]] {
-; NVPTX2-NEXT: entry:
-; NVPTX2-NEXT: ret void
-;
-;
-; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__17
-; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; NVPTX2-NEXT: entry:
-; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX2-NEXT: call void @p0() #[[ATTR11]]
-; NVPTX2-NEXT: ret void
-;
-;
-; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__17_wrapper
-; NVPTX2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
-; NVPTX2-NEXT: entry:
-; NVPTX2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; NVPTX2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX2-NEXT: call void @__omp_outlined__17(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX2-NEXT: ret void
-;
-;
-; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__18
-; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; NVPTX2-NEXT: entry:
-; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX2-NEXT: call void @p0() #[[ATTR11]]
-; NVPTX2-NEXT: ret void
-;
-;
-; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__18_wrapper
-; NVPTX2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
-; NVPTX2-NEXT: entry:
-; NVPTX2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; NVPTX2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX2-NEXT: call void @__omp_outlined__18(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX2-NEXT: ret void
-;
-;
-; NVPTX2: Function Attrs: noinline nounwind
-; NVPTX2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after.internalized
-; NVPTX2-SAME: () #[[ATTR6]] {
-; NVPTX2-NEXT: entry:
-; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; NVPTX2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
-; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; NVPTX2-NEXT: ret void
-;
-;
-; NVPTX2: Function Attrs: convergent noinline nounwind
-; NVPTX2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after
-; NVPTX2-SAME: () #[[ATTR1]] {
-; NVPTX2-NEXT: entry:
-; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; NVPTX2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
-; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; NVPTX2-NEXT: ret void
-;
-;
-; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__19
-; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; NVPTX2-NEXT: entry:
-; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX2-NEXT: call void @p0() #[[ATTR11]]
-; NVPTX2-NEXT: ret void
-;
-;
-; NVPTX2: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__19_wrapper
-; NVPTX2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
-; NVPTX2-NEXT: entry:
-; NVPTX2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; NVPTX2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX2-NEXT: call void @__omp_outlined__19(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX2-NEXT: ret void
-;
-;
-; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX3-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_needed_l14
-; NVPTX3-SAME: () #[[ATTR0:[0-9]+]] {
-; NVPTX3-NEXT: entry:
-; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment)
-; NVPTX3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; NVPTX3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
-; NVPTX3: user_code.entry:
-; NVPTX3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]]
-; NVPTX3-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX3-NEXT: call void @__kmpc_target_deinit()
-; NVPTX3-NEXT: ret void
-; NVPTX3: worker.exit:
-; NVPTX3-NEXT: ret void
-;
-;
-; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__
-; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; NVPTX3-NEXT: entry:
-; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX3-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9:[0-9]+]]
-; NVPTX3-NEXT: call void @unknown_no_openmp() #[[ATTR10:[0-9]+]]
-; NVPTX3-NEXT: ret void
-;
-;
-; NVPTX3: Function Attrs: convergent noinline nounwind
-; NVPTX3-LABEL: define {{[^@]+}}@no_parallel_region_in_here.internalized
-; NVPTX3-SAME: () #[[ATTR1:[0-9]+]] {
-; NVPTX3-NEXT: entry:
-; NVPTX3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
-; NVPTX3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]]
-; NVPTX3-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
-; NVPTX3-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
-; NVPTX3: omp_if.then:
-; NVPTX3-NEXT: store i32 0, ptr @G, align 4
-; NVPTX3-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]]
-; NVPTX3-NEXT: br label [[OMP_IF_END]]
-; NVPTX3: omp_if.end:
-; NVPTX3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) #[[ATTR3]]
-; NVPTX3-NEXT: ret void
-;
-;
-; NVPTX3: Function Attrs: convergent noinline nounwind
-; NVPTX3-LABEL: define {{[^@]+}}@no_parallel_region_in_here
-; NVPTX3-SAME: () #[[ATTR1]] {
-; NVPTX3-NEXT: entry:
-; NVPTX3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
-; NVPTX3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]])
-; NVPTX3-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
-; NVPTX3-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
-; NVPTX3: omp_if.then:
-; NVPTX3-NEXT: store i32 0, ptr @G, align 4
-; NVPTX3-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]])
-; NVPTX3-NEXT: br label [[OMP_IF_END]]
-; NVPTX3: omp_if.end:
-; NVPTX3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]])
-; NVPTX3-NEXT: ret void
-;
-;
-; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX3-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_l22
-; NVPTX3-SAME: () #[[ATTR0]] {
-; NVPTX3-NEXT: entry:
-; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment)
-; NVPTX3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; NVPTX3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
-; NVPTX3: user_code.entry:
-; NVPTX3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; NVPTX3-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX3-NEXT: call void @__kmpc_target_deinit()
-; NVPTX3-NEXT: ret void
-; NVPTX3: worker.exit:
-; NVPTX3-NEXT: ret void
-;
-;
-; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__1
-; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; NVPTX3-NEXT: entry:
-; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
-; NVPTX3-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
-; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; NVPTX3-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]]
-; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
-; NVPTX3-NEXT: ret void
-;
-;
-; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__2
-; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; NVPTX3-NEXT: entry:
-; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX3-NEXT: call void @p0() #[[ATTR11:[0-9]+]]
-; NVPTX3-NEXT: ret void
-;
-;
-; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__2_wrapper
-; NVPTX3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
-; NVPTX3-NEXT: entry:
-; NVPTX3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; NVPTX3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX3-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX3-NEXT: ret void
-;
-;
-; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__3
-; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; NVPTX3-NEXT: entry:
-; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX3-NEXT: call void @p1() #[[ATTR11]]
-; NVPTX3-NEXT: ret void
-;
-;
-; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper
-; NVPTX3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
-; NVPTX3-NEXT: entry:
-; NVPTX3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; NVPTX3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX3-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX3-NEXT: ret void
-;
-;
-; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX3-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39
-; NVPTX3-SAME: () #[[ATTR0]] {
-; NVPTX3-NEXT: entry:
-; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment)
-; NVPTX3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; NVPTX3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
-; NVPTX3: user_code.entry:
-; NVPTX3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; NVPTX3-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX3-NEXT: call void @__kmpc_target_deinit()
-; NVPTX3-NEXT: ret void
-; NVPTX3: worker.exit:
-; NVPTX3-NEXT: ret void
-;
-;
-; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__4
-; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; NVPTX3-NEXT: entry:
-; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; NVPTX3-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
-; NVPTX3-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR9]]
-; NVPTX3-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]]
-; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; NVPTX3-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR9]]
-; NVPTX3-NEXT: ret void
-;
-;
-; NVPTX3: Function Attrs: noinline nounwind
-; NVPTX3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before.internalized
-; NVPTX3-SAME: () #[[ATTR6:[0-9]+]] {
-; NVPTX3-NEXT: entry:
-; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; NVPTX3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
-; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; NVPTX3-NEXT: ret void
-;
-;
-; NVPTX3: Function Attrs: convergent noinline nounwind
-; NVPTX3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before
-; NVPTX3-SAME: () #[[ATTR1]] {
-; NVPTX3-NEXT: entry:
-; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; NVPTX3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
-; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; NVPTX3-NEXT: ret void
-;
-;
-; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__5
-; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; NVPTX3-NEXT: entry:
-; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX3-NEXT: call void @p1() #[[ATTR11]]
-; NVPTX3-NEXT: ret void
-;
-;
-; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
-; NVPTX3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
-; NVPTX3-NEXT: entry:
-; NVPTX3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; NVPTX3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX3-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX3-NEXT: ret void
-;
-;
-; NVPTX3: Function Attrs: noinline nounwind
-; NVPTX3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after.internalized
-; NVPTX3-SAME: () #[[ATTR6]] {
-; NVPTX3-NEXT: entry:
-; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; NVPTX3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
-; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; NVPTX3-NEXT: ret void
-;
-;
-; NVPTX3: Function Attrs: convergent noinline nounwind
-; NVPTX3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after
-; NVPTX3-SAME: () #[[ATTR1]] {
-; NVPTX3-NEXT: entry:
-; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; NVPTX3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
-; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; NVPTX3-NEXT: ret void
-;
-;
-; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX3-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55
-; NVPTX3-SAME: () #[[ATTR0]] {
-; NVPTX3-NEXT: entry:
-; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment)
-; NVPTX3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; NVPTX3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
-; NVPTX3: user_code.entry:
-; NVPTX3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; NVPTX3-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX3-NEXT: call void @__kmpc_target_deinit()
-; NVPTX3-NEXT: ret void
-; NVPTX3: worker.exit:
-; NVPTX3-NEXT: ret void
-;
-;
-; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__6
-; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; NVPTX3-NEXT: entry:
-; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
-; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; NVPTX3-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR11]]
-; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
-; NVPTX3-NEXT: ret void
-;
-;
-; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__7
-; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; NVPTX3-NEXT: entry:
-; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX3-NEXT: call void @p0() #[[ATTR11]]
-; NVPTX3-NEXT: ret void
-;
-;
-; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
-; NVPTX3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
-; NVPTX3-NEXT: entry:
-; NVPTX3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; NVPTX3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX3-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX3-NEXT: ret void
-;
-;
-; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__8
-; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; NVPTX3-NEXT: entry:
-; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX3-NEXT: call void @p1() #[[ATTR11]]
-; NVPTX3-NEXT: ret void
-;
-;
-; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__8_wrapper
-; NVPTX3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
-; NVPTX3-NEXT: entry:
-; NVPTX3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; NVPTX3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX3-NEXT: call void @__omp_outlined__8(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX3-NEXT: ret void
-;
-;
-; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX3-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66
-; NVPTX3-SAME: () #[[ATTR0]] {
-; NVPTX3-NEXT: entry:
-; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment)
-; NVPTX3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; NVPTX3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
-; NVPTX3: user_code.entry:
-; NVPTX3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; NVPTX3-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX3-NEXT: call void @__kmpc_target_deinit()
-; NVPTX3-NEXT: ret void
-; NVPTX3: worker.exit:
-; NVPTX3-NEXT: ret void
-;
-;
-; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__9
-; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; NVPTX3-NEXT: entry:
-; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
-; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; NVPTX3-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
-; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
-; NVPTX3-NEXT: ret void
-;
-;
-; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__10
-; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; NVPTX3-NEXT: entry:
-; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX3-NEXT: call void @p0() #[[ATTR11]]
-; NVPTX3-NEXT: ret void
-;
-;
-; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__10_wrapper
-; NVPTX3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
-; NVPTX3-NEXT: entry:
-; NVPTX3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; NVPTX3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX3-NEXT: call void @__omp_outlined__10(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX3-NEXT: ret void
-;
-;
-; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__11
-; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; NVPTX3-NEXT: entry:
-; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX3-NEXT: call void @p1() #[[ATTR11]]
-; NVPTX3-NEXT: ret void
-;
-;
-; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__11_wrapper
-; NVPTX3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
-; NVPTX3-NEXT: entry:
-; NVPTX3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; NVPTX3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX3-NEXT: call void @__omp_outlined__11(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX3-NEXT: ret void
-;
-;
-; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX3-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_pure_l77
-; NVPTX3-SAME: () #[[ATTR0]] {
-; NVPTX3-NEXT: entry:
-; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment)
-; NVPTX3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; NVPTX3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
-; NVPTX3: user_code.entry:
-; NVPTX3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; NVPTX3-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX3-NEXT: call void @__kmpc_target_deinit()
-; NVPTX3-NEXT: ret void
-; NVPTX3: worker.exit:
-; NVPTX3-NEXT: ret void
-;
-;
-; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__12
-; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; NVPTX3-NEXT: entry:
-; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
-; NVPTX3-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
-; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
-; NVPTX3-NEXT: ret void
-;
-;
-; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__13
-; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; NVPTX3-NEXT: entry:
-; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX3-NEXT: call void @p0() #[[ATTR11]]
-; NVPTX3-NEXT: ret void
-;
-;
-; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__13_wrapper
-; NVPTX3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
-; NVPTX3-NEXT: entry:
-; NVPTX3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; NVPTX3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX3-NEXT: call void @__omp_outlined__13(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX3-NEXT: ret void
-;
-;
-; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__14
-; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; NVPTX3-NEXT: entry:
-; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX3-NEXT: call void @p1() #[[ATTR11]]
-; NVPTX3-NEXT: ret void
-;
-;
-; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__14_wrapper
-; NVPTX3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
-; NVPTX3-NEXT: entry:
-; NVPTX3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; NVPTX3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX3-NEXT: call void @__omp_outlined__14(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX3-NEXT: ret void
-;
-;
-; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX3-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92
-; NVPTX3-SAME: () #[[ATTR0]] {
-; NVPTX3-NEXT: entry:
-; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment)
-; NVPTX3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; NVPTX3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
-; NVPTX3: user_code.entry:
-; NVPTX3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; NVPTX3-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX3-NEXT: call void @__kmpc_target_deinit()
-; NVPTX3-NEXT: ret void
-; NVPTX3: worker.exit:
-; NVPTX3-NEXT: ret void
-;
-;
-; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__15
-; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; NVPTX3-NEXT: entry:
-; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX3-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR9]]
-; NVPTX3-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR9]]
-; NVPTX3-NEXT: ret void
-;
-;
-; NVPTX3: Function Attrs: noinline nounwind
-; NVPTX3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after.internalized
-; NVPTX3-SAME: (i32 [[A:%.*]]) #[[ATTR6]] {
-; NVPTX3-NEXT: entry:
-; NVPTX3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
-; NVPTX3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
-; NVPTX3-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
-; NVPTX3-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0
-; NVPTX3-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
-; NVPTX3: if.then:
-; NVPTX3-NEXT: br label [[RETURN:%.*]]
-; NVPTX3: if.end:
-; NVPTX3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
-; NVPTX3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
-; NVPTX3-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR9]]
-; NVPTX3-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR9]]
-; NVPTX3-NEXT: br label [[RETURN]]
-; NVPTX3: return:
-; NVPTX3-NEXT: ret void
-;
-;
-; NVPTX3: Function Attrs: convergent noinline nounwind
-; NVPTX3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after
-; NVPTX3-SAME: (i32 [[A:%.*]]) #[[ATTR1]] {
-; NVPTX3-NEXT: entry:
-; NVPTX3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
-; NVPTX3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
-; NVPTX3-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
-; NVPTX3-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0
-; NVPTX3-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
-; NVPTX3: if.then:
-; NVPTX3-NEXT: br label [[RETURN:%.*]]
-; NVPTX3: if.end:
-; NVPTX3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
-; NVPTX3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
-; NVPTX3-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR11]]
-; NVPTX3-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR11]]
-; NVPTX3-NEXT: br label [[RETURN]]
-; NVPTX3: return:
-; NVPTX3-NEXT: ret void
-;
-;
-; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX3-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112
-; NVPTX3-SAME: () #[[ATTR0]] {
-; NVPTX3-NEXT: entry:
-; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment)
-; NVPTX3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; NVPTX3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
-; NVPTX3: user_code.entry:
-; NVPTX3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; NVPTX3-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX3-NEXT: call void @__kmpc_target_deinit()
-; NVPTX3-NEXT: ret void
-; NVPTX3: worker.exit:
-; NVPTX3-NEXT: ret void
-;
-;
-; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__16
-; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; NVPTX3-NEXT: entry:
-; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX3-NEXT: call void @weak_callee_empty() #[[ATTR9]]
-; NVPTX3-NEXT: ret void
-;
-;
-; NVPTX3: Function Attrs: convergent noinline nounwind
-; NVPTX3-LABEL: define {{[^@]+}}@weak_callee_empty
-; NVPTX3-SAME: () #[[ATTR1]] {
-; NVPTX3-NEXT: entry:
-; NVPTX3-NEXT: ret void
-;
-;
-; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__17
-; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; NVPTX3-NEXT: entry:
-; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX3-NEXT: call void @p0() #[[ATTR11]]
-; NVPTX3-NEXT: ret void
-;
-;
-; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__17_wrapper
-; NVPTX3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
-; NVPTX3-NEXT: entry:
-; NVPTX3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; NVPTX3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX3-NEXT: call void @__omp_outlined__17(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX3-NEXT: ret void
-;
-;
-; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__18
-; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; NVPTX3-NEXT: entry:
-; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX3-NEXT: call void @p0() #[[ATTR11]]
-; NVPTX3-NEXT: ret void
-;
-;
-; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__18_wrapper
-; NVPTX3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
-; NVPTX3-NEXT: entry:
-; NVPTX3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; NVPTX3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX3-NEXT: call void @__omp_outlined__18(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX3-NEXT: ret void
-;
-;
-; NVPTX3: Function Attrs: noinline nounwind
-; NVPTX3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after.internalized
-; NVPTX3-SAME: () #[[ATTR6]] {
-; NVPTX3-NEXT: entry:
-; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; NVPTX3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
-; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; NVPTX3-NEXT: ret void
-;
-;
-; NVPTX3: Function Attrs: convergent noinline nounwind
-; NVPTX3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after
-; NVPTX3-SAME: () #[[ATTR1]] {
-; NVPTX3-NEXT: entry:
-; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; NVPTX3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
-; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; NVPTX3-NEXT: ret void
-;
-;
-; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__19
-; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; NVPTX3-NEXT: entry:
-; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX3-NEXT: call void @p0() #[[ATTR11]]
-; NVPTX3-NEXT: ret void
-;
-;
-; NVPTX3: Function Attrs: convergent noinline norecurse nounwind
-; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__19_wrapper
-; NVPTX3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
-; NVPTX3-NEXT: entry:
-; NVPTX3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; NVPTX3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX3-NEXT: call void @__omp_outlined__19(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
-; NVPTX3-NEXT: ret void
-;
-;.
-; AMDGPU1: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; AMDGPU1: attributes #[[ATTR1]] = { convergent noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; AMDGPU1: attributes #[[ATTR2:[0-9]+]] = { convergent "frame-pointer"="none" "llvm.assume"="omp_no_openmp" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; AMDGPU1: attributes #[[ATTR3]] = { nounwind }
-; AMDGPU1: attributes #[[ATTR4:[0-9]+]] = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; AMDGPU1: attributes #[[ATTR5:[0-9]+]] = { alwaysinline }
-; AMDGPU1: attributes #[[ATTR6]] = { noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; AMDGPU1: attributes #[[ATTR7:[0-9]+]] = { convergent nounwind willreturn memory(read) "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; AMDGPU1: attributes #[[ATTR8:[0-9]+]] = { convergent nounwind "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; AMDGPU1: attributes #[[ATTR9]] = { convergent nounwind }
-; AMDGPU1: attributes #[[ATTR10]] = { convergent "llvm.assume"="omp_no_openmp" }
-; AMDGPU1: attributes #[[ATTR11]] = { convergent }
-;.
-; NVPTX1: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; NVPTX1: attributes #[[ATTR1]] = { convergent noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; NVPTX1: attributes #[[ATTR2:[0-9]+]] = { convergent "frame-pointer"="none" "llvm.assume"="omp_no_openmp" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; NVPTX1: attributes #[[ATTR3]] = { nounwind }
-; NVPTX1: attributes #[[ATTR4:[0-9]+]] = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; NVPTX1: attributes #[[ATTR5:[0-9]+]] = { alwaysinline }
-; NVPTX1: attributes #[[ATTR6]] = { noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; NVPTX1: attributes #[[ATTR7:[0-9]+]] = { convergent nounwind willreturn memory(read) "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; NVPTX1: attributes #[[ATTR8:[0-9]+]] = { convergent nounwind "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; NVPTX1: attributes #[[ATTR9]] = { convergent nounwind }
-; NVPTX1: attributes #[[ATTR10]] = { convergent "llvm.assume"="omp_no_openmp" }
-; NVPTX1: attributes #[[ATTR11]] = { convergent }
-;.
-; AMDGPU2: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; AMDGPU2: attributes #[[ATTR1]] = { convergent noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; AMDGPU2: attributes #[[ATTR2:[0-9]+]] = { convergent "frame-pointer"="none" "llvm.assume"="omp_no_openmp" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; AMDGPU2: attributes #[[ATTR3]] = { nounwind }
-; AMDGPU2: attributes #[[ATTR4:[0-9]+]] = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; AMDGPU2: attributes #[[ATTR5:[0-9]+]] = { alwaysinline }
-; AMDGPU2: attributes #[[ATTR6]] = { noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; AMDGPU2: attributes #[[ATTR7:[0-9]+]] = { convergent nounwind willreturn memory(read) "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; AMDGPU2: attributes #[[ATTR8:[0-9]+]] = { convergent nounwind "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; AMDGPU2: attributes #[[ATTR9]] = { convergent nounwind }
-; AMDGPU2: attributes #[[ATTR10]] = { convergent "llvm.assume"="omp_no_openmp" }
-; AMDGPU2: attributes #[[ATTR11]] = { convergent }
-;.
-; AMDGPU3: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; AMDGPU3: attributes #[[ATTR1]] = { convergent noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; AMDGPU3: attributes #[[ATTR2:[0-9]+]] = { convergent "frame-pointer"="none" "llvm.assume"="omp_no_openmp" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; AMDGPU3: attributes #[[ATTR3]] = { nounwind }
-; AMDGPU3: attributes #[[ATTR4:[0-9]+]] = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; AMDGPU3: attributes #[[ATTR5:[0-9]+]] = { alwaysinline }
-; AMDGPU3: attributes #[[ATTR6]] = { noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; AMDGPU3: attributes #[[ATTR7:[0-9]+]] = { convergent nounwind willreturn memory(read) "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; AMDGPU3: attributes #[[ATTR8:[0-9]+]] = { convergent nounwind "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; AMDGPU3: attributes #[[ATTR9]] = { convergent nounwind }
-; AMDGPU3: attributes #[[ATTR10]] = { convergent "llvm.assume"="omp_no_openmp" }
-; AMDGPU3: attributes #[[ATTR11]] = { convergent }
-;.
-; NVPTX2: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; NVPTX2: attributes #[[ATTR1]] = { convergent noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; NVPTX2: attributes #[[ATTR2:[0-9]+]] = { convergent "frame-pointer"="none" "llvm.assume"="omp_no_openmp" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; NVPTX2: attributes #[[ATTR3]] = { nounwind }
-; NVPTX2: attributes #[[ATTR4:[0-9]+]] = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; NVPTX2: attributes #[[ATTR5:[0-9]+]] = { alwaysinline }
-; NVPTX2: attributes #[[ATTR6]] = { noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; NVPTX2: attributes #[[ATTR7:[0-9]+]] = { convergent nounwind willreturn memory(read) "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; NVPTX2: attributes #[[ATTR8:[0-9]+]] = { convergent nounwind "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; NVPTX2: attributes #[[ATTR9]] = { convergent nounwind }
-; NVPTX2: attributes #[[ATTR10]] = { convergent "llvm.assume"="omp_no_openmp" }
-; NVPTX2: attributes #[[ATTR11]] = { convergent }
-;.
-; NVPTX3: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; NVPTX3: attributes #[[ATTR1]] = { convergent noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; NVPTX3: attributes #[[ATTR2:[0-9]+]] = { convergent "frame-pointer"="none" "llvm.assume"="omp_no_openmp" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; NVPTX3: attributes #[[ATTR3]] = { nounwind }
-; NVPTX3: attributes #[[ATTR4:[0-9]+]] = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; NVPTX3: attributes #[[ATTR5:[0-9]+]] = { alwaysinline }
-; NVPTX3: attributes #[[ATTR6]] = { noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; NVPTX3: attributes #[[ATTR7:[0-9]+]] = { convergent nounwind willreturn memory(read) "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; NVPTX3: attributes #[[ATTR8:[0-9]+]] = { convergent nounwind "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-; NVPTX3: attributes #[[ATTR9]] = { convergent nounwind }
-; NVPTX3: attributes #[[ATTR10]] = { convergent "llvm.assume"="omp_no_openmp" }
-; NVPTX3: attributes #[[ATTR11]] = { convergent }
-;.
-; AMDGPU1: [[META0:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural", i32 39, i32 2}
-; AMDGPU1: [[META1:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_no_openmp_attr", i32 66, i32 4}
-; AMDGPU1: [[META2:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_needed", i32 14, i32 0}
-; AMDGPU1: [[META3:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_with_fallback", i32 55, i32 3}
-; AMDGPU1: [[META4:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_pure", i32 77, i32 5}
-; AMDGPU1: [[META5:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural_nested_recursive", i32 92, i32 6}
-; AMDGPU1: [[META6:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_weak_callee", i32 112, i32 7}
-; AMDGPU1: [[META7:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine", i32 22, i32 1}
-; AMDGPU1: [[META8:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14, !"kernel", i32 1}
-; AMDGPU1: [[META9:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_l22, !"kernel", i32 1}
-; AMDGPU1: [[META10:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39, !"kernel", i32 1}
-; AMDGPU1: [[META11:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55, !"kernel", i32 1}
-; AMDGPU1: [[META12:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66, !"kernel", i32 1}
-; AMDGPU1: [[META13:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77, !"kernel", i32 1}
-; AMDGPU1: [[META14:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92, !"kernel", i32 1}
-; AMDGPU1: [[META15:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112, !"kernel", i32 1}
-; AMDGPU1: [[META16:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
-; AMDGPU1: [[META17:![0-9]+]] = !{i32 7, !"openmp", i32 50}
-; AMDGPU1: [[META18:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
-;.
-; NVPTX1: [[META0:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural", i32 39, i32 2}
-; NVPTX1: [[META1:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_no_openmp_attr", i32 66, i32 4}
-; NVPTX1: [[META2:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_needed", i32 14, i32 0}
-; NVPTX1: [[META3:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_with_fallback", i32 55, i32 3}
-; NVPTX1: [[META4:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_pure", i32 77, i32 5}
-; NVPTX1: [[META5:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural_nested_recursive", i32 92, i32 6}
-; NVPTX1: [[META6:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_weak_callee", i32 112, i32 7}
-; NVPTX1: [[META7:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine", i32 22, i32 1}
-; NVPTX1: [[META8:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14, !"kernel", i32 1}
-; NVPTX1: [[META9:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_l22, !"kernel", i32 1}
-; NVPTX1: [[META10:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39, !"kernel", i32 1}
-; NVPTX1: [[META11:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55, !"kernel", i32 1}
-; NVPTX1: [[META12:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66, !"kernel", i32 1}
-; NVPTX1: [[META13:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77, !"kernel", i32 1}
-; NVPTX1: [[META14:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92, !"kernel", i32 1}
-; NVPTX1: [[META15:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112, !"kernel", i32 1}
-; NVPTX1: [[META16:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
-; NVPTX1: [[META17:![0-9]+]] = !{i32 7, !"openmp", i32 50}
-; NVPTX1: [[META18:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
-;.
-; AMDGPU2: [[META0:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural", i32 39, i32 2}
-; AMDGPU2: [[META1:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_no_openmp_attr", i32 66, i32 4}
-; AMDGPU2: [[META2:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_needed", i32 14, i32 0}
-; AMDGPU2: [[META3:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_with_fallback", i32 55, i32 3}
-; AMDGPU2: [[META4:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_pure", i32 77, i32 5}
-; AMDGPU2: [[META5:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural_nested_recursive", i32 92, i32 6}
-; AMDGPU2: [[META6:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_weak_callee", i32 112, i32 7}
-; AMDGPU2: [[META7:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine", i32 22, i32 1}
-; AMDGPU2: [[META8:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14, !"kernel", i32 1}
-; AMDGPU2: [[META9:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_l22, !"kernel", i32 1}
-; AMDGPU2: [[META10:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39, !"kernel", i32 1}
-; AMDGPU2: [[META11:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55, !"kernel", i32 1}
-; AMDGPU2: [[META12:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66, !"kernel", i32 1}
-; AMDGPU2: [[META13:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77, !"kernel", i32 1}
-; AMDGPU2: [[META14:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92, !"kernel", i32 1}
-; AMDGPU2: [[META15:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112, !"kernel", i32 1}
-; AMDGPU2: [[META16:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
-; AMDGPU2: [[META17:![0-9]+]] = !{i32 7, !"openmp", i32 50}
-; AMDGPU2: [[META18:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
-;.
-; AMDGPU3: [[META0:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural", i32 39, i32 2}
-; AMDGPU3: [[META1:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_no_openmp_attr", i32 66, i32 4}
-; AMDGPU3: [[META2:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_needed", i32 14, i32 0}
-; AMDGPU3: [[META3:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_with_fallback", i32 55, i32 3}
-; AMDGPU3: [[META4:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_pure", i32 77, i32 5}
-; AMDGPU3: [[META5:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural_nested_recursive", i32 92, i32 6}
-; AMDGPU3: [[META6:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_weak_callee", i32 112, i32 7}
-; AMDGPU3: [[META7:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine", i32 22, i32 1}
-; AMDGPU3: [[META8:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14, !"kernel", i32 1}
-; AMDGPU3: [[META9:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_l22, !"kernel", i32 1}
-; AMDGPU3: [[META10:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39, !"kernel", i32 1}
-; AMDGPU3: [[META11:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55, !"kernel", i32 1}
-; AMDGPU3: [[META12:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66, !"kernel", i32 1}
-; AMDGPU3: [[META13:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77, !"kernel", i32 1}
-; AMDGPU3: [[META14:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92, !"kernel", i32 1}
-; AMDGPU3: [[META15:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112, !"kernel", i32 1}
-; AMDGPU3: [[META16:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
-; AMDGPU3: [[META17:![0-9]+]] = !{i32 7, !"openmp", i32 50}
-; AMDGPU3: [[META18:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
-;.
-; NVPTX2: [[META0:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural", i32 39, i32 2}
-; NVPTX2: [[META1:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_no_openmp_attr", i32 66, i32 4}
-; NVPTX2: [[META2:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_needed", i32 14, i32 0}
-; NVPTX2: [[META3:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_with_fallback", i32 55, i32 3}
-; NVPTX2: [[META4:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_pure", i32 77, i32 5}
-; NVPTX2: [[META5:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural_nested_recursive", i32 92, i32 6}
-; NVPTX2: [[META6:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_weak_callee", i32 112, i32 7}
-; NVPTX2: [[META7:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine", i32 22, i32 1}
-; NVPTX2: [[META8:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14, !"kernel", i32 1}
-; NVPTX2: [[META9:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_l22, !"kernel", i32 1}
-; NVPTX2: [[META10:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39, !"kernel", i32 1}
-; NVPTX2: [[META11:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55, !"kernel", i32 1}
-; NVPTX2: [[META12:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66, !"kernel", i32 1}
-; NVPTX2: [[META13:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77, !"kernel", i32 1}
-; NVPTX2: [[META14:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92, !"kernel", i32 1}
-; NVPTX2: [[META15:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112, !"kernel", i32 1}
-; NVPTX2: [[META16:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
-; NVPTX2: [[META17:![0-9]+]] = !{i32 7, !"openmp", i32 50}
-; NVPTX2: [[META18:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
-;.
-; NVPTX3: [[META0:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural", i32 39, i32 2}
-; NVPTX3: [[META1:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_no_openmp_attr", i32 66, i32 4}
-; NVPTX3: [[META2:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_needed", i32 14, i32 0}
-; NVPTX3: [[META3:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_with_fallback", i32 55, i32 3}
-; NVPTX3: [[META4:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_pure", i32 77, i32 5}
-; NVPTX3: [[META5:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural_nested_recursive", i32 92, i32 6}
-; NVPTX3: [[META6:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"no_state_machine_weak_callee", i32 112, i32 7}
-; NVPTX3: [[META7:![0-9]+]] = !{i32 0, i32 20, i32 171331627, !"simple_state_machine", i32 22, i32 1}
-; NVPTX3: [[META8:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14, !"kernel", i32 1}
-; NVPTX3: [[META9:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_l22, !"kernel", i32 1}
-; NVPTX3: [[META10:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39, !"kernel", i32 1}
-; NVPTX3: [[META11:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55, !"kernel", i32 1}
-; NVPTX3: [[META12:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66, !"kernel", i32 1}
-; NVPTX3: [[META13:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77, !"kernel", i32 1}
-; NVPTX3: [[META14:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92, !"kernel", i32 1}
-; NVPTX3: [[META15:![0-9]+]] = !{ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112, !"kernel", i32 1}
-; NVPTX3: [[META16:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
-; NVPTX3: [[META17:![0-9]+]] = !{i32 7, !"openmp", i32 50}
-; NVPTX3: [[META18:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
-;.
+; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
+; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_needed_l14
+; AMDGPU-SAME: () #[[ATTR0:[0-9]+]] {
+; AMDGPU-NEXT: entry:
+; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
+; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true)
+; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
+; AMDGPU: user_code.entry:
+; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]]
+; AMDGPU-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
+; AMDGPU-NEXT: ret void
+; AMDGPU: worker.exit:
+; AMDGPU-NEXT: ret void
+;
+;
+; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
+; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__
+; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-NEXT: entry:
+; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9:[0-9]+]]
+; AMDGPU-NEXT: call void @unknown_no_openmp() #[[ATTR10:[0-9]+]]
+; AMDGPU-NEXT: ret void
+;
+;
+; AMDGPU: Function Attrs: convergent noinline nounwind
+; AMDGPU-LABEL: define {{[^@]+}}@no_parallel_region_in_here.internalized
+; AMDGPU-SAME: () #[[ATTR1:[0-9]+]] {
+; AMDGPU-NEXT: entry:
+; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) #[[ATTR3]]
+; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]]
+; AMDGPU-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; AMDGPU-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
+; AMDGPU: omp_if.then:
+; AMDGPU-NEXT: store i32 0, ptr @G, align 4
+; AMDGPU-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]]
+; AMDGPU-NEXT: br label [[OMP_IF_END]]
+; AMDGPU: omp_if.end:
+; AMDGPU-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP0]]) #[[ATTR3]]
+; AMDGPU-NEXT: ret void
+;
+;
+; AMDGPU: Function Attrs: convergent noinline nounwind
+; AMDGPU-LABEL: define {{[^@]+}}@no_parallel_region_in_here
+; AMDGPU-SAME: () #[[ATTR1]] {
+; AMDGPU-NEXT: entry:
+; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
+; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]])
+; AMDGPU-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; AMDGPU-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
+; AMDGPU: omp_if.then:
+; AMDGPU-NEXT: store i32 0, ptr @G, align 4
+; AMDGPU-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]])
+; AMDGPU-NEXT: br label [[OMP_IF_END]]
+; AMDGPU: omp_if.end:
+; AMDGPU-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]])
+; AMDGPU-NEXT: ret void
+;
+;
+; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
+; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_l22
+; AMDGPU-SAME: () #[[ATTR0]] {
+; AMDGPU-NEXT: entry:
+; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
+; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
+; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
+; AMDGPU: user_code.entry:
+; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; AMDGPU-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
+; AMDGPU-NEXT: ret void
+; AMDGPU: worker.exit:
+; AMDGPU-NEXT: ret void
+;
+;
+; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
+; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__1
+; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-NEXT: entry:
+; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
+; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; AMDGPU-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]]
+; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
+; AMDGPU-NEXT: ret void
+;
+;
+; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
+; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__2
+; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-NEXT: entry:
+; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT: call void @p0() #[[ATTR11:[0-9]+]]
+; AMDGPU-NEXT: ret void
+;
+;
+; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
+; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__2_wrapper
+; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; AMDGPU-NEXT: entry:
+; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
+; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
+; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
+; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; AMDGPU-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-NEXT: ret void
+;
+;
+; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
+; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__3
+; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-NEXT: entry:
+; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT: call void @p1() #[[ATTR11]]
+; AMDGPU-NEXT: ret void
+;
+;
+; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
+; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper
+; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; AMDGPU-NEXT: entry:
+; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
+; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
+; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
+; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; AMDGPU-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-NEXT: ret void
+;
+;
+; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
+; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39
+; AMDGPU-SAME: () #[[ATTR0]] {
+; AMDGPU-NEXT: entry:
+; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
+; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
+; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
+; AMDGPU: user_code.entry:
+; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; AMDGPU-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
+; AMDGPU-NEXT: ret void
+; AMDGPU: worker.exit:
+; AMDGPU-NEXT: ret void
+;
+;
+; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
+; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__4
+; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-NEXT: entry:
+; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
+; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR9]]
+; AMDGPU-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]]
+; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR9]]
+; AMDGPU-NEXT: ret void
+;
+;
+; AMDGPU: Function Attrs: noinline nounwind
+; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before.internalized
+; AMDGPU-SAME: () #[[ATTR6:[0-9]+]] {
+; AMDGPU-NEXT: entry:
+; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
+; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; AMDGPU-NEXT: ret void
+;
+;
+; AMDGPU: Function Attrs: convergent noinline nounwind
+; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before
+; AMDGPU-SAME: () #[[ATTR1]] {
+; AMDGPU-NEXT: entry:
+; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
+; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; AMDGPU-NEXT: ret void
+;
+;
+; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
+; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__5
+; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-NEXT: entry:
+; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT: call void @p1() #[[ATTR11]]
+; AMDGPU-NEXT: ret void
+;
+;
+; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
+; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
+; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; AMDGPU-NEXT: entry:
+; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
+; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
+; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
+; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; AMDGPU-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-NEXT: ret void
+;
+;
+; AMDGPU: Function Attrs: noinline nounwind
+; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after.internalized
+; AMDGPU-SAME: () #[[ATTR6]] {
+; AMDGPU-NEXT: entry:
+; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
+; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; AMDGPU-NEXT: ret void
+;
+;
+; AMDGPU: Function Attrs: convergent noinline nounwind
+; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after
+; AMDGPU-SAME: () #[[ATTR1]] {
+; AMDGPU-NEXT: entry:
+; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
+; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; AMDGPU-NEXT: ret void
+;
+;
+; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
+; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55
+; AMDGPU-SAME: () #[[ATTR0]] {
+; AMDGPU-NEXT: entry:
+; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
+; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
+; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
+; AMDGPU: user_code.entry:
+; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; AMDGPU-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
+; AMDGPU-NEXT: ret void
+; AMDGPU: worker.exit:
+; AMDGPU-NEXT: ret void
+;
+;
+; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
+; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__6
+; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-NEXT: entry:
+; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; AMDGPU-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR11]]
+; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
+; AMDGPU-NEXT: ret void
+;
+;
+; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
+; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__7
+; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-NEXT: entry:
+; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT: call void @p0() #[[ATTR11]]
+; AMDGPU-NEXT: ret void
+;
+;
+; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
+; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
+; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; AMDGPU-NEXT: entry:
+; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
+; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
+; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
+; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; AMDGPU-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-NEXT: ret void
+;
+;
+; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
+; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__8
+; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-NEXT: entry:
+; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT: call void @p1() #[[ATTR11]]
+; AMDGPU-NEXT: ret void
+;
+;
+; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
+; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__8_wrapper
+; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; AMDGPU-NEXT: entry:
+; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
+; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
+; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
+; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; AMDGPU-NEXT: call void @__omp_outlined__8(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-NEXT: ret void
+;
+;
+; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
+; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66
+; AMDGPU-SAME: () #[[ATTR0]] {
+; AMDGPU-NEXT: entry:
+; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
+; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
+; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
+; AMDGPU: user_code.entry:
+; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; AMDGPU-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
+; AMDGPU-NEXT: ret void
+; AMDGPU: worker.exit:
+; AMDGPU-NEXT: ret void
+;
+;
+; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
+; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__9
+; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-NEXT: entry:
+; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; AMDGPU-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
+; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
+; AMDGPU-NEXT: ret void
+;
+;
+; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
+; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__10
+; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-NEXT: entry:
+; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT: call void @p0() #[[ATTR11]]
+; AMDGPU-NEXT: ret void
+;
+;
+; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
+; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__10_wrapper
+; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; AMDGPU-NEXT: entry:
+; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
+; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
+; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
+; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; AMDGPU-NEXT: call void @__omp_outlined__10(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-NEXT: ret void
+;
+;
+; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
+; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__11
+; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-NEXT: entry:
+; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT: call void @p1() #[[ATTR11]]
+; AMDGPU-NEXT: ret void
+;
+;
+; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
+; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__11_wrapper
+; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; AMDGPU-NEXT: entry:
+; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
+; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
+; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
+; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; AMDGPU-NEXT: call void @__omp_outlined__11(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-NEXT: ret void
+;
+;
+; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
+; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_pure_l77
+; AMDGPU-SAME: () #[[ATTR0]] {
+; AMDGPU-NEXT: entry:
+; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
+; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
+; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
+; AMDGPU: user_code.entry:
+; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; AMDGPU-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
+; AMDGPU-NEXT: ret void
+; AMDGPU: worker.exit:
+; AMDGPU-NEXT: ret void
+;
+;
+; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
+; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__12
+; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-NEXT: entry:
+; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
+; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
+; AMDGPU-NEXT: ret void
+;
+;
+; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
+; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__13
+; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-NEXT: entry:
+; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT: call void @p0() #[[ATTR11]]
+; AMDGPU-NEXT: ret void
+;
+;
+; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
+; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__13_wrapper
+; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; AMDGPU-NEXT: entry:
+; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
+; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
+; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
+; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; AMDGPU-NEXT: call void @__omp_outlined__13(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-NEXT: ret void
+;
+;
+; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
+; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__14
+; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-NEXT: entry:
+; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT: call void @p1() #[[ATTR11]]
+; AMDGPU-NEXT: ret void
+;
+;
+; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
+; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__14_wrapper
+; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; AMDGPU-NEXT: entry:
+; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
+; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
+; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
+; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; AMDGPU-NEXT: call void @__omp_outlined__14(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-NEXT: ret void
+;
+;
+; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
+; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92
+; AMDGPU-SAME: () #[[ATTR0]] {
+; AMDGPU-NEXT: entry:
+; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
+; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
+; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
+; AMDGPU: user_code.entry:
+; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; AMDGPU-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
+; AMDGPU-NEXT: ret void
+; AMDGPU: worker.exit:
+; AMDGPU-NEXT: ret void
+;
+;
+; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
+; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__15
+; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-NEXT: entry:
+; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR9]]
+; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR9]]
+; AMDGPU-NEXT: ret void
+;
+;
+; AMDGPU: Function Attrs: noinline nounwind
+; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after.internalized
+; AMDGPU-SAME: (i32 [[A:%.*]]) #[[ATTR6]] {
+; AMDGPU-NEXT: entry:
+; AMDGPU-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+; AMDGPU-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
+; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
+; AMDGPU-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0
+; AMDGPU-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
+; AMDGPU: if.then:
+; AMDGPU-NEXT: br label [[RETURN:%.*]]
+; AMDGPU: if.end:
+; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
+; AMDGPU-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
+; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR9]]
+; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR9]]
+; AMDGPU-NEXT: br label [[RETURN]]
+; AMDGPU: return:
+; AMDGPU-NEXT: ret void
+;
+;
+; AMDGPU: Function Attrs: convergent noinline nounwind
+; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after
+; AMDGPU-SAME: (i32 [[A:%.*]]) #[[ATTR1]] {
+; AMDGPU-NEXT: entry:
+; AMDGPU-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+; AMDGPU-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
+; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
+; AMDGPU-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0
+; AMDGPU-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
+; AMDGPU: if.then:
+; AMDGPU-NEXT: br label [[RETURN:%.*]]
+; AMDGPU: if.end:
+; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
+; AMDGPU-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
+; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR11]]
+; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR11]]
+; AMDGPU-NEXT: br label [[RETURN]]
+; AMDGPU: return:
+; AMDGPU-NEXT: ret void
+;
+;
+; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
+; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112
+; AMDGPU-SAME: () #[[ATTR0]] {
+; AMDGPU-NEXT: entry:
+; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
+; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
+; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
+; AMDGPU: user_code.entry:
+; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; AMDGPU-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
+; AMDGPU-NEXT: ret void
+; AMDGPU: worker.exit:
+; AMDGPU-NEXT: ret void
+;
+;
+; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
+; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__16
+; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-NEXT: entry:
+; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT: call void @weak_callee_empty() #[[ATTR9]]
+; AMDGPU-NEXT: ret void
+;
+;
+; AMDGPU: Function Attrs: convergent noinline nounwind
+; AMDGPU-LABEL: define {{[^@]+}}@weak_callee_empty
+; AMDGPU-SAME: () #[[ATTR1]] {
+; AMDGPU-NEXT: entry:
+; AMDGPU-NEXT: ret void
+;
+;
+; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
+; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__17
+; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-NEXT: entry:
+; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT: call void @p0() #[[ATTR11]]
+; AMDGPU-NEXT: ret void
+;
+;
+; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
+; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__17_wrapper
+; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; AMDGPU-NEXT: entry:
+; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
+; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
+; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
+; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; AMDGPU-NEXT: call void @__omp_outlined__17(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-NEXT: ret void
+;
+;
+; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
+; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__18
+; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-NEXT: entry:
+; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT: call void @p0() #[[ATTR11]]
+; AMDGPU-NEXT: ret void
+;
+;
+; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
+; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__18_wrapper
+; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; AMDGPU-NEXT: entry:
+; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
+; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
+; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
+; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; AMDGPU-NEXT: call void @__omp_outlined__18(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-NEXT: ret void
+;
+;
+; AMDGPU: Function Attrs: noinline nounwind
+; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after.internalized
+; AMDGPU-SAME: () #[[ATTR6]] {
+; AMDGPU-NEXT: entry:
+; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
+; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; AMDGPU-NEXT: ret void
+;
+;
+; AMDGPU: Function Attrs: convergent noinline nounwind
+; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after
+; AMDGPU-SAME: () #[[ATTR1]] {
+; AMDGPU-NEXT: entry:
+; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
+; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; AMDGPU-NEXT: ret void
+;
+;
+; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
+; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__19
+; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-NEXT: entry:
+; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT: call void @p0() #[[ATTR11]]
+; AMDGPU-NEXT: ret void
+;
+;
+; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
+; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__19_wrapper
+; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; AMDGPU-NEXT: entry:
+; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
+; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
+; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
+; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; AMDGPU-NEXT: call void @__omp_outlined__19(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-NEXT: ret void
+;
+;
+; NVPTX: Function Attrs: convergent noinline norecurse nounwind
+; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_needed_l14
+; NVPTX-SAME: () #[[ATTR0:[0-9]+]] {
+; NVPTX-NEXT: entry:
+; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
+; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true)
+; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
+; NVPTX: user_code.entry:
+; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]]
+; NVPTX-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
+; NVPTX-NEXT: ret void
+; NVPTX: worker.exit:
+; NVPTX-NEXT: ret void
+;
+;
+; NVPTX: Function Attrs: convergent noinline norecurse nounwind
+; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__
+; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-NEXT: entry:
+; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9:[0-9]+]]
+; NVPTX-NEXT: call void @unknown_no_openmp() #[[ATTR10:[0-9]+]]
+; NVPTX-NEXT: ret void
+;
+;
+; NVPTX: Function Attrs: convergent noinline nounwind
+; NVPTX-LABEL: define {{[^@]+}}@no_parallel_region_in_here.internalized
+; NVPTX-SAME: () #[[ATTR1:[0-9]+]] {
+; NVPTX-NEXT: entry:
+; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) #[[ATTR3]]
+; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]]
+; NVPTX-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; NVPTX-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
+; NVPTX: omp_if.then:
+; NVPTX-NEXT: store i32 0, ptr @G, align 4
+; NVPTX-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]]
+; NVPTX-NEXT: br label [[OMP_IF_END]]
+; NVPTX: omp_if.end:
+; NVPTX-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP0]]) #[[ATTR3]]
+; NVPTX-NEXT: ret void
+;
+;
+; NVPTX: Function Attrs: convergent noinline nounwind
+; NVPTX-LABEL: define {{[^@]+}}@no_parallel_region_in_here
+; NVPTX-SAME: () #[[ATTR1]] {
+; NVPTX-NEXT: entry:
+; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
+; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB2]], i32 [[TMP0]])
+; NVPTX-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; NVPTX-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
+; NVPTX: omp_if.then:
+; NVPTX-NEXT: store i32 0, ptr @G, align 4
+; NVPTX-NEXT: call void @__kmpc_end_single(ptr @[[GLOB2]], i32 [[TMP0]])
+; NVPTX-NEXT: br label [[OMP_IF_END]]
+; NVPTX: omp_if.end:
+; NVPTX-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]])
+; NVPTX-NEXT: ret void
+;
+;
+; NVPTX: Function Attrs: convergent noinline norecurse nounwind
+; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_l22
+; NVPTX-SAME: () #[[ATTR0]] {
+; NVPTX-NEXT: entry:
+; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
+; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
+; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
+; NVPTX: user_code.entry:
+; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; NVPTX-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
+; NVPTX-NEXT: ret void
+; NVPTX: worker.exit:
+; NVPTX-NEXT: ret void
+;
+;
+; NVPTX: Function Attrs: convergent noinline norecurse nounwind
+; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__1
+; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-NEXT: entry:
+; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
+; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; NVPTX-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]]
+; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
+; NVPTX-NEXT: ret void
+;
+;
+; NVPTX: Function Attrs: convergent noinline norecurse nounwind
+; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__2
+; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-NEXT: entry:
+; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT: call void @p0() #[[ATTR11:[0-9]+]]
+; NVPTX-NEXT: ret void
+;
+;
+; NVPTX: Function Attrs: convergent noinline norecurse nounwind
+; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__2_wrapper
+; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; NVPTX-NEXT: entry:
+; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
+; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
+; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
+; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; NVPTX-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-NEXT: ret void
+;
+;
+; NVPTX: Function Attrs: convergent noinline norecurse nounwind
+; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__3
+; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-NEXT: entry:
+; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT: call void @p1() #[[ATTR11]]
+; NVPTX-NEXT: ret void
+;
+;
+; NVPTX: Function Attrs: convergent noinline norecurse nounwind
+; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper
+; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; NVPTX-NEXT: entry:
+; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
+; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
+; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
+; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; NVPTX-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-NEXT: ret void
+;
+;
+; NVPTX: Function Attrs: convergent noinline norecurse nounwind
+; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39
+; NVPTX-SAME: () #[[ATTR0]] {
+; NVPTX-NEXT: entry:
+; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
+; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
+; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
+; NVPTX: user_code.entry:
+; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; NVPTX-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
+; NVPTX-NEXT: ret void
+; NVPTX: worker.exit:
+; NVPTX-NEXT: ret void
+;
+;
+; NVPTX: Function Attrs: convergent noinline norecurse nounwind
+; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__4
+; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-NEXT: entry:
+; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
+; NVPTX-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR9]]
+; NVPTX-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]]
+; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; NVPTX-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR9]]
+; NVPTX-NEXT: ret void
+;
+;
+; NVPTX: Function Attrs: noinline nounwind
+; NVPTX-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before.internalized
+; NVPTX-SAME: () #[[ATTR6:[0-9]+]] {
+; NVPTX-NEXT: entry:
+; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
+; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; NVPTX-NEXT: ret void
+;
+;
+; NVPTX: Function Attrs: convergent noinline nounwind
+; NVPTX-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before
+; NVPTX-SAME: () #[[ATTR1]] {
+; NVPTX-NEXT: entry:
+; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
+; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; NVPTX-NEXT: ret void
+;
+;
+; NVPTX: Function Attrs: convergent noinline norecurse nounwind
+; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__5
+; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-NEXT: entry:
+; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT: call void @p1() #[[ATTR11]]
+; NVPTX-NEXT: ret void
+;
+;
+; NVPTX: Function Attrs: convergent noinline norecurse nounwind
+; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
+; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; NVPTX-NEXT: entry:
+; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
+; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
+; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
+; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; NVPTX-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-NEXT: ret void
+;
+;
+; NVPTX: Function Attrs: noinline nounwind
+; NVPTX-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after.internalized
+; NVPTX-SAME: () #[[ATTR6]] {
+; NVPTX-NEXT: entry:
+; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
+; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; NVPTX-NEXT: ret void
+;
+;
+; NVPTX: Function Attrs: convergent noinline nounwind
+; NVPTX-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after
+; NVPTX-SAME: () #[[ATTR1]] {
+; NVPTX-NEXT: entry:
+; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
+; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; NVPTX-NEXT: ret void
+;
+;
+; NVPTX: Function Attrs: convergent noinline norecurse nounwind
+; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55
+; NVPTX-SAME: () #[[ATTR0]] {
+; NVPTX-NEXT: entry:
+; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
+; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
+; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
+; NVPTX: user_code.entry:
+; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; NVPTX-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
+; NVPTX-NEXT: ret void
+; NVPTX: worker.exit:
+; NVPTX-NEXT: ret void
+;
+;
+; NVPTX: Function Attrs: convergent noinline norecurse nounwind
+; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__6
+; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-NEXT: entry:
+; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; NVPTX-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR11]]
+; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
+; NVPTX-NEXT: ret void
+;
+;
+; NVPTX: Function Attrs: convergent noinline norecurse nounwind
+; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__7
+; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-NEXT: entry:
+; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT: call void @p0() #[[ATTR11]]
+; NVPTX-NEXT: ret void
+;
+;
+; NVPTX: Function Attrs: convergent noinline norecurse nounwind
+; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
+; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; NVPTX-NEXT: entry:
+; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
+; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
+; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
+; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; NVPTX-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-NEXT: ret void
+;
+;
+; NVPTX: Function Attrs: convergent noinline norecurse nounwind
+; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__8
+; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-NEXT: entry:
+; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT: call void @p1() #[[ATTR11]]
+; NVPTX-NEXT: ret void
+;
+;
+; NVPTX: Function Attrs: convergent noinline norecurse nounwind
+; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__8_wrapper
+; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; NVPTX-NEXT: entry:
+; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
+; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
+; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
+; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; NVPTX-NEXT: call void @__omp_outlined__8(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-NEXT: ret void
+;
+;
+; NVPTX: Function Attrs: convergent noinline norecurse nounwind
+; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66
+; NVPTX-SAME: () #[[ATTR0]] {
+; NVPTX-NEXT: entry:
+; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
+; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
+; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
+; NVPTX: user_code.entry:
+; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; NVPTX-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
+; NVPTX-NEXT: ret void
+; NVPTX: worker.exit:
+; NVPTX-NEXT: ret void
+;
+;
+; NVPTX: Function Attrs: convergent noinline norecurse nounwind
+; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__9
+; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-NEXT: entry:
+; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; NVPTX-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
+; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
+; NVPTX-NEXT: ret void
+;
+;
+; NVPTX: Function Attrs: convergent noinline norecurse nounwind
+; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__10
+; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-NEXT: entry:
+; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT: call void @p0() #[[ATTR11]]
+; NVPTX-NEXT: ret void
+;
+;
+; NVPTX: Function Attrs: convergent noinline norecurse nounwind
+; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__10_wrapper
+; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; NVPTX-NEXT: entry:
+; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
+; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
+; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
+; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; NVPTX-NEXT: call void @__omp_outlined__10(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-NEXT: ret void
+;
+;
+; NVPTX: Function Attrs: convergent noinline norecurse nounwind
+; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__11
+; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-NEXT: entry:
+; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT: call void @p1() #[[ATTR11]]
+; NVPTX-NEXT: ret void
+;
+;
+; NVPTX: Function Attrs: convergent noinline norecurse nounwind
+; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__11_wrapper
+; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; NVPTX-NEXT: entry:
+; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
+; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
+; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
+; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; NVPTX-NEXT: call void @__omp_outlined__11(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-NEXT: ret void
+;
+;
+; NVPTX: Function Attrs: convergent noinline norecurse nounwind
+; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_pure_l77
+; NVPTX-SAME: () #[[ATTR0]] {
+; NVPTX-NEXT: entry:
+; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
+; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
+; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
+; NVPTX: user_code.entry:
+; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; NVPTX-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
+; NVPTX-NEXT: ret void
+; NVPTX: worker.exit:
+; NVPTX-NEXT: ret void
+;
+;
+; NVPTX: Function Attrs: convergent noinline norecurse nounwind
+; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__12
+; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-NEXT: entry:
+; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
+; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
+; NVPTX-NEXT: ret void
+;
+;
+; NVPTX: Function Attrs: convergent noinline norecurse nounwind
+; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__13
+; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-NEXT: entry:
+; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT: call void @p0() #[[ATTR11]]
+; NVPTX-NEXT: ret void
+;
+;
+; NVPTX: Function Attrs: convergent noinline norecurse nounwind
+; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__13_wrapper
+; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; NVPTX-NEXT: entry:
+; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
+; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
+; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
+; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; NVPTX-NEXT: call void @__omp_outlined__13(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-NEXT: ret void
+;
+;
+; NVPTX: Function Attrs: convergent noinline norecurse nounwind
+; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__14
+; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-NEXT: entry:
+; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT: call void @p1() #[[ATTR11]]
+; NVPTX-NEXT: ret void
+;
+;
+; NVPTX: Function Attrs: convergent noinline norecurse nounwind
+; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__14_wrapper
+; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; NVPTX-NEXT: entry:
+; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
+; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
+; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
+; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; NVPTX-NEXT: call void @__omp_outlined__14(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-NEXT: ret void
+;
+;
+; NVPTX: Function Attrs: convergent noinline norecurse nounwind
+; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92
+; NVPTX-SAME: () #[[ATTR0]] {
+; NVPTX-NEXT: entry:
+; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
+; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
+; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
+; NVPTX: user_code.entry:
+; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; NVPTX-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
+; NVPTX-NEXT: ret void
+; NVPTX: worker.exit:
+; NVPTX-NEXT: ret void
+;
+;
+; NVPTX: Function Attrs: convergent noinline norecurse nounwind
+; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__15
+; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-NEXT: entry:
+; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR9]]
+; NVPTX-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR9]]
+; NVPTX-NEXT: ret void
+;
+;
+; NVPTX: Function Attrs: noinline nounwind
+; NVPTX-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after.internalized
+; NVPTX-SAME: (i32 [[A:%.*]]) #[[ATTR6]] {
+; NVPTX-NEXT: entry:
+; NVPTX-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+; NVPTX-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
+; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
+; NVPTX-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0
+; NVPTX-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
+; NVPTX: if.then:
+; NVPTX-NEXT: br label [[RETURN:%.*]]
+; NVPTX: if.end:
+; NVPTX-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
+; NVPTX-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
+; NVPTX-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR9]]
+; NVPTX-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR9]]
+; NVPTX-NEXT: br label [[RETURN]]
+; NVPTX: return:
+; NVPTX-NEXT: ret void
+;
+;
+; NVPTX: Function Attrs: convergent noinline nounwind
+; NVPTX-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after
+; NVPTX-SAME: (i32 [[A:%.*]]) #[[ATTR1]] {
+; NVPTX-NEXT: entry:
+; NVPTX-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+; NVPTX-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
+; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
+; NVPTX-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0
+; NVPTX-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
+; NVPTX: if.then:
+; NVPTX-NEXT: br label [[RETURN:%.*]]
+; NVPTX: if.end:
+; NVPTX-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
+; NVPTX-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
+; NVPTX-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR11]]
+; NVPTX-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR11]]
+; NVPTX-NEXT: br label [[RETURN]]
+; NVPTX: return:
+; NVPTX-NEXT: ret void
+;
+;
+; NVPTX: Function Attrs: convergent noinline norecurse nounwind
+; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112
+; NVPTX-SAME: () #[[ATTR0]] {
+; NVPTX-NEXT: entry:
+; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
+; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true)
+; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
+; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
+; NVPTX: user_code.entry:
+; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
+; NVPTX-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
+; NVPTX-NEXT: ret void
+; NVPTX: worker.exit:
+; NVPTX-NEXT: ret void
+;
+;
+; NVPTX: Function Attrs: convergent noinline norecurse nounwind
+; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__16
+; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-NEXT: entry:
+; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT: call void @weak_callee_empty() #[[ATTR9]]
+; NVPTX-NEXT: ret void
+;
+;
+; NVPTX: Function Attrs: convergent noinline nounwind
+; NVPTX-LABEL: define {{[^@]+}}@weak_callee_empty
+; NVPTX-SAME: () #[[ATTR1]] {
+; NVPTX-NEXT: entry:
+; NVPTX-NEXT: ret void
+;
+;
+; NVPTX: Function Attrs: convergent noinline norecurse nounwind
+; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__17
+; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-NEXT: entry:
+; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT: call void @p0() #[[ATTR11]]
+; NVPTX-NEXT: ret void
+;
+;
+; NVPTX: Function Attrs: convergent noinline norecurse nounwind
+; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__17_wrapper
+; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; NVPTX-NEXT: entry:
+; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
+; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
+; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
+; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; NVPTX-NEXT: call void @__omp_outlined__17(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-NEXT: ret void
+;
+;
+; NVPTX: Function Attrs: convergent noinline norecurse nounwind
+; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__18
+; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-NEXT: entry:
+; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT: call void @p0() #[[ATTR11]]
+; NVPTX-NEXT: ret void
+;
+;
+; NVPTX: Function Attrs: convergent noinline norecurse nounwind
+; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__18_wrapper
+; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; NVPTX-NEXT: entry:
+; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
+; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
+; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
+; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; NVPTX-NEXT: call void @__omp_outlined__18(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-NEXT: ret void
+;
+;
+; NVPTX: Function Attrs: noinline nounwind
+; NVPTX-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after.internalized
+; NVPTX-SAME: () #[[ATTR6]] {
+; NVPTX-NEXT: entry:
+; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
+; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; NVPTX-NEXT: ret void
+;
+;
+; NVPTX: Function Attrs: convergent noinline nounwind
+; NVPTX-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after
+; NVPTX-SAME: () #[[ATTR1]] {
+; NVPTX-NEXT: entry:
+; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
+; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; NVPTX-NEXT: ret void
+;
+;
+; NVPTX: Function Attrs: convergent noinline norecurse nounwind
+; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__19
+; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-NEXT: entry:
+; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT: call void @p0() #[[ATTR11]]
+; NVPTX-NEXT: ret void
+;
+;
+; NVPTX: Function Attrs: convergent noinline norecurse nounwind
+; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__19_wrapper
+; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; NVPTX-NEXT: entry:
+; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
+; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
+; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
+; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
+; NVPTX-NEXT: call void @__omp_outlined__19(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-NEXT: ret void
+;
diff --git a/llvm/test/Transforms/OpenMP/custom_state_machines_remarks.ll b/llvm/test/Transforms/OpenMP/custom_state_machines_remarks.ll
index 0c542bb27f4f46..1d95ea071f2ff2 100644
--- a/llvm/test/Transforms/OpenMP/custom_state_machines_remarks.ll
+++ b/llvm/test/Transforms/OpenMP/custom_state_machines_remarks.ll
@@ -36,8 +36,6 @@ target triple = "nvptx64"
;; }
%struct.ident_t = type { i32, i32, i32, i32, ptr }
-%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr }
-%struct.ConfigurationEnvironmentTy = type { i8, i8, i8 }
@0 = private unnamed_addr constant [113 x i8] c";llvm/test/Transforms/OpenMP/custom_state_machines_remarks.c;__omp_offloading_2a_d80d3d_test_fallback_l11;11;1;;\00", align 1
@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8
@@ -45,24 +43,24 @@ target triple = "nvptx64"
@3 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @2 }, align 8
@4 = private unnamed_addr constant [114 x i8] c";llvm/test/Transforms/OpenMP/custom_state_machines_remarks.c;__omp_offloading_2a_d80d3d_test_fallback_l11;11;25;;\00", align 1
@5 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @4 }, align 8
+@__omp_offloading_2a_d80d3d_test_fallback_l11_exec_mode = weak constant i8 1
@6 = private unnamed_addr constant [116 x i8] c";llvm/test/Transforms/OpenMP/custom_state_machines_remarks.c;__omp_offloading_2a_d80d3d_test_no_fallback_l20;20;1;;\00", align 1
@7 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @6 }, align 8
@8 = private unnamed_addr constant [85 x i8] c";llvm/test/Transforms/OpenMP/custom_state_machines_remarks.c;test_no_fallback;20;1;;\00", align 1
@9 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @8 }, align 8
@10 = private unnamed_addr constant [117 x i8] c";llvm/test/Transforms/OpenMP/custom_state_machines_remarks.c;__omp_offloading_2a_d80d3d_test_no_fallback_l20;20;25;;\00", align 1
@11 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @10 }, align 8
+@__omp_offloading_2a_d80d3d_test_no_fallback_l20_exec_mode = weak constant i8 1
@12 = private unnamed_addr constant [73 x i8] c";llvm/test/Transforms/OpenMP/custom_state_machines_remarks.c;known;4;1;;\00", align 1
@13 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 2, i32 0, ptr @12 }, align 8
@G = external global i32
-@__omp_offloading_2a_d80d3d_test_fallback_l11_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr null, ptr null }
-@__omp_offloading_2a_d80d3d_test_no_fallback_l20_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr null, ptr null }
-
+@llvm.compiler.used = appending global [2 x ptr] [ptr @__omp_offloading_2a_d80d3d_test_fallback_l11_exec_mode, ptr @__omp_offloading_2a_d80d3d_test_no_fallback_l20_exec_mode], section "llvm.metadata"
; Function Attrs: convergent norecurse nounwind
define weak void @__omp_offloading_2a_d80d3d_test_fallback_l11() local_unnamed_addr #0 !dbg !15 {
entry:
%captured_vars_addrs.i.i = alloca [0 x ptr], align 8
- %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_2a_d80d3d_test_fallback_l11_kernel_environment) #3, !dbg !18
+ %0 = call i32 @__kmpc_target_init(ptr nonnull @1, i8 1, i1 true) #3, !dbg !18
%exec_user_code = icmp eq i32 %0, -1, !dbg !18
br i1 %exec_user_code, label %user_code.entry, label %common.ret, !dbg !18
@@ -77,12 +75,12 @@ user_code.entry: ; preds = %entry
call void @__kmpc_parallel_51(ptr noundef nonnull @13, i32 %2, i32 noundef 1, i32 noundef -1, i32 noundef -1, ptr noundef @__omp_outlined__2, ptr noundef @__omp_outlined__2_wrapper, ptr noundef nonnull %captured_vars_addrs.i.i, i64 noundef 0) #3, !dbg !23
call void @llvm.lifetime.end.p0(i64 0, ptr nonnull %captured_vars_addrs.i.i) #3, !dbg !26
call void @unknown() #6, !dbg !27
- call void @__kmpc_target_deinit() #3, !dbg !28
+ call void @__kmpc_target_deinit(ptr nonnull @5, i8 1) #3, !dbg !28
br label %common.ret
}
; Make it a weak definition so we will apply custom state machine rewriting but can't use the body in the reasoning.
-define weak i32 @__kmpc_target_init(ptr) {
+define weak i32 @__kmpc_target_init(ptr, i8, i1) {
ret i32 0
}
@@ -101,13 +99,13 @@ entry:
; Function Attrs: nounwind
declare i32 @__kmpc_global_thread_num(ptr) local_unnamed_addr #3
-declare void @__kmpc_target_deinit() local_unnamed_addr
+declare void @__kmpc_target_deinit(ptr, i8) local_unnamed_addr
; Function Attrs: norecurse nounwind
define weak void @__omp_offloading_2a_d80d3d_test_no_fallback_l20() local_unnamed_addr #4 !dbg !32 {
entry:
%captured_vars_addrs.i2.i = alloca [0 x ptr], align 8
- %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_2a_d80d3d_test_no_fallback_l20_kernel_environment) #3, !dbg !33
+ %0 = call i32 @__kmpc_target_init(ptr nonnull @7, i8 1, i1 true) #3, !dbg !33
%exec_user_code = icmp eq i32 %0, -1, !dbg !33
br i1 %exec_user_code, label %user_code.entry, label %common.ret, !dbg !33
@@ -130,7 +128,7 @@ user_code.entry: ; preds = %entry
call void @llvm.lifetime.end.p0(i64 0, ptr nonnull %captured_vars_addrs.i2.i) #3, !dbg !45
call void @no_openmp()
call void @no_parallelism()
- call void @__kmpc_target_deinit() #3, !dbg !46
+ call void @__kmpc_target_deinit(ptr nonnull @11, i8 1) #3, !dbg !46
br label %common.ret
}
diff --git a/llvm/test/Transforms/OpenMP/deduplication_target.ll b/llvm/test/Transforms/OpenMP/deduplication_target.ll
index eb9177cff4bbfe..040b45e8122137 100644
--- a/llvm/test/Transforms/OpenMP/deduplication_target.ll
+++ b/llvm/test/Transforms/OpenMP/deduplication_target.ll
@@ -5,13 +5,12 @@ target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"
target triple = "nvptx64"
%struct.ident_t = type { i32, i32, i32, i32, ptr }
-%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr }
-%struct.ConfigurationEnvironmentTy = type { i8, i8, i8 }
@0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8
@2 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 1, i32 0, ptr @0 }, align 8
-@__omp_offloading_50_a3e09bf8_foo_l2_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 2 }, ptr null, ptr null }
+@__omp_offloading_50_a3e09bf8_foo_l2_exec_mode = weak constant i8 0
+@llvm.compiler.used = appending global [1 x ptr] [ptr @__omp_offloading_50_a3e09bf8_foo_l2_exec_mode], section "llvm.metadata"
declare void @use(i32)
@@ -20,37 +19,37 @@ define weak void @__omp_offloading_50_a3e09bf8_foo_l2() #0 {
; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_50_a3e09bf8_foo_l2_kernel_environment)
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; CHECK: user_code.entry:
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]])
-; CHECK-NEXT: call void @__kmpc_target_deinit()
+; CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
; CHECK-NEXT: ret void
; CHECK: worker.exit:
; CHECK-NEXT: ret void
;
entry:
%captured_vars_addrs = alloca [0 x ptr], align 8
- %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_50_a3e09bf8_foo_l2_kernel_environment)
+ %0 = call i32 @__kmpc_target_init(ptr @1, i8 2, i1 false)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %worker.exit
user_code.entry: ; preds = %entry
%1 = call i32 @__kmpc_global_thread_num(ptr @2)
%2 = call i32 @__kmpc_global_thread_num(ptr @2)
- call void @__kmpc_target_deinit()
+ call void @__kmpc_target_deinit(ptr @1, i8 2)
ret void
worker.exit: ; preds = %entry
ret void
}
-declare i32 @__kmpc_target_init(ptr)
+declare i32 @__kmpc_target_init(ptr, i8, i1)
declare i32 @__kmpc_global_thread_num(ptr) #1
-declare void @__kmpc_target_deinit()
+declare void @__kmpc_target_deinit(ptr, i8)
attributes #0 = { convergent noinline norecurse nounwind "kernel" "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
attributes #1 = { nounwind }
diff --git a/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll b/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll
index d8605f7150e74a..0cf6c73bd2fd34 100644
--- a/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll
+++ b/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll
@@ -2,27 +2,26 @@
; RUN: opt -S -passes=openmp-opt < %s | FileCheck %s
target triple = "nvptx64"
-%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr }
-%struct.ConfigurationEnvironmentTy = type { i8, i8, i8 }
+%struct.ident_t = type { i32, i32, i32, i32, ptr }
-@G = external global i32
-
-@kernel0_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1 }, ptr null, ptr null }
-@kernel1_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1 }, ptr null, ptr null }
-@kernel2_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1 }, ptr null, ptr null }
+@kernel0_exec_mode = weak constant i8 1
+@G = external global i32
;.
+; CHECK: @[[KERNEL0_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3
; CHECK: @[[G:[a-zA-Z0-9_$"\\.-]+]] = external global i32
-; CHECK: @[[KERNEL0_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 3 }, ptr null, ptr null }
-; CHECK: @[[KERNEL1_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 3 }, ptr null, ptr null }
-; CHECK: @[[KERNEL2_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 3 }, ptr null, ptr null }
+; CHECK: @[[KERNEL1_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3
+; CHECK: @[[KERNEL2_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3
+; CHECK: @[[KERNEL0_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0
+; CHECK: @[[KERNEL1_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0
+; CHECK: @[[KERNEL2_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0
; CHECK: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 22, ptr @[[GLOB0]] }, align 8
;.
define weak void @kernel0() "kernel" #0 {
; CHECK-LABEL: define {{[^@]+}}@kernel0
; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
-; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr @kernel0_kernel_environment)
+; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr null, i8 2, i1 false)
; CHECK-NEXT: [[THREAD_ID_IN_BLOCK:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
; CHECK-NEXT: [[THREAD_IS_MAIN:%.*]] = icmp ne i32 [[THREAD_ID_IN_BLOCK]], 0
; CHECK-NEXT: br i1 [[THREAD_IS_MAIN]], label [[EXIT_THREADS:%.*]], label [[MAIN_THREAD_USER_CODE:%.*]]
@@ -32,21 +31,24 @@ define weak void @kernel0() "kernel" #0 {
; CHECK-NEXT: call void @helper0() #[[ATTR1:[0-9]+]]
; CHECK-NEXT: call void @helper1() #[[ATTR1]]
; CHECK-NEXT: call void @helper2() #[[ATTR1]]
-; CHECK-NEXT: call void @__kmpc_target_deinit()
+; CHECK-NEXT: call void @__kmpc_target_deinit(ptr null, i8 2)
; CHECK-NEXT: ret void
;
- %i = call i32 @__kmpc_target_init(ptr @kernel0_kernel_environment)
+
+ %i = call i32 @__kmpc_target_init(ptr null, i8 1, i1 false)
call void @helper0()
call void @helper1()
call void @helper2()
- call void @__kmpc_target_deinit()
+ call void @__kmpc_target_deinit(ptr null, i8 1)
ret void
}
+@kernel1_exec_mode = weak constant i8 1
+
define weak void @kernel1() "kernel" #0 {
; CHECK-LABEL: define {{[^@]+}}@kernel1
; CHECK-SAME: () #[[ATTR0]] {
-; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr @kernel1_kernel_environment)
+; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr null, i8 2, i1 false)
; CHECK-NEXT: [[THREAD_ID_IN_BLOCK:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
; CHECK-NEXT: [[THREAD_IS_MAIN:%.*]] = icmp ne i32 [[THREAD_ID_IN_BLOCK]], 0
; CHECK-NEXT: br i1 [[THREAD_IS_MAIN]], label [[EXIT_THREADS:%.*]], label [[MAIN_THREAD_USER_CODE:%.*]]
@@ -54,21 +56,24 @@ define weak void @kernel1() "kernel" #0 {
; CHECK-NEXT: ret void
; CHECK: main.thread.user_code:
; CHECK-NEXT: call void @helper1() #[[ATTR1]]
-; CHECK-NEXT: call void @__kmpc_target_deinit()
+; CHECK-NEXT: call void @__kmpc_target_deinit(ptr null, i8 2)
; CHECK-NEXT: ret void
;
- %i = call i32 @__kmpc_target_init(ptr @kernel1_kernel_environment)
+
+ %i = call i32 @__kmpc_target_init(ptr null, i8 1, i1 false)
call void @helper1()
- call void @__kmpc_target_deinit()
+ call void @__kmpc_target_deinit(ptr null, i8 1)
ret void
}
+@kernel2_exec_mode = weak constant i8 1
+
define weak void @kernel2() "kernel" #0 {
; CHECK-LABEL: define {{[^@]+}}@kernel2
; CHECK-SAME: () #[[ATTR0]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr @kernel2_kernel_environment)
+; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr null, i8 2, i1 false)
; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[I]], -1
; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; CHECK: common.ret:
@@ -79,12 +84,12 @@ define weak void @kernel2() "kernel" #0 {
; CHECK-NEXT: call void @helper1() #[[ATTR1]]
; CHECK-NEXT: call void @helper2() #[[ATTR1]]
; CHECK-NEXT: call void @__kmpc_parallel_51(ptr null, i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; CHECK-NEXT: call void @__kmpc_target_deinit()
+; CHECK-NEXT: call void @__kmpc_target_deinit(ptr null, i8 2)
; CHECK-NEXT: ret void
;
entry:
%captured_vars_addrs = alloca [0 x ptr], align 8
- %i = call i32 @__kmpc_target_init(ptr @kernel2_kernel_environment)
+ %i = call i32 @__kmpc_target_init(ptr null, i8 1, i1 true)
%exec_user_code = icmp eq i32 %i, -1
br i1 %exec_user_code, label %user_code.entry, label %common.ret
@@ -97,7 +102,7 @@ user_code.entry:
call void @helper1()
call void @helper2()
call void @__kmpc_parallel_51(ptr null, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr %captured_vars_addrs, i64 0)
- call void @__kmpc_target_deinit()
+ call void @__kmpc_target_deinit(ptr null, i8 1)
ret void
}
@@ -193,8 +198,8 @@ define internal i32 @__kmpc_get_hardware_num_threads_in_block() {
ret i32 %ret
}
declare i32 @__kmpc_get_hardware_num_threads_in_block_dummy()
-declare i32 @__kmpc_target_init(ptr) #1
-declare void @__kmpc_target_deinit() #1
+declare i32 @__kmpc_target_init(ptr, i8, i1 zeroext) #1
+declare void @__kmpc_target_deinit(ptr nocapture readnone, i8) #1
declare void @__kmpc_parallel_51(ptr, i32, i32, i32, i32, ptr, ptr, ptr, i64)
declare i32 @__kmpc_global_thread_num(ptr)
@@ -209,6 +214,7 @@ attributes #0 = { "omp_target_thread_limit"="666" "omp_target_num_teams"="777"}
!2 = !{ptr @kernel0, !"kernel", i32 1}
!3 = !{ptr @kernel1, !"kernel", i32 1}
!4 = !{ptr @kernel2, !"kernel", i32 1}
+;
;.
; CHECK: attributes #[[ATTR0]] = { "kernel" "omp_target_num_teams"="777" "omp_target_thread_limit"="666" }
; CHECK: attributes #[[ATTR1]] = { nounwind }
diff --git a/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold_optnone.ll b/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold_optnone.ll
index 7910ad19407efd..12b5e713fbb712 100644
--- a/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold_optnone.ll
+++ b/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold_optnone.ll
@@ -2,22 +2,28 @@
; RUN: opt -S -passes=openmp-opt < %s | FileCheck %s
target triple = "nvptx64"
-@G = external global i32
+%struct.ident_t = type { i32, i32, i32, i32, ptr }
+
+@kernel0_exec_mode = weak constant i8 1
+@G = external global i32
;.
+; CHECK: @[[KERNEL0_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
; CHECK: @[[G:[a-zA-Z0-9_$"\\.-]+]] = external global i32
+; CHECK: @[[KERNEL1_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
+; CHECK: @[[KERNEL2_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
;.
define weak void @kernel0() #0 {
; CHECK-LABEL: define {{[^@]+}}@kernel0
; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
-; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr null)
+; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr null, i1 true, i1 false)
; CHECK-NEXT: call void @helper0()
; CHECK-NEXT: call void @helper1()
; CHECK-NEXT: call void @helper2()
; CHECK-NEXT: call void @__kmpc_target_deinit(ptr null, i1 true)
; CHECK-NEXT: ret void
;
- %i = call i32 @__kmpc_target_init(ptr null)
+ %i = call i32 @__kmpc_target_init(ptr null, i1 true, i1 false)
call void @helper0()
call void @helper1()
call void @helper2()
@@ -25,20 +31,24 @@ define weak void @kernel0() #0 {
ret void
}
+@kernel1_exec_mode = weak constant i8 1
+
define weak void @kernel1() #0 {
; CHECK-LABEL: define {{[^@]+}}@kernel1
; CHECK-SAME: () #[[ATTR0]] {
-; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr null)
+; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr null, i1 true, i1 false)
; CHECK-NEXT: call void @helper1()
-; CHECK-NEXT: call void @__kmpc_target_deinit()
+; CHECK-NEXT: call void @__kmpc_target_deinit(ptr null, i1 false)
; CHECK-NEXT: ret void
;
- %i = call i32 @__kmpc_target_init(ptr null)
+ %i = call i32 @__kmpc_target_init(ptr null, i1 true, i1 false)
call void @helper1()
- call void @__kmpc_target_deinit()
+ call void @__kmpc_target_deinit(ptr null, i1 false)
ret void
}
+@kernel2_exec_mode = weak constant i8 1
+
define weak void @kernel2() #0 {
; CHECK-LABEL: define {{[^@]+}}@kernel2
; CHECK-SAME: () #[[ATTR0]] {
@@ -46,14 +56,14 @@ define weak void @kernel2() #0 {
; CHECK-NEXT: call void @helper0()
; CHECK-NEXT: call void @helper1()
; CHECK-NEXT: call void @helper2()
-; CHECK-NEXT: call void @__kmpc_target_deinit()
+; CHECK-NEXT: call void @__kmpc_target_deinit(ptr null, i1 false)
; CHECK-NEXT: ret void
;
%i = call i32 @__kmpc_target_init(ptr null, i1 false, i1 false)
call void @helper0()
call void @helper1()
call void @helper2()
- call void @__kmpc_target_deinit()
+ call void @__kmpc_target_deinit(ptr null, i1 false)
ret void
}
@@ -102,8 +112,8 @@ define internal void @helper2() {
}
declare i32 @__kmpc_get_hardware_num_threads_in_block()
-declare i32 @__kmpc_target_init(ptr) #1
-declare void @__kmpc_target_deinit() #1
+declare i32 @__kmpc_target_init(ptr, i1 zeroext, i1 zeroext) #1
+declare void @__kmpc_target_deinit(ptr nocapture readnone, i1 zeroext) #1
!llvm.module.flags = !{!0, !1}
diff --git a/llvm/test/Transforms/OpenMP/global_constructor.ll b/llvm/test/Transforms/OpenMP/global_constructor.ll
index e1a394b526a97a..1b0b78ad145d83 100644
--- a/llvm/test/Transforms/OpenMP/global_constructor.ll
+++ b/llvm/test/Transforms/OpenMP/global_constructor.ll
@@ -2,17 +2,15 @@
; RUN: opt -S -passes=openmp-opt < %s | FileCheck %s
%struct.ident_t = type { i32, i32, i32, i32, ptr }
-%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr }
-%struct.ConfigurationEnvironmentTy = type { i8, i8, i8 }
@0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8
@_ZL6Device = internal global double 0.000000e+00, align 8
-@__omp_offloading_fd02_85283c04_main_l11_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1 }, ptr @1, ptr null }
+@__omp_offloading_fd02_85283c04_main_l11_exec_mode = weak constant i8 0
define weak void @__omp_offloading_fd02_85283c04_main_l11(ptr nonnull align 8 dereferenceable(8) %X) local_unnamed_addr "kernel" {
entry:
- %0 = tail call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_85283c04_main_l11_kernel_environment) #0
+ %0 = tail call i32 @__kmpc_target_init(ptr nonnull @1, i8 2, i1 false) #0
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %common.ret
@@ -31,13 +29,13 @@ region.guarded:
region.barrier:
tail call void @__kmpc_barrier_simple_spmd(ptr nonnull @1, i32 %2)
- tail call void @__kmpc_target_deinit() #0
+ tail call void @__kmpc_target_deinit(ptr nonnull @1, i8 2) #0
br label %common.ret
}
-declare i32 @__kmpc_target_init(ptr) local_unnamed_addr
+declare i32 @__kmpc_target_init(ptr, i8, i1) local_unnamed_addr
-declare void @__kmpc_target_deinit() local_unnamed_addr
+declare void @__kmpc_target_deinit(ptr, i8) local_unnamed_addr
define weak void @__omp_offloading__fd02_85283c04_Device_l6_ctor() "kernel" {
entry:
@@ -80,7 +78,7 @@ attributes #1 = { convergent nounwind }
; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_fd02_85283c04_main_l11
; CHECK-SAME: (ptr nonnull align 8 dereferenceable(8) [[X:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_85283c04_main_l11_kernel_environment) #[[ATTR1:[0-9]+]]
+; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @__kmpc_target_init(ptr nonnull @[[GLOB1:[0-9]+]], i8 2, i1 false) #[[ATTR1:[0-9]+]]
; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; CHECK: common.ret:
@@ -94,8 +92,8 @@ attributes #1 = { convergent nounwind }
; CHECK-NEXT: store double [[TMP1]], ptr [[X]], align 8, !tbaa [[TBAA11]]
; CHECK-NEXT: br label [[REGION_BARRIER]]
; CHECK: region.barrier:
-; CHECK-NEXT: tail call void @__kmpc_barrier_simple_spmd(ptr nonnull @[[GLOB1:[0-9]+]], i32 [[TMP2]]) #[[ATTR1]]
-; CHECK-NEXT: tail call void @__kmpc_target_deinit() #[[ATTR1]]
+; CHECK-NEXT: tail call void @__kmpc_barrier_simple_spmd(ptr nonnull @[[GLOB1]], i32 [[TMP2]]) #[[ATTR1]]
+; CHECK-NEXT: tail call void @__kmpc_target_deinit(ptr nonnull @[[GLOB1]], i8 2) #[[ATTR1]]
; CHECK-NEXT: br label [[COMMON_RET]]
;
;
diff --git a/llvm/test/Transforms/OpenMP/globalization_remarks.ll b/llvm/test/Transforms/OpenMP/globalization_remarks.ll
index fdd0e41fd25258..d1d43c934fe62a 100644
--- a/llvm/test/Transforms/OpenMP/globalization_remarks.ll
+++ b/llvm/test/Transforms/OpenMP/globalization_remarks.ll
@@ -7,19 +7,17 @@ target triple = "nvptx64"
; CHECK: remark: globalization_remarks.c:5:7: Could not move globalized variable to the stack. Variable is potentially captured in call. Mark parameter as `__attribute__((noescape))` to override.
; CHECK: remark: globalization_remarks.c:5:7: Found thread data sharing on the GPU. Expect degraded performance due to data globalization.
-%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr }
-%struct.ConfigurationEnvironmentTy = type { i8, i8, i8 }
+%struct.ident_t = type { i32, i32, i32, i32, ptr }
@S = external local_unnamed_addr global ptr
-@foo_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1 }, ptr null, ptr null }
define void @foo() "kernel" {
entry:
- %c = call i32 @__kmpc_target_init(ptr @foo_kernel_environment)
+ %c = call i32 @__kmpc_target_init(ptr null, i1 false, i1 true)
%0 = call ptr @__kmpc_alloc_shared(i64 4), !dbg !10
call void @share(ptr %0), !dbg !10
call void @__kmpc_free_shared(ptr %0)
- call void @__kmpc_target_deinit()
+ call void @__kmpc_target_deinit(ptr null, i1 false)
ret void
}
@@ -33,9 +31,9 @@ declare ptr @__kmpc_alloc_shared(i64)
declare void @__kmpc_free_shared(ptr nocapture)
-declare i32 @__kmpc_target_init(ptr);
+declare i32 @__kmpc_target_init(ptr, i1, i1);
-declare void @__kmpc_target_deinit()
+declare void @__kmpc_target_deinit(ptr, i1)
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!3, !4, !5, !6}
diff --git a/llvm/test/Transforms/OpenMP/gpu_state_machine_function_ptr_replacement.ll b/llvm/test/Transforms/OpenMP/gpu_state_machine_function_ptr_replacement.ll
index 37348e8474b57e..3579fe4a2970c0 100644
--- a/llvm/test/Transforms/OpenMP/gpu_state_machine_function_ptr_replacement.ll
+++ b/llvm/test/Transforms/OpenMP/gpu_state_machine_function_ptr_replacement.ll
@@ -36,20 +36,19 @@
%struct.ident_t = type { i32, i32, i32, i32, ptr }
-%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr }
-%struct.ConfigurationEnvironmentTy = type { i8, i8, i8 }
@0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8
+@__omp_offloading_10301_87b2c_foo_l7_exec_mode = weak constant i8 1
@2 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 2, i32 0, ptr @0 }, align 8
-@__omp_offloading_10301_87b2c_foo_l7_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr @1, ptr null }
+@llvm.compiler.used = appending global [1 x ptr] [ptr @__omp_offloading_10301_87b2c_foo_l7_exec_mode], section "llvm.metadata"
define weak void @__omp_offloading_10301_87b2c_foo_l7() "kernel" {
entry:
%.zero.addr = alloca i32, align 4
%.threadid_temp. = alloca i32, align 4
store i32 0, ptr %.zero.addr, align 4
- %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_10301_87b2c_foo_l7_kernel_environment)
+ %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %worker.exit
@@ -57,14 +56,14 @@ user_code.entry: ; preds = %entry
%1 = call i32 @__kmpc_global_thread_num(ptr @1)
store i32 %1, ptr %.threadid_temp., align 4
call void @__omp_outlined__(ptr %.threadid_temp., ptr %.zero.addr)
- call void @__kmpc_target_deinit()
+ call void @__kmpc_target_deinit(ptr @1, i8 1)
ret void
worker.exit: ; preds = %entry
ret void
}
-define weak i32 @__kmpc_target_init(ptr %0) {
+define weak i32 @__kmpc_target_init(ptr, i8, i1) {
ret i32 0
}
@@ -147,7 +146,7 @@ entry:
declare i32 @__kmpc_global_thread_num(ptr)
-declare void @__kmpc_target_deinit()
+declare void @__kmpc_target_deinit(ptr, i8)
define internal void @__omp_outlined__3(ptr noalias %.global_tid., ptr noalias %.bound_tid.) {
entry:
diff --git a/llvm/test/Transforms/OpenMP/is_spmd_exec_mode_fold.ll b/llvm/test/Transforms/OpenMP/is_spmd_exec_mode_fold.ll
index 528232fd9e2f94..0e816087ca116f 100644
--- a/llvm/test/Transforms/OpenMP/is_spmd_exec_mode_fold.ll
+++ b/llvm/test/Transforms/OpenMP/is_spmd_exec_mode_fold.ll
@@ -2,37 +2,42 @@
; RUN: opt -S -passes=openmp-opt < %s | FileCheck %s
target triple = "nvptx64"
-%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr }
-%struct.ConfigurationEnvironmentTy = type { i8, i8, i8 }
+%struct.ident_t = type { i32, i32, i32, i32, ptr }
+@is_spmd_exec_mode = weak constant i8 0
+@will_be_spmd_exec_mode = weak constant i8 1
+@non_spmd_exec_mode = weak constant i8 1
+@will_not_be_spmd_exec_mode = weak constant i8 1
@G = external global i8
-@is_spmd_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 2 }, ptr null, ptr null }
-@will_be_spmd_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1 }, ptr null, ptr null }
-@none_spmd_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1 }, ptr null, ptr null }
-@will_not_be_spmd_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1 }, ptr null, ptr null }
+@llvm.compiler.used = appending global [4 x ptr] [ptr @is_spmd_exec_mode, ptr @will_be_spmd_exec_mode, ptr @non_spmd_exec_mode, ptr @will_not_be_spmd_exec_mode ], section "llvm.metadata"
;.
+; CHECK: @[[IS_SPMD_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0
+; CHECK: @[[WILL_BE_SPMD_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3
+; CHECK: @[[NON_SPMD_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
+; CHECK: @[[WILL_NOT_BE_SPMD_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
; CHECK: @[[G:[a-zA-Z0-9_$"\\.-]+]] = external global i8
-; CHECK: @[[IS_SPMD_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 2 }, ptr null, ptr null }
-; CHECK: @[[WILL_BE_SPMD_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 3 }, ptr null, ptr null }
-; CHECK: @[[NONE_SPMD_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1 }, ptr null, ptr null }
-; CHECK: @[[WILL_NOT_BE_SPMD_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1 }, ptr null, ptr null }
+; CHECK: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [4 x ptr] [ptr @is_spmd_exec_mode, ptr @will_be_spmd_exec_mode, ptr @non_spmd_exec_mode, ptr @will_not_be_spmd_exec_mode], section "llvm.metadata"
+; CHECK: @[[IS_SPMD_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0
+; CHECK: @[[WILL_BE_SPMD_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0
+; CHECK: @[[NON_SPMD_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0
+; CHECK: @[[WILL_NOT_BE_SPMD_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0
;.
define weak void @is_spmd() "kernel" {
; CHECK-LABEL: define {{[^@]+}}@is_spmd
; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
-; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr @is_spmd_kernel_environment)
+; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr null, i8 2, i1 false)
; CHECK-NEXT: call void @is_spmd_helper1()
; CHECK-NEXT: call void @is_spmd_helper2()
; CHECK-NEXT: call void @is_mixed_helper()
-; CHECK-NEXT: call void @__kmpc_target_deinit()
+; CHECK-NEXT: call void @__kmpc_target_deinit(ptr null, i8 2)
; CHECK-NEXT: ret void
;
- %i = call i32 @__kmpc_target_init(ptr @is_spmd_kernel_environment)
+ %i = call i32 @__kmpc_target_init(ptr null, i8 2, i1 false)
call void @is_spmd_helper1()
call void @is_spmd_helper2()
call void @is_mixed_helper()
- call void @__kmpc_target_deinit()
+ call void @__kmpc_target_deinit(ptr null, i8 2)
ret void
}
@@ -41,7 +46,7 @@ define weak void @will_be_spmd() "kernel" {
; CHECK-SAME: () #[[ATTR0]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr @will_be_spmd_kernel_environment)
+; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr null, i8 2, i1 false)
; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[I]], -1
; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; CHECK: common.ret:
@@ -50,12 +55,12 @@ define weak void @will_be_spmd() "kernel" {
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr null) #[[ATTR3:[0-9]+]]
; CHECK-NEXT: call void @is_spmd_helper2()
; CHECK-NEXT: call void @__kmpc_parallel_51(ptr null, i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; CHECK-NEXT: call void @__kmpc_target_deinit()
+; CHECK-NEXT: call void @__kmpc_target_deinit(ptr null, i8 2)
; CHECK-NEXT: ret void
;
entry:
%captured_vars_addrs = alloca [0 x ptr], align 8
- %i = call i32 @__kmpc_target_init(ptr @will_be_spmd_kernel_environment)
+ %i = call i32 @__kmpc_target_init(ptr null, i8 1, i1 true)
%exec_user_code = icmp eq i32 %i, -1
br i1 %exec_user_code, label %user_code.entry, label %common.ret
@@ -66,43 +71,43 @@ user_code.entry:
%0 = call i32 @__kmpc_global_thread_num(ptr null)
call void @is_spmd_helper2()
call void @__kmpc_parallel_51(ptr null, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr %captured_vars_addrs, i64 0)
- call void @__kmpc_target_deinit()
+ call void @__kmpc_target_deinit(ptr null, i8 1)
ret void
}
define weak void @non_spmd() "kernel" {
; CHECK-LABEL: define {{[^@]+}}@non_spmd
; CHECK-SAME: () #[[ATTR0]] {
-; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr @none_spmd_kernel_environment)
+; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr null, i8 1, i1 false)
; CHECK-NEXT: call void @is_generic_helper1()
; CHECK-NEXT: call void @is_generic_helper2()
; CHECK-NEXT: call void @is_mixed_helper()
-; CHECK-NEXT: call void @__kmpc_target_deinit()
+; CHECK-NEXT: call void @__kmpc_target_deinit(ptr null, i8 1)
; CHECK-NEXT: ret void
;
- %i = call i32 @__kmpc_target_init(ptr @none_spmd_kernel_environment)
+ %i = call i32 @__kmpc_target_init(ptr null, i8 1, i1 false)
call void @is_generic_helper1()
call void @is_generic_helper2()
call void @is_mixed_helper()
- call void @__kmpc_target_deinit()
+ call void @__kmpc_target_deinit(ptr null, i8 1)
ret void
}
define weak void @will_not_be_spmd() "kernel" {
; CHECK-LABEL: define {{[^@]+}}@will_not_be_spmd
; CHECK-SAME: () #[[ATTR0]] {
-; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr @will_not_be_spmd_kernel_environment)
+; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr null, i8 1, i1 false)
; CHECK-NEXT: call void @is_generic_helper1()
; CHECK-NEXT: call void @is_generic_helper2()
; CHECK-NEXT: call void @is_mixed_helper()
-; CHECK-NEXT: call void @__kmpc_target_deinit()
+; CHECK-NEXT: call void @__kmpc_target_deinit(ptr null, i8 1)
; CHECK-NEXT: ret void
;
- %i = call i32 @__kmpc_target_init(ptr @will_not_be_spmd_kernel_environment)
+ %i = call i32 @__kmpc_target_init(ptr null, i8 1, i1 false)
call void @is_generic_helper1()
call void @is_generic_helper2()
call void @is_mixed_helper()
- call void @__kmpc_target_deinit()
+ call void @__kmpc_target_deinit(ptr null, i8 1)
ret void
}
@@ -199,8 +204,8 @@ entry:
declare void @spmd_compatible() "llvm.assume"="ompx_spmd_amenable"
declare i8 @__kmpc_is_spmd_exec_mode()
-declare i32 @__kmpc_target_init(ptr)
-declare void @__kmpc_target_deinit()
+declare i32 @__kmpc_target_init(ptr, i8, i1 zeroext)
+declare void @__kmpc_target_deinit(ptr nocapture readnone, i8)
declare void @__kmpc_parallel_51(ptr, i32, i32, i32, i32, ptr, ptr, ptr, i64)
declare i32 @__kmpc_global_thread_num(ptr)
declare void @foo()
diff --git a/llvm/test/Transforms/OpenMP/nested_parallelism.ll b/llvm/test/Transforms/OpenMP/nested_parallelism.ll
index 03832b32c0b9a4..2e95323b994730 100644
--- a/llvm/test/Transforms/OpenMP/nested_parallelism.ll
+++ b/llvm/test/Transforms/OpenMP/nested_parallelism.ll
@@ -23,31 +23,31 @@
target triple = "nvptx64"
%struct.ident_t = type { i32, i32, i32, i32, ptr }
-%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr }
-%struct.ConfigurationEnvironmentTy = type { i8, i8, i8 }
@0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 22, ptr @0 }, align 8
+@__omp_offloading_10302_bd7e0_main_l13_exec_mode = weak protected constant i8 3
+@__omp_offloading_10302_bd7e0_main_l16_exec_mode = weak protected constant i8 1
@i_shared = internal addrspace(3) global [4 x i8] undef, align 16
@i.i_shared = internal addrspace(3) global [4 x i8] undef, align 16
-
-@__omp_offloading_10302_bd7e0_main_l13_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 2 }, ptr @1, ptr null }
-@__omp_offloading_10302_bd7e0_main_l16_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr @1, ptr null }
-
+@llvm.compiler.used = appending global [2 x ptr] [ptr @__omp_offloading_10302_bd7e0_main_l13_exec_mode, ptr @__omp_offloading_10302_bd7e0_main_l16_exec_mode], section "llvm.metadata"
;.
; CHECK: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 22, ptr @[[GLOB0]] }, align 8
+; CHECK: @[[__OMP_OFFLOADING_10302_BD7E0_MAIN_L13_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak protected constant i8 3
+; CHECK: @[[__OMP_OFFLOADING_10302_BD7E0_MAIN_L16_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak protected constant i8 3
; CHECK: @[[I_SHARED:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 16
; CHECK: @[[I_I_SHARED:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 16
-; CHECK: @[[__OMP_OFFLOADING_10302_BD7E0_MAIN_L13_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 2 }, ptr @[[GLOB1]], ptr null }
-; CHECK: @[[__OMP_OFFLOADING_10302_BD7E0_MAIN_L16_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 1, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null }
+; CHECK: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [2 x ptr] [ptr @__omp_offloading_10302_bd7e0_main_l13_exec_mode, ptr @__omp_offloading_10302_bd7e0_main_l16_exec_mode], section "llvm.metadata"
+; CHECK: @[[__OMP_OFFLOADING_10302_BD7E0_MAIN_L13_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
+; CHECK: @[[__OMP_OFFLOADING_10302_BD7E0_MAIN_L16_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0
;.
define weak_odr protected void @__omp_offloading_10302_bd7e0_main_l13(i64 noundef %i) local_unnamed_addr "kernel" {
; CHECK-LABEL: @__omp_offloading_10302_bd7e0_main_l13(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CAPTURED_VARS_ADDRS_I:%.*]] = alloca [1 x ptr], align 8
-; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @__kmpc_target_init(ptr @__omp_offloading_10302_bd7e0_main_l13_kernel_environment)
+; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @__kmpc_target_init(ptr nonnull @[[GLOB1]], i8 2, i1 false)
; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; CHECK: common.ret:
@@ -67,12 +67,12 @@ define weak_odr protected void @__omp_offloading_10302_bd7e0_main_l13(i64 nounde
; CHECK-NEXT: store ptr addrspacecast (ptr addrspace(3) @i_shared to ptr), ptr [[CAPTURED_VARS_ADDRS_I]], align 8
; CHECK-NEXT: call void @__kmpc_parallel_51(ptr nonnull @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr nonnull @__omp_outlined__, ptr nonnull @__omp_outlined___wrapper, ptr nonnull [[CAPTURED_VARS_ADDRS_I]], i64 1)
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[CAPTURED_VARS_ADDRS_I]])
-; CHECK-NEXT: call void @__kmpc_target_deinit()
+; CHECK-NEXT: call void @__kmpc_target_deinit(ptr nonnull @[[GLOB1]], i8 2)
; CHECK-NEXT: br label [[COMMON_RET]]
;
entry:
%captured_vars_addrs.i = alloca [1 x ptr], align 8
- %0 = tail call i32 @__kmpc_target_init(ptr @__omp_offloading_10302_bd7e0_main_l13_kernel_environment) #6
+ %0 = tail call i32 @__kmpc_target_init(ptr nonnull @1, i8 2, i1 false) #6
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %common.ret
@@ -96,11 +96,11 @@ _Z3fooi.internalized.exit: ; preds = %user_code.entry, %r
store ptr addrspacecast (ptr addrspace(3) @i_shared to ptr), ptr %captured_vars_addrs.i, align 8
call void @__kmpc_parallel_51(ptr nonnull @1, i32 %1, i32 1, i32 -1, i32 -1, ptr nonnull @__omp_outlined__, ptr nonnull @__omp_outlined___wrapper, ptr nonnull %captured_vars_addrs.i, i64 1) #6
call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %captured_vars_addrs.i)
- call void @__kmpc_target_deinit() #6
+ call void @__kmpc_target_deinit(ptr nonnull @1, i8 2) #6
br label %common.ret
}
-declare i32 @__kmpc_target_init(ptr) local_unnamed_addr
+declare i32 @__kmpc_target_init(ptr, i8, i1) local_unnamed_addr
define hidden void @_Z3fooi(i32 noundef %i1) local_unnamed_addr #1 {
; CHECK-LABEL: @_Z3fooi(
@@ -131,7 +131,7 @@ define weak_odr protected void @__omp_offloading_10302_bd7e0_main_l16(i64 nounde
; CHECK-LABEL: @__omp_offloading_10302_bd7e0_main_l16(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CAPTURED_VARS_ADDRS_I:%.*]] = alloca [1 x ptr], align 8
-; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @__kmpc_target_init(ptr @__omp_offloading_10302_bd7e0_main_l16_kernel_environment)
+; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @__kmpc_target_init(ptr nonnull @[[GLOB1]], i8 2, i1 false)
; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; CHECK: common.ret:
@@ -140,16 +140,29 @@ define weak_odr protected void @__omp_offloading_10302_bd7e0_main_l16(i64 nounde
; CHECK-NEXT: [[I_ADDR_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[I:%.*]] to i32
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[CAPTURED_VARS_ADDRS_I]])
; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB1]]) #[[ATTR2]]
+; CHECK-NEXT: br label [[REGION_CHECK_TID:%.*]]
+; CHECK: region.check.tid:
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
+; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[TMP3]], label [[REGION_GUARDED:%.*]], label [[REGION_BARRIER:%.*]]
+; CHECK: region.guarded:
; CHECK-NEXT: store i32 [[I_ADDR_SROA_0_0_EXTRACT_TRUNC]], ptr addrspacecast (ptr addrspace(3) @i.i_shared to ptr), align 16
+; CHECK-NEXT: br label [[REGION_GUARDED_END:%.*]]
+; CHECK: region.guarded.end:
+; CHECK-NEXT: br label [[REGION_BARRIER]]
+; CHECK: region.barrier:
+; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB1]], i32 [[TMP2]])
+; CHECK-NEXT: br label [[REGION_EXIT:%.*]]
+; CHECK: region.exit:
; CHECK-NEXT: store ptr addrspacecast (ptr addrspace(3) @i.i_shared to ptr), ptr [[CAPTURED_VARS_ADDRS_I]], align 8
; CHECK-NEXT: call void @__kmpc_parallel_51(ptr nonnull @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr nonnull @__omp_outlined__1, ptr nonnull @__omp_outlined__1_wrapper, ptr nonnull [[CAPTURED_VARS_ADDRS_I]], i64 1)
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[CAPTURED_VARS_ADDRS_I]])
-; CHECK-NEXT: call void @__kmpc_target_deinit()
+; CHECK-NEXT: call void @__kmpc_target_deinit(ptr nonnull @[[GLOB1]], i8 2)
; CHECK-NEXT: br label [[COMMON_RET]]
;
entry:
%captured_vars_addrs.i = alloca [1 x ptr], align 8
- %0 = tail call i32 @__kmpc_target_init(ptr @__omp_offloading_10302_bd7e0_main_l16_kernel_environment) #6
+ %0 = tail call i32 @__kmpc_target_init(ptr nonnull @1, i8 1, i1 true) #6
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %common.ret
@@ -164,7 +177,7 @@ user_code.entry: ; preds = %entry
store ptr addrspacecast (ptr addrspace(3) @i.i_shared to ptr), ptr %captured_vars_addrs.i, align 8
call void @__kmpc_parallel_51(ptr nonnull @1, i32 %1, i32 1, i32 -1, i32 -1, ptr nonnull @__omp_outlined__1, ptr nonnull @__omp_outlined__1_wrapper, ptr nonnull %captured_vars_addrs.i, i64 1) #6
call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %captured_vars_addrs.i)
- call void @__kmpc_target_deinit() #6
+ call void @__kmpc_target_deinit(ptr nonnull @1, i8 1) #6
br label %common.ret
}
diff --git a/llvm/test/Transforms/OpenMP/parallel_level_fold.ll b/llvm/test/Transforms/OpenMP/parallel_level_fold.ll
index ac8a49ddfbfd26..1b88bfc90dd5ae 100644
--- a/llvm/test/Transforms/OpenMP/parallel_level_fold.ll
+++ b/llvm/test/Transforms/OpenMP/parallel_level_fold.ll
@@ -2,65 +2,69 @@
; RUN: opt -S -passes=openmp-opt < %s | FileCheck %s
target triple = "nvptx64"
-%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr }
-%struct.ConfigurationEnvironmentTy = type { i8, i8, i8 }
+%struct.ident_t = type { i32, i32, i32, i32, ptr }
+@no_spmd_exec_mode = weak constant i8 1
+@spmd_exec_mode = weak constant i8 0
+@parallel_exec_mode = weak constant i8 0
@G = external global i16
-@none_spmd_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1 }, ptr null, ptr null }
-@spmd_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 2 }, ptr null, ptr null }
-@parallel_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 2 }, ptr null, ptr null }
+@llvm.compiler.used = appending global [3 x ptr] [ptr @no_spmd_exec_mode, ptr @spmd_exec_mode, ptr @parallel_exec_mode], section "llvm.metadata"
;.
+; CHECK: @[[NO_SPMD_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
+; CHECK: @[[SPMD_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0
+; CHECK: @[[PARALLEL_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0
; CHECK: @[[G:[a-zA-Z0-9_$"\\.-]+]] = external global i16
-; CHECK: @[[NONE_SPMD_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1 }, ptr null, ptr null }
-; CHECK: @[[SPMD_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 2 }, ptr null, ptr null }
-; CHECK: @[[PARALLEL_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 2 }, ptr null, ptr null }
+; CHECK: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [3 x ptr] [ptr @no_spmd_exec_mode, ptr @spmd_exec_mode, ptr @parallel_exec_mode], section "llvm.metadata"
+; CHECK: @[[NONE_SPMD_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0
+; CHECK: @[[SPMD_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0
+; CHECK: @[[PARALLEL_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0
;.
define weak void @none_spmd() "kernel" {
; CHECK-LABEL: define {{[^@]+}}@none_spmd
; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
-; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr @none_spmd_kernel_environment)
+; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr null, i8 1, i1 false)
; CHECK-NEXT: call void @none_spmd_helper()
; CHECK-NEXT: call void @mixed_helper()
-; CHECK-NEXT: call void @__kmpc_target_deinit()
+; CHECK-NEXT: call void @__kmpc_target_deinit(ptr null, i8 1)
; CHECK-NEXT: ret void
;
- %i = call i32 @__kmpc_target_init(ptr @none_spmd_kernel_environment)
+ %i = call i32 @__kmpc_target_init(ptr null, i8 1, i1 false)
call void @none_spmd_helper()
call void @mixed_helper()
- call void @__kmpc_target_deinit()
+ call void @__kmpc_target_deinit(ptr null, i8 1)
ret void
}
define weak void @spmd() "kernel" {
; CHECK-LABEL: define {{[^@]+}}@spmd
; CHECK-SAME: () #[[ATTR0]] {
-; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr @spmd_kernel_environment)
+; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr null, i8 2, i1 false)
; CHECK-NEXT: call void @spmd_helper()
; CHECK-NEXT: call void @mixed_helper()
-; CHECK-NEXT: call void @__kmpc_target_deinit()
+; CHECK-NEXT: call void @__kmpc_target_deinit(ptr null, i8 2)
; CHECK-NEXT: ret void
;
- %i = call i32 @__kmpc_target_init(ptr @spmd_kernel_environment)
+ %i = call i32 @__kmpc_target_init(ptr null, i8 2, i1 false)
call void @spmd_helper()
call void @mixed_helper()
- call void @__kmpc_target_deinit()
+ call void @__kmpc_target_deinit(ptr null, i8 2)
ret void
}
define weak void @parallel() "kernel" {
; CHECK-LABEL: define {{[^@]+}}@parallel
; CHECK-SAME: () #[[ATTR0]] {
-; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr @parallel_kernel_environment)
+; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(ptr null, i8 2, i1 false)
; CHECK-NEXT: call void @spmd_helper()
; CHECK-NEXT: call void @__kmpc_parallel_51(ptr null, i32 0, i32 0, i32 0, i32 0, ptr null, ptr null, ptr null, i64 0)
-; CHECK-NEXT: call void @__kmpc_target_deinit()
+; CHECK-NEXT: call void @__kmpc_target_deinit(ptr null, i8 2)
; CHECK-NEXT: ret void
;
- %i = call i32 @__kmpc_target_init(ptr @parallel_kernel_environment)
+ %i = call i32 @__kmpc_target_init(ptr null, i8 2, i1 false)
call void @spmd_helper()
call void @__kmpc_parallel_51(ptr null, i32 0, i32 0, i32 0, i32 0, ptr null, ptr null, ptr null, i64 0)
- call void @__kmpc_target_deinit()
+ call void @__kmpc_target_deinit(ptr null, i8 2)
ret void
}
@@ -132,8 +136,8 @@ define internal void @parallel_helper() {
declare void @foo()
declare void @bar()
declare zeroext i16 @__kmpc_parallel_level(ptr, i32)
-declare i32 @__kmpc_target_init(ptr) #1
-declare void @__kmpc_target_deinit() #1
+declare i32 @__kmpc_target_init(ptr, i8 zeroext, i1 zeroext) #1
+declare void @__kmpc_target_deinit(ptr nocapture readnone, i8 zeroext) #1
!llvm.module.flags = !{!0, !1}
!nvvm.annotations = !{!2, !3, !4}
diff --git a/llvm/test/Transforms/OpenMP/remove_globalization.ll b/llvm/test/Transforms/OpenMP/remove_globalization.ll
index b864700e18abef..6ca5cb1177ff8a 100644
--- a/llvm/test/Transforms/OpenMP/remove_globalization.ll
+++ b/llvm/test/Transforms/OpenMP/remove_globalization.ll
@@ -5,12 +5,6 @@
target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"
target triple = "nvptx64"
-@S = external local_unnamed_addr global ptr
-%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr }
-%struct.ConfigurationEnvironmentTy = type { i8, i8, i8 }
-@kernel_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr null, ptr null }
-
-
; UTC_ARGS: --disable
; CHECK-REMARKS: remark: remove_globalization.c:4:2: Could not move globalized variable to the stack. Variable is potentially captured in call. Mark parameter as `__attribute__((noescape))` to override.
; CHECK-REMARKS: remark: remove_globalization.c:2:2: Moving globalized variable to the stack.
@@ -18,57 +12,61 @@ target triple = "nvptx64"
; CHECK-REMARKS: remark: remove_globalization.c:4:2: Found thread data sharing on the GPU. Expect degraded performance due to data globalization.
; UTC_ARGS: --enable
+@S = external local_unnamed_addr global ptr
+
+%struct.ident_t = type { i32, i32, i32, i32, ptr }
+
; Make it a weak definition so we will apply custom state machine rewriting but can't use the body in the reasoning.
;.
; CHECK: @[[S:[a-zA-Z0-9_$"\\.-]+]] = external local_unnamed_addr global ptr
-; CHECK: @[[KERNEL_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1 }, ptr null, ptr null }
+; CHECK: @[[KERNEL_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0
;.
; CHECK-DISABLED: @[[S:[a-zA-Z0-9_$"\\.-]+]] = external local_unnamed_addr global ptr
-; CHECK-DISABLED: @[[KERNEL_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1 }, ptr null, ptr null }
+; CHECK-DISABLED: @[[KERNEL_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0
;.
-define weak i32 @__kmpc_target_init(ptr %0) {
+define weak i32 @__kmpc_target_init(ptr, i8, i1) {
; CHECK-LABEL: define {{[^@]+}}@__kmpc_target_init
-; CHECK-SAME: (ptr [[TMP0:%.*]]) {
+; CHECK-SAME: (ptr [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) {
; CHECK-NEXT: ret i32 0
;
; CHECK-DISABLED-LABEL: define {{[^@]+}}@__kmpc_target_init
-; CHECK-DISABLED-SAME: (ptr [[TMP0:%.*]]) {
+; CHECK-DISABLED-SAME: (ptr [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) {
; CHECK-DISABLED-NEXT: ret i32 0
;
ret i32 0
}
-declare void @__kmpc_target_deinit()
+declare void @__kmpc_target_deinit(ptr, i8)
define void @kernel() "kernel" {
; CHECK-LABEL: define {{[^@]+}}@kernel
; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @kernel_kernel_environment)
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr nonnull null, i8 1, i1 false)
; CHECK-NEXT: call void @foo() #[[ATTR1:[0-9]+]]
; CHECK-NEXT: call void @bar() #[[ATTR1]]
; CHECK-NEXT: call void @convert_and_move_alloca() #[[ATTR1]]
; CHECK-NEXT: call void @unknown_no_openmp() #[[ATTR4:[0-9]+]]
-; CHECK-NEXT: call void @__kmpc_target_deinit()
+; CHECK-NEXT: call void @__kmpc_target_deinit(ptr nonnull null, i8 1)
; CHECK-NEXT: ret void
;
; CHECK-DISABLED-LABEL: define {{[^@]+}}@kernel
; CHECK-DISABLED-SAME: () #[[ATTR0:[0-9]+]] {
; CHECK-DISABLED-NEXT: entry:
-; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @kernel_kernel_environment)
+; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr nonnull null, i8 1, i1 false)
; CHECK-DISABLED-NEXT: call void @foo() #[[ATTR1:[0-9]+]]
; CHECK-DISABLED-NEXT: call void @bar() #[[ATTR1]]
; CHECK-DISABLED-NEXT: call void @convert_and_move_alloca() #[[ATTR1]]
; CHECK-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR4:[0-9]+]]
-; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit()
+; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr nonnull null, i8 1)
; CHECK-DISABLED-NEXT: ret void
;
entry:
- %0 = call i32 @__kmpc_target_init(ptr @kernel_kernel_environment)
+ %0 = call i32 @__kmpc_target_init(ptr nonnull null, i8 1, i1 true)
call void @foo()
call void @bar()
call void @convert_and_move_alloca()
call void @unknown_no_openmp()
- call void @__kmpc_target_deinit()
+ call void @__kmpc_target_deinit(ptr nonnull null, i8 1)
ret void
}
diff --git a/llvm/test/Transforms/OpenMP/replace_globalization.ll b/llvm/test/Transforms/OpenMP/replace_globalization.ll
index 6469558728757c..358511dd386d6f 100644
--- a/llvm/test/Transforms/OpenMP/replace_globalization.ll
+++ b/llvm/test/Transforms/OpenMP/replace_globalization.ll
@@ -14,30 +14,28 @@ target triple = "nvptx64"
; UTC_ARGS: --enable
%struct.ident_t = type { i32, i32, i32, i32, ptr }
-%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr }
-%struct.ConfigurationEnvironmentTy = type { i8, i8, i8 }
@S = external local_unnamed_addr global ptr
@0 = private unnamed_addr constant [113 x i8] c";llvm/test/Transforms/OpenMP/custom_state_machines_remarks.c;__omp_offloading_2a_d80d3d_test_fallback_l11;11;1;;\00", align 1
@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8
-@foo_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1 }, ptr @1, ptr null }
-@bar_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1 }, ptr @1, ptr null }
-@baz_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 2 }, ptr @1, ptr null }
+@foo_exec_mode = weak constant i8 1
+@bar_exec_mode = weak constant i8 1
+@baz_spmd_exec_mode = weak constant i8 2
define dso_local void @foo() "kernel" {
entry:
- %c = call i32 @__kmpc_target_init(ptr @foo_kernel_environment)
+ %c = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true)
%x = call align 4 ptr @__kmpc_alloc_shared(i64 4)
call void @unknown_no_openmp()
call void @use(ptr %x)
call void @__kmpc_free_shared(ptr %x, i64 4)
- call void @__kmpc_target_deinit()
+ call void @__kmpc_target_deinit(ptr @1, i8 1)
ret void
}
define void @bar() "kernel" {
- %c = call i32 @__kmpc_target_init(ptr @bar_kernel_environment)
+ %c = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true)
call void @unknown_no_openmp()
%cmp = icmp eq i32 %c, -1
br i1 %cmp, label %master1, label %exit
@@ -56,12 +54,12 @@ master2:
call void @__kmpc_free_shared(ptr %y, i64 4)
br label %exit
exit:
- call void @__kmpc_target_deinit()
+ call void @__kmpc_target_deinit(ptr @1, i8 1)
ret void
}
define void @baz_spmd() "kernel" {
- %c = call i32 @__kmpc_target_init(ptr @baz_kernel_environment)
+ %c = call i32 @__kmpc_target_init(ptr @1, i8 2, i1 true)
call void @unknown_no_openmp()
%c0 = icmp eq i32 %c, -1
br i1 %c0, label %master3, label %exit
@@ -71,7 +69,7 @@ master3:
call void @__kmpc_free_shared(ptr %z, i64 24)
br label %exit
exit:
- call void @__kmpc_target_deinit()
+ call void @__kmpc_target_deinit(ptr @1, i8 2)
ret void
}
@@ -99,11 +97,11 @@ declare i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
declare i32 @llvm.nvvm.read.ptx.sreg.warpsize()
; Make it a weak definition so we will apply custom state machine rewriting but can't use the body in the reasoning.
-define weak i32 @__kmpc_target_init(ptr) {
+define weak i32 @__kmpc_target_init(ptr, i8, i1) {
ret i32 0
}
-declare void @__kmpc_target_deinit()
+declare void @__kmpc_target_deinit(ptr, i8)
declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp"
@@ -129,29 +127,32 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp"
; CHECK: @[[S:[a-zA-Z0-9_$"\\.-]+]] = external local_unnamed_addr global ptr
; CHECK: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [113 x i8] c"
; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8
-; CHECK: @[[FOO_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null }
-; CHECK: @[[BAR_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null }
-; CHECK: @[[BAZ_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 2 }, ptr @[[GLOB1]], ptr null }
+; CHECK: @[[FOO_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
+; CHECK: @[[BAR_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
+; CHECK: @[[BAZ_SPMD_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 2
; CHECK: @[[OFFSET:[a-zA-Z0-9_$"\\.-]+]] = global i32 undef
; CHECK: @[[STACK:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [1024 x i8] undef
+; CHECK: @[[FOO_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0
+; CHECK: @[[BAR_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0
+; CHECK: @[[BAZ_SPMD_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0
; CHECK: @[[X_SHARED:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [16 x i8] poison, align 4
; CHECK: @[[Y_SHARED:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] poison, align 4
;.
; CHECK-LABEL: define {{[^@]+}}@foo
; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[C:%.*]] = call i32 @__kmpc_target_init(ptr @foo_kernel_environment)
+; CHECK-NEXT: [[C:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false)
; CHECK-NEXT: [[X:%.*]] = call align 4 ptr @__kmpc_alloc_shared(i64 4) #[[ATTR6:[0-9]+]]
; CHECK-NEXT: call void @unknown_no_openmp() #[[ATTR5:[0-9]+]]
; CHECK-NEXT: call void @use.internalized(ptr nofree [[X]]) #[[ATTR7:[0-9]+]]
; CHECK-NEXT: call void @__kmpc_free_shared(ptr [[X]], i64 4) #[[ATTR8:[0-9]+]]
-; CHECK-NEXT: call void @__kmpc_target_deinit()
+; CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
; CHECK-NEXT: ret void
;
;
; CHECK-LABEL: define {{[^@]+}}@bar
; CHECK-SAME: () #[[ATTR0]] {
-; CHECK-NEXT: [[C:%.*]] = call i32 @__kmpc_target_init(ptr @bar_kernel_environment)
+; CHECK-NEXT: [[C:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false)
; CHECK-NEXT: call void @unknown_no_openmp() #[[ATTR5]]
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[C]], -1
; CHECK-NEXT: br i1 [[CMP]], label [[MASTER1:%.*]], label [[EXIT:%.*]]
@@ -166,13 +167,13 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp"
; CHECK-NEXT: call void @use.internalized(ptr nofree addrspacecast (ptr addrspace(3) @y_shared to ptr)) #[[ATTR7]]
; CHECK-NEXT: br label [[EXIT]]
; CHECK: exit:
-; CHECK-NEXT: call void @__kmpc_target_deinit()
+; CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
; CHECK-NEXT: ret void
;
;
; CHECK-LABEL: define {{[^@]+}}@baz_spmd
; CHECK-SAME: () #[[ATTR0]] {
-; CHECK-NEXT: [[C:%.*]] = call i32 @__kmpc_target_init(ptr @baz_kernel_environment)
+; CHECK-NEXT: [[C:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 true)
; CHECK-NEXT: call void @unknown_no_openmp() #[[ATTR5]]
; CHECK-NEXT: [[C0:%.*]] = icmp eq i32 [[C]], -1
; CHECK-NEXT: br i1 [[C0]], label [[MASTER3:%.*]], label [[EXIT:%.*]]
@@ -182,7 +183,7 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp"
; CHECK-NEXT: call void @__kmpc_free_shared(ptr [[Z]], i64 24) #[[ATTR8]]
; CHECK-NEXT: br label [[EXIT]]
; CHECK: exit:
-; CHECK-NEXT: call void @__kmpc_target_deinit()
+; CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
; CHECK-NEXT: ret void
;
;
@@ -210,7 +211,7 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp"
;
;
; CHECK-LABEL: define {{[^@]+}}@__kmpc_target_init
-; CHECK-SAME: (ptr [[TMP0:%.*]]) {
+; CHECK-SAME: (ptr [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) {
; CHECK-NEXT: ret i32 0
;
;.
diff --git a/llvm/test/Transforms/OpenMP/single_threaded_execution.ll b/llvm/test/Transforms/OpenMP/single_threaded_execution.ll
index 01f73714eb4869..580eb927117096 100644
--- a/llvm/test/Transforms/OpenMP/single_threaded_execution.ll
+++ b/llvm/test/Transforms/OpenMP/single_threaded_execution.ll
@@ -4,12 +4,10 @@
; ModuleID = 'single_threaded_exeuction.c'
%struct.ident_t = type { i32, i32, i32, i32, ptr }
-%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr }
-%struct.ConfigurationEnvironmentTy = type { i8, i8, i8 }
@0 = private unnamed_addr constant [1 x i8] c"\00", align 1
@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8
-@kernel_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1 }, ptr @1, ptr null }
+@kernel_exec_mode = weak constant i8 1
; CHECK-NOT: [openmp-opt] Basic block @kernel entry is executed by a single thread.
@@ -17,7 +15,7 @@
; CHECK-NOT: [openmp-opt] Basic block @kernel if.else is executed by a single thread.
; CHECK-NOT: [openmp-opt] Basic block @kernel if.end is executed by a single thread.
define void @kernel() "kernel" {
- %call = call i32 @__kmpc_target_init(ptr @kernel_kernel_environment)
+ %call = call i32 @__kmpc_target_init(ptr nonnull @1, i8 1, i1 false)
%cmp = icmp eq i32 %call, -1
br i1 %cmp, label %if.then, label %if.else
if.then:
@@ -25,7 +23,7 @@ if.then:
if.else:
br label %if.end
if.end:
- call void @__kmpc_target_deinit()
+ call void @__kmpc_target_deinit(ptr null, i8 1)
ret void
}
@@ -108,9 +106,9 @@ declare i32 @llvm.amdgcn.workitem.id.x()
declare void @__kmpc_kernel_prepare_parallel(ptr)
-declare i32 @__kmpc_target_init(ptr)
+declare i32 @__kmpc_target_init(ptr, i8, i1)
-declare void @__kmpc_target_deinit()
+declare void @__kmpc_target_deinit(ptr, i8)
attributes #0 = { cold noinline }
diff --git a/llvm/test/Transforms/OpenMP/spmdization.ll b/llvm/test/Transforms/OpenMP/spmdization.ll
index bb5fef5b726729..e8a1652787e9ff 100644
--- a/llvm/test/Transforms/OpenMP/spmdization.ll
+++ b/llvm/test/Transforms/OpenMP/spmdization.ll
@@ -1,10 +1,10 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
; RUN: opt --mtriple=amdgcn-amd-amdhsa --data-layout=A5 -S -passes=openmp-opt < %s | FileCheck %s --check-prefixes=AMDGPU
; RUN: opt --mtriple=nvptx64-- -S -passes=openmp-opt < %s | FileCheck %s --check-prefixes=NVPTX
-; RUN: opt --mtriple=amdgcn-amd-amdhsa --data-layout=A5 -S -passes=openmp-opt -openmp-opt-disable-spmdization < %s | FileCheck %s --check-prefix=AMDGPU-DISABLED1
-; RUN: opt --mtriple=amdgcn-amd-amdhsa --data-layout=A5 -S -passes=openmp-opt-postlink < %s | FileCheck %s --check-prefix=AMDGPU-DISABLED2
-; RUN: opt --mtriple=nvptx64-- -S -passes=openmp-opt -openmp-opt-disable-spmdization < %s | FileCheck %s --check-prefix=NVPTX-DISABLED1
-; RUN: opt --mtriple=nvptx64-- -S -passes=openmp-opt-postlink < %s | FileCheck %s --check-prefix=NVPTX-DISABLED2
+; RUN: opt --mtriple=amdgcn-amd-amdhsa --data-layout=A5 -S -passes=openmp-opt -openmp-opt-disable-spmdization < %s | FileCheck %s --check-prefix=AMDGPU-DISABLED
+; RUN: opt --mtriple=amdgcn-amd-amdhsa --data-layout=A5 -S -passes=openmp-opt-postlink < %s | FileCheck %s --check-prefix=AMDGPU-DISABLED
+; RUN: opt --mtriple=nvptx64-- -S -passes=openmp-opt -openmp-opt-disable-spmdization < %s | FileCheck %s --check-prefix=NVPTX-DISABLED
+; RUN: opt --mtriple=nvptx64-- -S -passes=openmp-opt-postlink < %s | FileCheck %s --check-prefix=NVPTX-DISABLED
;; void unknown(void);
;; void spmd_amenable(void) __attribute__((assume("ompx_spmd_amenable")));
@@ -93,26 +93,74 @@
%struct.kmp_task_t = type { ptr, ptr, i32, %union.kmp_cmplrdata_t, %union.kmp_cmplrdata_t }
%union.kmp_cmplrdata_t = type { ptr }
%struct.anon = type {}
-%struct.ConfigurationEnvironmentTy = type { i8, i8, i8 }
-%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr }
@0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8
-@__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr @1, ptr null }
-@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr @1, ptr null }
-@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr @1, ptr null }
-@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr @1, ptr null }
-@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr @1, ptr null }
-@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr @1, ptr null }
+@__omp_offloading_fd02_2044372e_sequential_loop_l5_exec_mode = weak constant i8 1
+@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_exec_mode = weak constant i8 1
+@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_exec_mode = weak constant i8 1
+@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_exec_mode = weak constant i8 1
+@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_exec_mode = weak constant i8 1
+@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_exec_mode = weak constant i8 1
+@llvm.compiler.used = appending global [6 x ptr] [ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5_exec_mode, ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_exec_mode, ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_exec_mode, ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_exec_mode, ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_exec_mode, ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_exec_mode], section "llvm.metadata"
+; Function Attrs: alwaysinline convergent norecurse nounwind
+;.
+; AMDGPU: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
+; AMDGPU: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8
+; AMDGPU: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_L5_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3
+; AMDGPU: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_STACK_VAR_L20_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3
+; AMDGPU: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_L35_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3
+; AMDGPU: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_GUARDED_L50_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3
+; AMDGPU: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TARGET_L65_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
+; AMDGPU: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TASK_L74_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
+; AMDGPU: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [6 x ptr] [ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5_exec_mode, ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_exec_mode, ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_exec_mode, ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_exec_mode, ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_exec_mode, ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_exec_mode], section "llvm.metadata"
+; AMDGPU: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_L5__DEBUG_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
+; AMDGPU: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_STACK_VAR_L20_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
+; AMDGPU: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_L35_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
+; AMDGPU: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_GUARDED_L50_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
+; AMDGPU: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 22, ptr @[[GLOB0]] }, align 8
+; AMDGPU: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TARGET_L65_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0
+; AMDGPU: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TASK_L74_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
+; AMDGPU: @[[X_SHARED:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] poison, align 4
+; AMDGPU: @[[X_SHARED_1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] poison, align 4
+; AMDGPU: @[[__OMP_OUTLINED__9_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
+;.
+; NVPTX: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
+; NVPTX: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8
+; NVPTX: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_L5_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3
+; NVPTX: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_STACK_VAR_L20_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3
+; NVPTX: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_L35_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3
+; NVPTX: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_GUARDED_L50_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3
+; NVPTX: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TARGET_L65_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
+; NVPTX: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TASK_L74_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
+; NVPTX: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [6 x ptr] [ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5_exec_mode, ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_exec_mode, ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_exec_mode, ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_exec_mode, ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_exec_mode, ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_exec_mode], section "llvm.metadata"
+; NVPTX: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_L5__DEBUG_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
+; NVPTX: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_STACK_VAR_L20_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
+; NVPTX: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_L35_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
+; NVPTX: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_GUARDED_L50_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
+; NVPTX: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 22, ptr @[[GLOB0]] }, align 8
+; NVPTX: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TARGET_L65_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0
+; NVPTX: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TASK_L74_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
+; NVPTX: @[[X_SHARED:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] poison, align 4
+; NVPTX: @[[X_SHARED1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] poison, align 4
+; NVPTX: @[[__OMP_OUTLINED__9_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
+;.
; AMDGPU-DISABLED: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
; AMDGPU-DISABLED: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8
-; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_L5_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 3 }, ptr @[[GLOB1]], ptr null }
-; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_STACK_VAR_L20_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 3 }, ptr @[[GLOB1]], ptr null }
-; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_L35_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 3 }, ptr @[[GLOB1]], ptr null }
-; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_GUARDED_L50_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 3 }, ptr @[[GLOB1]], ptr null }
-; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TARGET_L65_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null }
-; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TASK_L74_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null }
+; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_L5_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
+; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_STACK_VAR_L20_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
+; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_L35_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
+; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_GUARDED_L50_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
+; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TARGET_L65_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
+; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TASK_L74_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
+; AMDGPU-DISABLED: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [6 x ptr] [ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5_exec_mode, ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_exec_mode, ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_exec_mode, ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_exec_mode, ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_exec_mode, ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_exec_mode], section "llvm.metadata"
+; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_L5__DEBUG_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
+; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_STACK_VAR_L20_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
+; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_L35_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
+; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_GUARDED_L50_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
+; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TARGET_L65_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0
+; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TASK_L74_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
; AMDGPU-DISABLED: @[[X_SHARED:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] poison, align 4
; AMDGPU-DISABLED: @[[X_SHARED_1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] poison, align 4
; AMDGPU-DISABLED: @[[__OMP_OUTLINED__1_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
@@ -120,14 +168,22 @@
; AMDGPU-DISABLED: @[[__OMP_OUTLINED__5_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
; AMDGPU-DISABLED: @[[__OMP_OUTLINED__7_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
; AMDGPU-DISABLED: @[[__OMP_OUTLINED__9_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
+;.
; NVPTX-DISABLED: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
; NVPTX-DISABLED: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8
-; NVPTX-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_L5_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 3 }, ptr @[[GLOB1]], ptr null }
-; NVPTX-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_STACK_VAR_L20_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 3 }, ptr @[[GLOB1]], ptr null }
-; NVPTX-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_L35_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 3 }, ptr @[[GLOB1]], ptr null }
-; NVPTX-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_GUARDED_L50_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 3 }, ptr @[[GLOB1]], ptr null }
-; NVPTX-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TARGET_L65_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null }
-; NVPTX-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TASK_L74_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null }
+; NVPTX-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_L5_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
+; NVPTX-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_STACK_VAR_L20_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
+; NVPTX-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_L35_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
+; NVPTX-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_GUARDED_L50_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
+; NVPTX-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TARGET_L65_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
+; NVPTX-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TASK_L74_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
+; NVPTX-DISABLED: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [6 x ptr] [ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5_exec_mode, ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_exec_mode, ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_exec_mode, ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_exec_mode, ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_exec_mode, ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_exec_mode], section "llvm.metadata"
+; NVPTX-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_L5__DEBUG_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
+; NVPTX-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_STACK_VAR_L20_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
+; NVPTX-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_L35_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
+; NVPTX-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_GUARDED_L50_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
+; NVPTX-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TARGET_L65_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0
+; NVPTX-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TASK_L74_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
; NVPTX-DISABLED: @[[X_SHARED:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] poison, align 4
; NVPTX-DISABLED: @[[X_SHARED1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] poison, align 4
; NVPTX-DISABLED: @[[__OMP_OUTLINED__1_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
@@ -136,96 +192,6 @@
; NVPTX-DISABLED: @[[__OMP_OUTLINED__7_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
; NVPTX-DISABLED: @[[__OMP_OUTLINED__9_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
;.
-; AMDGPU: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
-; AMDGPU: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8
-; AMDGPU: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_L5_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 3 }, ptr @[[GLOB1]], ptr null }
-; AMDGPU: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_STACK_VAR_L20_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 3 }, ptr @[[GLOB1]], ptr null }
-; AMDGPU: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_L35_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 3 }, ptr @[[GLOB1]], ptr null }
-; AMDGPU: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_GUARDED_L50_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 3 }, ptr @[[GLOB1]], ptr null }
-; AMDGPU: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TARGET_L65_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null }
-; AMDGPU: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TASK_L74_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null }
-; AMDGPU: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 22, ptr @[[GLOB0]] }, align 8
-; AMDGPU: @[[X_SHARED:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] poison, align 4
-; AMDGPU: @[[X_SHARED_1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] poison, align 4
-; AMDGPU: @[[__OMP_OUTLINED__9_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
-;.
-; NVPTX: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
-; NVPTX: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8
-; NVPTX: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_L5_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 3 }, ptr @[[GLOB1]], ptr null }
-; NVPTX: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_STACK_VAR_L20_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 3 }, ptr @[[GLOB1]], ptr null }
-; NVPTX: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_L35_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 3 }, ptr @[[GLOB1]], ptr null }
-; NVPTX: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_GUARDED_L50_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 3 }, ptr @[[GLOB1]], ptr null }
-; NVPTX: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TARGET_L65_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null }
-; NVPTX: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TASK_L74_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null }
-; NVPTX: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 22, ptr @[[GLOB0]] }, align 8
-; NVPTX: @[[X_SHARED:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] poison, align 4
-; NVPTX: @[[X_SHARED1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] poison, align 4
-; NVPTX: @[[__OMP_OUTLINED__9_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
-;.
-; AMDGPU-DISABLED1: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
-; AMDGPU-DISABLED1: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8
-; AMDGPU-DISABLED1: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_L5_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null }
-; AMDGPU-DISABLED1: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_STACK_VAR_L20_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null }
-; AMDGPU-DISABLED1: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_L35_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null }
-; AMDGPU-DISABLED1: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_GUARDED_L50_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null }
-; AMDGPU-DISABLED1: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TARGET_L65_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null }
-; AMDGPU-DISABLED1: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TASK_L74_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null }
-; AMDGPU-DISABLED1: @[[X_SHARED:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] poison, align 4
-; AMDGPU-DISABLED1: @[[X_SHARED_1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] poison, align 4
-; AMDGPU-DISABLED1: @[[__OMP_OUTLINED__1_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
-; AMDGPU-DISABLED1: @[[__OMP_OUTLINED__3_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
-; AMDGPU-DISABLED1: @[[__OMP_OUTLINED__5_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
-; AMDGPU-DISABLED1: @[[__OMP_OUTLINED__7_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
-; AMDGPU-DISABLED1: @[[__OMP_OUTLINED__9_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
-;.
-; AMDGPU-DISABLED2: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
-; AMDGPU-DISABLED2: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8
-; AMDGPU-DISABLED2: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_L5_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 3 }, ptr @[[GLOB1]], ptr null }
-; AMDGPU-DISABLED2: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_STACK_VAR_L20_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 3 }, ptr @[[GLOB1]], ptr null }
-; AMDGPU-DISABLED2: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_L35_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 3 }, ptr @[[GLOB1]], ptr null }
-; AMDGPU-DISABLED2: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_GUARDED_L50_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 3 }, ptr @[[GLOB1]], ptr null }
-; AMDGPU-DISABLED2: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TARGET_L65_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null }
-; AMDGPU-DISABLED2: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TASK_L74_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null }
-; AMDGPU-DISABLED2: @[[X_SHARED:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] poison, align 4
-; AMDGPU-DISABLED2: @[[X_SHARED_1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] poison, align 4
-; AMDGPU-DISABLED2: @[[__OMP_OUTLINED__1_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
-; AMDGPU-DISABLED2: @[[__OMP_OUTLINED__3_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
-; AMDGPU-DISABLED2: @[[__OMP_OUTLINED__5_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
-; AMDGPU-DISABLED2: @[[__OMP_OUTLINED__7_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
-; AMDGPU-DISABLED2: @[[__OMP_OUTLINED__9_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
-;.
-; NVPTX-DISABLED1: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
-; NVPTX-DISABLED1: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8
-; NVPTX-DISABLED1: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_L5_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null }
-; NVPTX-DISABLED1: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_STACK_VAR_L20_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null }
-; NVPTX-DISABLED1: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_L35_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null }
-; NVPTX-DISABLED1: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_GUARDED_L50_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null }
-; NVPTX-DISABLED1: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TARGET_L65_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null }
-; NVPTX-DISABLED1: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TASK_L74_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null }
-; NVPTX-DISABLED1: @[[X_SHARED:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] poison, align 4
-; NVPTX-DISABLED1: @[[X_SHARED1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] poison, align 4
-; NVPTX-DISABLED1: @[[__OMP_OUTLINED__1_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
-; NVPTX-DISABLED1: @[[__OMP_OUTLINED__3_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
-; NVPTX-DISABLED1: @[[__OMP_OUTLINED__5_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
-; NVPTX-DISABLED1: @[[__OMP_OUTLINED__7_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
-; NVPTX-DISABLED1: @[[__OMP_OUTLINED__9_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
-;.
-; NVPTX-DISABLED2: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
-; NVPTX-DISABLED2: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8
-; NVPTX-DISABLED2: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_L5_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 3 }, ptr @[[GLOB1]], ptr null }
-; NVPTX-DISABLED2: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_STACK_VAR_L20_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 3 }, ptr @[[GLOB1]], ptr null }
-; NVPTX-DISABLED2: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_L35_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 3 }, ptr @[[GLOB1]], ptr null }
-; NVPTX-DISABLED2: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_GUARDED_L50_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 3 }, ptr @[[GLOB1]], ptr null }
-; NVPTX-DISABLED2: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TARGET_L65_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null }
-; NVPTX-DISABLED2: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TASK_L74_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null }
-; NVPTX-DISABLED2: @[[X_SHARED:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] poison, align 4
-; NVPTX-DISABLED2: @[[X_SHARED1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] poison, align 4
-; NVPTX-DISABLED2: @[[__OMP_OUTLINED__1_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
-; NVPTX-DISABLED2: @[[__OMP_OUTLINED__3_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
-; NVPTX-DISABLED2: @[[__OMP_OUTLINED__5_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
-; NVPTX-DISABLED2: @[[__OMP_OUTLINED__7_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
-; NVPTX-DISABLED2: @[[__OMP_OUTLINED__9_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
-;.
define weak void @__omp_offloading_fd02_2044372e_sequential_loop_l5() #0 {
; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5
; AMDGPU-SAME: () #[[ATTR0:[0-9]+]] {
@@ -237,34 +203,16 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_l5() #0 {
; NVPTX-NEXT: call void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug()
; NVPTX-NEXT: ret void
;
-; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5
-; AMDGPU-DISABLED1-SAME: () #[[ATTR0:[0-9]+]] {
-; AMDGPU-DISABLED1-NEXT: call void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug()
-; AMDGPU-DISABLED1-NEXT: ret void
-;
-; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5
-; AMDGPU-DISABLED2-SAME: () #[[ATTR0:[0-9]+]] {
-; AMDGPU-DISABLED2-NEXT: call void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug()
-; AMDGPU-DISABLED2-NEXT: ret void
-;
-; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5
-; NVPTX-DISABLED1-SAME: () #[[ATTR0:[0-9]+]] {
-; NVPTX-DISABLED1-NEXT: call void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug()
-; NVPTX-DISABLED1-NEXT: ret void
-;
-; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5
-; NVPTX-DISABLED2-SAME: () #[[ATTR0:[0-9]+]] {
-; NVPTX-DISABLED2-NEXT: call void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug()
-; NVPTX-DISABLED2-NEXT: ret void
-;
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5
; AMDGPU-DISABLED-SAME: () #[[ATTR0:[0-9]+]] {
; AMDGPU-DISABLED-NEXT: call void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug()
; AMDGPU-DISABLED-NEXT: ret void
+;
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5
; NVPTX-DISABLED-SAME: () #[[ATTR0:[0-9]+]] {
; NVPTX-DISABLED-NEXT: call void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug()
; NVPTX-DISABLED-NEXT: ret void
+;
call void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug()
ret void
}
@@ -275,7 +223,7 @@ define internal void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug()
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment)
+; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; AMDGPU: common.ret:
@@ -284,7 +232,7 @@ define internal void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug()
; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]]
; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18:![0-9]+]]
; AMDGPU-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
-; AMDGPU-NEXT: call void @__kmpc_target_deinit()
+; AMDGPU-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
; AMDGPU-NEXT: br label [[COMMON_RET]]
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5__debug
@@ -292,7 +240,7 @@ define internal void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug()
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment)
+; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; NVPTX: common.ret:
@@ -301,230 +249,16 @@ define internal void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug()
; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]]
; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18:![0-9]+]]
; NVPTX-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
-; NVPTX-NEXT: call void @__kmpc_target_deinit()
+; NVPTX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
; NVPTX-NEXT: br label [[COMMON_RET]]
;
-; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5__debug
-; AMDGPU-DISABLED1-SAME: () #[[ATTR1:[0-9]+]] {
-; AMDGPU-DISABLED1-NEXT: entry:
-; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment)
-; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
-; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
-; AMDGPU-DISABLED1: is_worker_check:
-; AMDGPU-DISABLED1-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-; AMDGPU-DISABLED1-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
-; AMDGPU-DISABLED1-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
-; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
-; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
-; AMDGPU-DISABLED1: worker_state_machine.begin:
-; AMDGPU-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
-; AMDGPU-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
-; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
-; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
-; AMDGPU-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
-; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
-; AMDGPU-DISABLED1: worker_state_machine.finished:
-; AMDGPU-DISABLED1-NEXT: ret void
-; AMDGPU-DISABLED1: worker_state_machine.is_active.check:
-; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
-; AMDGPU-DISABLED1: worker_state_machine.parallel_region.check:
-; AMDGPU-DISABLED1-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__1_wrapper.ID
-; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]]
-; AMDGPU-DISABLED1: worker_state_machine.parallel_region.execute:
-; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__1_wrapper(i16 0, i32 [[TMP0]])
-; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
-; AMDGPU-DISABLED1: worker_state_machine.parallel_region.fallback.execute:
-; AMDGPU-DISABLED1-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]])
-; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
-; AMDGPU-DISABLED1: worker_state_machine.parallel_region.end:
-; AMDGPU-DISABLED1-NEXT: call void @__kmpc_kernel_end_parallel()
-; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
-; AMDGPU-DISABLED1: worker_state_machine.done.barrier:
-; AMDGPU-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
-; AMDGPU-DISABLED1: thread.user_code.check:
-; AMDGPU-DISABLED1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; AMDGPU-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
-; AMDGPU-DISABLED1: common.ret:
-; AMDGPU-DISABLED1-NEXT: ret void
-; AMDGPU-DISABLED1: user_code.entry:
-; AMDGPU-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]]
-; AMDGPU-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18:![0-9]+]]
-; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
-; AMDGPU-DISABLED1-NEXT: call void @__kmpc_target_deinit()
-; AMDGPU-DISABLED1-NEXT: br label [[COMMON_RET]]
-;
-; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5__debug
-; AMDGPU-DISABLED2-SAME: () #[[ATTR1:[0-9]+]] {
-; AMDGPU-DISABLED2-NEXT: entry:
-; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment)
-; AMDGPU-DISABLED2-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
-; AMDGPU-DISABLED2-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
-; AMDGPU-DISABLED2: is_worker_check:
-; AMDGPU-DISABLED2-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-; AMDGPU-DISABLED2-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
-; AMDGPU-DISABLED2-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
-; AMDGPU-DISABLED2-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
-; AMDGPU-DISABLED2-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
-; AMDGPU-DISABLED2: worker_state_machine.begin:
-; AMDGPU-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
-; AMDGPU-DISABLED2-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
-; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
-; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
-; AMDGPU-DISABLED2-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
-; AMDGPU-DISABLED2-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
-; AMDGPU-DISABLED2: worker_state_machine.finished:
-; AMDGPU-DISABLED2-NEXT: ret void
-; AMDGPU-DISABLED2: worker_state_machine.is_active.check:
-; AMDGPU-DISABLED2-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
-; AMDGPU-DISABLED2: worker_state_machine.parallel_region.check:
-; AMDGPU-DISABLED2-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__1_wrapper.ID
-; AMDGPU-DISABLED2-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]]
-; AMDGPU-DISABLED2: worker_state_machine.parallel_region.execute:
-; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__1_wrapper(i16 0, i32 [[TMP0]])
-; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
-; AMDGPU-DISABLED2: worker_state_machine.parallel_region.fallback.execute:
-; AMDGPU-DISABLED2-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]])
-; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
-; AMDGPU-DISABLED2: worker_state_machine.parallel_region.end:
-; AMDGPU-DISABLED2-NEXT: call void @__kmpc_kernel_end_parallel()
-; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
-; AMDGPU-DISABLED2: worker_state_machine.done.barrier:
-; AMDGPU-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
-; AMDGPU-DISABLED2: thread.user_code.check:
-; AMDGPU-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; AMDGPU-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
-; AMDGPU-DISABLED2: common.ret:
-; AMDGPU-DISABLED2-NEXT: ret void
-; AMDGPU-DISABLED2: user_code.entry:
-; AMDGPU-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]]
-; AMDGPU-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18:![0-9]+]]
-; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
-; AMDGPU-DISABLED2-NEXT: call void @__kmpc_target_deinit()
-; AMDGPU-DISABLED2-NEXT: br label [[COMMON_RET]]
-;
-; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5__debug
-; NVPTX-DISABLED1-SAME: () #[[ATTR1:[0-9]+]] {
-; NVPTX-DISABLED1-NEXT: entry:
-; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment)
-; NVPTX-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
-; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
-; NVPTX-DISABLED1: is_worker_check:
-; NVPTX-DISABLED1-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-; NVPTX-DISABLED1-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
-; NVPTX-DISABLED1-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
-; NVPTX-DISABLED1-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
-; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
-; NVPTX-DISABLED1: worker_state_machine.begin:
-; NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; NVPTX-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
-; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
-; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
-; NVPTX-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
-; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
-; NVPTX-DISABLED1: worker_state_machine.finished:
-; NVPTX-DISABLED1-NEXT: ret void
-; NVPTX-DISABLED1: worker_state_machine.is_active.check:
-; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
-; NVPTX-DISABLED1: worker_state_machine.parallel_region.check:
-; NVPTX-DISABLED1-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__1_wrapper.ID
-; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]]
-; NVPTX-DISABLED1: worker_state_machine.parallel_region.execute:
-; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__1_wrapper(i16 0, i32 [[TMP0]])
-; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
-; NVPTX-DISABLED1: worker_state_machine.parallel_region.fallback.execute:
-; NVPTX-DISABLED1-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]])
-; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
-; NVPTX-DISABLED1: worker_state_machine.parallel_region.end:
-; NVPTX-DISABLED1-NEXT: call void @__kmpc_kernel_end_parallel()
-; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
-; NVPTX-DISABLED1: worker_state_machine.done.barrier:
-; NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
-; NVPTX-DISABLED1: thread.user_code.check:
-; NVPTX-DISABLED1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; NVPTX-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
-; NVPTX-DISABLED1: common.ret:
-; NVPTX-DISABLED1-NEXT: ret void
-; NVPTX-DISABLED1: user_code.entry:
-; NVPTX-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]]
-; NVPTX-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18:![0-9]+]]
-; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
-; NVPTX-DISABLED1-NEXT: call void @__kmpc_target_deinit()
-; NVPTX-DISABLED1-NEXT: br label [[COMMON_RET]]
-;
-; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5__debug
-; NVPTX-DISABLED2-SAME: () #[[ATTR1:[0-9]+]] {
-; NVPTX-DISABLED2-NEXT: entry:
-; NVPTX-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment)
-; NVPTX-DISABLED2-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
-; NVPTX-DISABLED2-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
-; NVPTX-DISABLED2: is_worker_check:
-; NVPTX-DISABLED2-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-; NVPTX-DISABLED2-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
-; NVPTX-DISABLED2-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
-; NVPTX-DISABLED2-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
-; NVPTX-DISABLED2-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
-; NVPTX-DISABLED2: worker_state_machine.begin:
-; NVPTX-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; NVPTX-DISABLED2-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
-; NVPTX-DISABLED2-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
-; NVPTX-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
-; NVPTX-DISABLED2-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
-; NVPTX-DISABLED2-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
-; NVPTX-DISABLED2: worker_state_machine.finished:
-; NVPTX-DISABLED2-NEXT: ret void
-; NVPTX-DISABLED2: worker_state_machine.is_active.check:
-; NVPTX-DISABLED2-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
-; NVPTX-DISABLED2: worker_state_machine.parallel_region.check:
-; NVPTX-DISABLED2-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__1_wrapper.ID
-; NVPTX-DISABLED2-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]]
-; NVPTX-DISABLED2: worker_state_machine.parallel_region.execute:
-; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__1_wrapper(i16 0, i32 [[TMP0]])
-; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
-; NVPTX-DISABLED2: worker_state_machine.parallel_region.fallback.execute:
-; NVPTX-DISABLED2-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]])
-; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
-; NVPTX-DISABLED2: worker_state_machine.parallel_region.end:
-; NVPTX-DISABLED2-NEXT: call void @__kmpc_kernel_end_parallel()
-; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
-; NVPTX-DISABLED2: worker_state_machine.done.barrier:
-; NVPTX-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
-; NVPTX-DISABLED2: thread.user_code.check:
-; NVPTX-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; NVPTX-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
-; NVPTX-DISABLED2: common.ret:
-; NVPTX-DISABLED2-NEXT: ret void
-; NVPTX-DISABLED2: user_code.entry:
-; NVPTX-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]]
-; NVPTX-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18:![0-9]+]]
-; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
-; NVPTX-DISABLED2-NEXT: call void @__kmpc_target_deinit()
-; NVPTX-DISABLED2-NEXT: br label [[COMMON_RET]]
-;
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5__debug
; AMDGPU-DISABLED-SAME: () #[[ATTR1:[0-9]+]] {
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment)
+; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false)
; AMDGPU-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; AMDGPU-DISABLED: is_worker_check:
@@ -568,16 +302,17 @@ define internal void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug()
; AMDGPU-DISABLED: user_code.entry:
; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]]
; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18:![0-9]+]]
-; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]]
-; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit()
+; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
+; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
; AMDGPU-DISABLED-NEXT: br label [[COMMON_RET]]
+;
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5__debug
; NVPTX-DISABLED-SAME: () #[[ATTR1:[0-9]+]] {
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment)
+; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false)
; NVPTX-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; NVPTX-DISABLED: is_worker_check:
@@ -620,13 +355,14 @@ define internal void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug()
; NVPTX-DISABLED: user_code.entry:
; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]]
; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18:![0-9]+]]
-; NVPTX-DISABLED-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]]
-; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit()
+; NVPTX-DISABLED-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
+; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
; NVPTX-DISABLED-NEXT: br label [[COMMON_RET]]
+;
entry:
%.zero.addr = alloca i32, align 4
%.threadid_temp. = alloca i32, align 4
- %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment)
+ %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %common.ret
@@ -638,7 +374,7 @@ user_code.entry: ; preds = %entry
store i32 0, ptr %.zero.addr, align 4
store i32 %1, ptr %.threadid_temp., align 4, !tbaa !18
call void @__omp_outlined__(ptr %.threadid_temp., ptr %.zero.addr) #6
- call void @__kmpc_target_deinit()
+ call void @__kmpc_target_deinit(ptr @1, i8 1)
br label %common.ret
}
@@ -680,78 +416,6 @@ define internal void @__omp_outlined__(ptr noalias %.global_tid., ptr noalias %.
; NVPTX-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
;
-; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__
-; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
-; AMDGPU-DISABLED1-NEXT: entry:
-; AMDGPU-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND:%.*]]
-; AMDGPU-DISABLED1: for.cond:
-; AMDGPU-DISABLED1-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
-; AMDGPU-DISABLED1-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
-; AMDGPU-DISABLED1-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
-; AMDGPU-DISABLED1: for.cond.cleanup:
-; AMDGPU-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7:[0-9]+]]
-; AMDGPU-DISABLED1-NEXT: ret void
-; AMDGPU-DISABLED1: for.body:
-; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
-; AMDGPU-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; AMDGPU-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
-;
-; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__
-; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
-; AMDGPU-DISABLED2-NEXT: entry:
-; AMDGPU-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND:%.*]]
-; AMDGPU-DISABLED2: for.cond:
-; AMDGPU-DISABLED2-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
-; AMDGPU-DISABLED2-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
-; AMDGPU-DISABLED2-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
-; AMDGPU-DISABLED2: for.cond.cleanup:
-; AMDGPU-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7:[0-9]+]]
-; AMDGPU-DISABLED2-NEXT: ret void
-; AMDGPU-DISABLED2: for.body:
-; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
-; AMDGPU-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; AMDGPU-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
-;
-; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__
-; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
-; NVPTX-DISABLED1-NEXT: entry:
-; NVPTX-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; NVPTX-DISABLED1-NEXT: br label [[FOR_COND:%.*]]
-; NVPTX-DISABLED1: for.cond:
-; NVPTX-DISABLED1-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
-; NVPTX-DISABLED1-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
-; NVPTX-DISABLED1-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
-; NVPTX-DISABLED1: for.cond.cleanup:
-; NVPTX-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7:[0-9]+]]
-; NVPTX-DISABLED1-NEXT: ret void
-; NVPTX-DISABLED1: for.body:
-; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
-; NVPTX-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; NVPTX-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; NVPTX-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
-;
-; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__
-; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
-; NVPTX-DISABLED2-NEXT: entry:
-; NVPTX-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; NVPTX-DISABLED2-NEXT: br label [[FOR_COND:%.*]]
-; NVPTX-DISABLED2: for.cond:
-; NVPTX-DISABLED2-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
-; NVPTX-DISABLED2-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
-; NVPTX-DISABLED2-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
-; NVPTX-DISABLED2: for.cond.cleanup:
-; NVPTX-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7:[0-9]+]]
-; NVPTX-DISABLED2-NEXT: ret void
-; NVPTX-DISABLED2: for.body:
-; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
-; NVPTX-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; NVPTX-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; NVPTX-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
-;
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__
; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU-DISABLED-NEXT: entry:
@@ -769,6 +433,7 @@ define internal void @__omp_outlined__(ptr noalias %.global_tid., ptr noalias %.
; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; AMDGPU-DISABLED-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; AMDGPU-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
+;
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__
; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX-DISABLED-NEXT: entry:
@@ -786,6 +451,7 @@ define internal void @__omp_outlined__(ptr noalias %.global_tid., ptr noalias %.
; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; NVPTX-DISABLED-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; NVPTX-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
+;
entry:
%captured_vars_addrs = alloca [0 x ptr], align 8
br label %for.cond
@@ -820,40 +486,18 @@ define internal void @__omp_outlined__1(ptr noalias %.global_tid., ptr noalias %
; NVPTX-NEXT: call void @unknown() #[[ATTR8:[0-9]+]]
; NVPTX-NEXT: ret void
;
-; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__1
-; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
-; AMDGPU-DISABLED1-NEXT: entry:
-; AMDGPU-DISABLED1-NEXT: call void @unknown() #[[ATTR8:[0-9]+]]
-; AMDGPU-DISABLED1-NEXT: ret void
-;
-; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__1
-; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
-; AMDGPU-DISABLED2-NEXT: entry:
-; AMDGPU-DISABLED2-NEXT: call void @unknown() #[[ATTR8:[0-9]+]]
-; AMDGPU-DISABLED2-NEXT: ret void
-;
-; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__1
-; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
-; NVPTX-DISABLED1-NEXT: entry:
-; NVPTX-DISABLED1-NEXT: call void @unknown() #[[ATTR8:[0-9]+]]
-; NVPTX-DISABLED1-NEXT: ret void
-;
-; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__1
-; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
-; NVPTX-DISABLED2-NEXT: entry:
-; NVPTX-DISABLED2-NEXT: call void @unknown() #[[ATTR8:[0-9]+]]
-; NVPTX-DISABLED2-NEXT: ret void
-;
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__1
; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: call void @unknown() #[[ATTR8:[0-9]+]]
; AMDGPU-DISABLED-NEXT: ret void
+;
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__1
; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: call void @unknown() #[[ATTR8:[0-9]+]]
; NVPTX-DISABLED-NEXT: ret void
+;
entry:
call void @unknown() #11
ret void
@@ -881,46 +525,6 @@ define internal void @__omp_outlined__1_wrapper(i16 zeroext %0, i32 %1) #3 {
; NVPTX-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-NEXT: ret void
;
-; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper
-; AMDGPU-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] {
-; AMDGPU-DISABLED1-NEXT: entry:
-; AMDGPU-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
-; AMDGPU-DISABLED1-NEXT: ret void
-;
-; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper
-; AMDGPU-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] {
-; AMDGPU-DISABLED2-NEXT: entry:
-; AMDGPU-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
-; AMDGPU-DISABLED2-NEXT: ret void
-;
-; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper
-; NVPTX-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] {
-; NVPTX-DISABLED1-NEXT: entry:
-; NVPTX-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
-; NVPTX-DISABLED1-NEXT: ret void
-;
-; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper
-; NVPTX-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] {
-; NVPTX-DISABLED2-NEXT: entry:
-; NVPTX-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
-; NVPTX-DISABLED2-NEXT: ret void
-;
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper
; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] {
; AMDGPU-DISABLED-NEXT: entry:
@@ -930,6 +534,7 @@ define internal void @__omp_outlined__1_wrapper(i16 zeroext %0, i32 %1) #3 {
; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-DISABLED-NEXT: ret void
+;
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper
; NVPTX-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] {
; NVPTX-DISABLED-NEXT: entry:
@@ -939,6 +544,7 @@ define internal void @__omp_outlined__1_wrapper(i16 zeroext %0, i32 %1) #3 {
; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; NVPTX-DISABLED-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-DISABLED-NEXT: ret void
+;
entry:
%.addr1 = alloca i32, align 4
%.zero.addr = alloca i32, align 4
@@ -957,7 +563,7 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l2
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment)
+; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; AMDGPU: common.ret:
@@ -966,7 +572,7 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l2
; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
; AMDGPU-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
-; AMDGPU-NEXT: call void @__kmpc_target_deinit()
+; AMDGPU-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
; AMDGPU-NEXT: br label [[COMMON_RET]]
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20
@@ -974,7 +580,7 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l2
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment)
+; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; NVPTX: common.ret:
@@ -983,230 +589,16 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l2
; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
; NVPTX-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
-; NVPTX-NEXT: call void @__kmpc_target_deinit()
+; NVPTX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
; NVPTX-NEXT: br label [[COMMON_RET]]
;
-; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20
-; AMDGPU-DISABLED1-SAME: () #[[ATTR0]] {
-; AMDGPU-DISABLED1-NEXT: entry:
-; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment)
-; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
-; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
-; AMDGPU-DISABLED1: is_worker_check:
-; AMDGPU-DISABLED1-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-; AMDGPU-DISABLED1-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
-; AMDGPU-DISABLED1-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
-; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
-; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
-; AMDGPU-DISABLED1: worker_state_machine.begin:
-; AMDGPU-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
-; AMDGPU-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
-; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
-; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
-; AMDGPU-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
-; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
-; AMDGPU-DISABLED1: worker_state_machine.finished:
-; AMDGPU-DISABLED1-NEXT: ret void
-; AMDGPU-DISABLED1: worker_state_machine.is_active.check:
-; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
-; AMDGPU-DISABLED1: worker_state_machine.parallel_region.check:
-; AMDGPU-DISABLED1-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__3_wrapper.ID
-; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]]
-; AMDGPU-DISABLED1: worker_state_machine.parallel_region.execute:
-; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__3_wrapper(i16 0, i32 [[TMP0]])
-; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
-; AMDGPU-DISABLED1: worker_state_machine.parallel_region.fallback.execute:
-; AMDGPU-DISABLED1-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]])
-; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
-; AMDGPU-DISABLED1: worker_state_machine.parallel_region.end:
-; AMDGPU-DISABLED1-NEXT: call void @__kmpc_kernel_end_parallel()
-; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
-; AMDGPU-DISABLED1: worker_state_machine.done.barrier:
-; AMDGPU-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
-; AMDGPU-DISABLED1: thread.user_code.check:
-; AMDGPU-DISABLED1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; AMDGPU-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
-; AMDGPU-DISABLED1: common.ret:
-; AMDGPU-DISABLED1-NEXT: ret void
-; AMDGPU-DISABLED1: user_code.entry:
-; AMDGPU-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; AMDGPU-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
-; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
-; AMDGPU-DISABLED1-NEXT: call void @__kmpc_target_deinit()
-; AMDGPU-DISABLED1-NEXT: br label [[COMMON_RET]]
-;
-; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20
-; AMDGPU-DISABLED2-SAME: () #[[ATTR0]] {
-; AMDGPU-DISABLED2-NEXT: entry:
-; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment)
-; AMDGPU-DISABLED2-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
-; AMDGPU-DISABLED2-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
-; AMDGPU-DISABLED2: is_worker_check:
-; AMDGPU-DISABLED2-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-; AMDGPU-DISABLED2-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
-; AMDGPU-DISABLED2-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
-; AMDGPU-DISABLED2-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
-; AMDGPU-DISABLED2-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
-; AMDGPU-DISABLED2: worker_state_machine.begin:
-; AMDGPU-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
-; AMDGPU-DISABLED2-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
-; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
-; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
-; AMDGPU-DISABLED2-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
-; AMDGPU-DISABLED2-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
-; AMDGPU-DISABLED2: worker_state_machine.finished:
-; AMDGPU-DISABLED2-NEXT: ret void
-; AMDGPU-DISABLED2: worker_state_machine.is_active.check:
-; AMDGPU-DISABLED2-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
-; AMDGPU-DISABLED2: worker_state_machine.parallel_region.check:
-; AMDGPU-DISABLED2-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__3_wrapper.ID
-; AMDGPU-DISABLED2-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]]
-; AMDGPU-DISABLED2: worker_state_machine.parallel_region.execute:
-; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__3_wrapper(i16 0, i32 [[TMP0]])
-; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
-; AMDGPU-DISABLED2: worker_state_machine.parallel_region.fallback.execute:
-; AMDGPU-DISABLED2-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]])
-; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
-; AMDGPU-DISABLED2: worker_state_machine.parallel_region.end:
-; AMDGPU-DISABLED2-NEXT: call void @__kmpc_kernel_end_parallel()
-; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
-; AMDGPU-DISABLED2: worker_state_machine.done.barrier:
-; AMDGPU-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
-; AMDGPU-DISABLED2: thread.user_code.check:
-; AMDGPU-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; AMDGPU-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
-; AMDGPU-DISABLED2: common.ret:
-; AMDGPU-DISABLED2-NEXT: ret void
-; AMDGPU-DISABLED2: user_code.entry:
-; AMDGPU-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; AMDGPU-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
-; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
-; AMDGPU-DISABLED2-NEXT: call void @__kmpc_target_deinit()
-; AMDGPU-DISABLED2-NEXT: br label [[COMMON_RET]]
-;
-; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20
-; NVPTX-DISABLED1-SAME: () #[[ATTR0]] {
-; NVPTX-DISABLED1-NEXT: entry:
-; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment)
-; NVPTX-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
-; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
-; NVPTX-DISABLED1: is_worker_check:
-; NVPTX-DISABLED1-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-; NVPTX-DISABLED1-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
-; NVPTX-DISABLED1-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
-; NVPTX-DISABLED1-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
-; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
-; NVPTX-DISABLED1: worker_state_machine.begin:
-; NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; NVPTX-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
-; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
-; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
-; NVPTX-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
-; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
-; NVPTX-DISABLED1: worker_state_machine.finished:
-; NVPTX-DISABLED1-NEXT: ret void
-; NVPTX-DISABLED1: worker_state_machine.is_active.check:
-; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
-; NVPTX-DISABLED1: worker_state_machine.parallel_region.check:
-; NVPTX-DISABLED1-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__3_wrapper.ID
-; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]]
-; NVPTX-DISABLED1: worker_state_machine.parallel_region.execute:
-; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__3_wrapper(i16 0, i32 [[TMP0]])
-; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
-; NVPTX-DISABLED1: worker_state_machine.parallel_region.fallback.execute:
-; NVPTX-DISABLED1-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]])
-; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
-; NVPTX-DISABLED1: worker_state_machine.parallel_region.end:
-; NVPTX-DISABLED1-NEXT: call void @__kmpc_kernel_end_parallel()
-; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
-; NVPTX-DISABLED1: worker_state_machine.done.barrier:
-; NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
-; NVPTX-DISABLED1: thread.user_code.check:
-; NVPTX-DISABLED1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; NVPTX-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
-; NVPTX-DISABLED1: common.ret:
-; NVPTX-DISABLED1-NEXT: ret void
-; NVPTX-DISABLED1: user_code.entry:
-; NVPTX-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; NVPTX-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
-; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
-; NVPTX-DISABLED1-NEXT: call void @__kmpc_target_deinit()
-; NVPTX-DISABLED1-NEXT: br label [[COMMON_RET]]
-;
-; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20
-; NVPTX-DISABLED2-SAME: () #[[ATTR0]] {
-; NVPTX-DISABLED2-NEXT: entry:
-; NVPTX-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment)
-; NVPTX-DISABLED2-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
-; NVPTX-DISABLED2-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
-; NVPTX-DISABLED2: is_worker_check:
-; NVPTX-DISABLED2-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-; NVPTX-DISABLED2-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
-; NVPTX-DISABLED2-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
-; NVPTX-DISABLED2-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
-; NVPTX-DISABLED2-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
-; NVPTX-DISABLED2: worker_state_machine.begin:
-; NVPTX-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; NVPTX-DISABLED2-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
-; NVPTX-DISABLED2-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
-; NVPTX-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
-; NVPTX-DISABLED2-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
-; NVPTX-DISABLED2-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
-; NVPTX-DISABLED2: worker_state_machine.finished:
-; NVPTX-DISABLED2-NEXT: ret void
-; NVPTX-DISABLED2: worker_state_machine.is_active.check:
-; NVPTX-DISABLED2-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
-; NVPTX-DISABLED2: worker_state_machine.parallel_region.check:
-; NVPTX-DISABLED2-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__3_wrapper.ID
-; NVPTX-DISABLED2-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]]
-; NVPTX-DISABLED2: worker_state_machine.parallel_region.execute:
-; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__3_wrapper(i16 0, i32 [[TMP0]])
-; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
-; NVPTX-DISABLED2: worker_state_machine.parallel_region.fallback.execute:
-; NVPTX-DISABLED2-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]])
-; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
-; NVPTX-DISABLED2: worker_state_machine.parallel_region.end:
-; NVPTX-DISABLED2-NEXT: call void @__kmpc_kernel_end_parallel()
-; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
-; NVPTX-DISABLED2: worker_state_machine.done.barrier:
-; NVPTX-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
-; NVPTX-DISABLED2: thread.user_code.check:
-; NVPTX-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; NVPTX-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
-; NVPTX-DISABLED2: common.ret:
-; NVPTX-DISABLED2-NEXT: ret void
-; NVPTX-DISABLED2: user_code.entry:
-; NVPTX-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; NVPTX-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
-; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
-; NVPTX-DISABLED2-NEXT: call void @__kmpc_target_deinit()
-; NVPTX-DISABLED2-NEXT: br label [[COMMON_RET]]
-;
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20
; AMDGPU-DISABLED-SAME: () #[[ATTR0]] {
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment)
+; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false)
; AMDGPU-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; AMDGPU-DISABLED: is_worker_check:
@@ -1250,16 +642,17 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l2
; AMDGPU-DISABLED: user_code.entry:
; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
-; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]]
-; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit()
+; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
+; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
; AMDGPU-DISABLED-NEXT: br label [[COMMON_RET]]
+;
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20
; NVPTX-DISABLED-SAME: () #[[ATTR0]] {
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment)
+; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false)
; NVPTX-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; NVPTX-DISABLED: is_worker_check:
@@ -1302,13 +695,14 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l2
; NVPTX-DISABLED: user_code.entry:
; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
-; NVPTX-DISABLED-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]]
-; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit()
+; NVPTX-DISABLED-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
+; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
; NVPTX-DISABLED-NEXT: br label [[COMMON_RET]]
+;
entry:
%.zero.addr = alloca i32, align 4
%.threadid_temp. = alloca i32, align 4
- %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment)
+ %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %common.ret
@@ -1320,7 +714,7 @@ user_code.entry: ; preds = %entry
store i32 0, ptr %.zero.addr, align 4
store i32 %1, ptr %.threadid_temp., align 4, !tbaa !18
call void @__omp_outlined__2(ptr %.threadid_temp., ptr %.zero.addr) #6
- call void @__kmpc_target_deinit()
+ call void @__kmpc_target_deinit(ptr @1, i8 1)
br label %common.ret
}
@@ -1367,88 +761,6 @@ define internal void @__omp_outlined__2(ptr noalias %.global_tid., ptr noalias %
; NVPTX-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]]
;
-; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__2
-; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
-; AMDGPU-DISABLED1-NEXT: entry:
-; AMDGPU-DISABLED1-NEXT: [[X_H2S:%.*]] = alloca i8, i64 4, align 4, addrspace(5)
-; AMDGPU-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU-DISABLED1-NEXT: [[MALLOC_CAST:%.*]] = addrspacecast ptr addrspace(5) [[X_H2S]] to ptr
-; AMDGPU-DISABLED1-NEXT: call void @use(ptr nocapture [[MALLOC_CAST]]) #[[ATTR7]]
-; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND:%.*]]
-; AMDGPU-DISABLED1: for.cond:
-; AMDGPU-DISABLED1-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
-; AMDGPU-DISABLED1-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
-; AMDGPU-DISABLED1-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
-; AMDGPU-DISABLED1: for.cond.cleanup:
-; AMDGPU-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7]]
-; AMDGPU-DISABLED1-NEXT: ret void
-; AMDGPU-DISABLED1: for.body:
-; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
-; AMDGPU-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; AMDGPU-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]]
-;
-; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__2
-; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
-; AMDGPU-DISABLED2-NEXT: entry:
-; AMDGPU-DISABLED2-NEXT: [[X_H2S:%.*]] = alloca i8, i64 4, align 4, addrspace(5)
-; AMDGPU-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU-DISABLED2-NEXT: [[MALLOC_CAST:%.*]] = addrspacecast ptr addrspace(5) [[X_H2S]] to ptr
-; AMDGPU-DISABLED2-NEXT: call void @use(ptr nocapture [[MALLOC_CAST]]) #[[ATTR7]]
-; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND:%.*]]
-; AMDGPU-DISABLED2: for.cond:
-; AMDGPU-DISABLED2-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
-; AMDGPU-DISABLED2-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
-; AMDGPU-DISABLED2-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
-; AMDGPU-DISABLED2: for.cond.cleanup:
-; AMDGPU-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7]]
-; AMDGPU-DISABLED2-NEXT: ret void
-; AMDGPU-DISABLED2: for.body:
-; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
-; AMDGPU-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; AMDGPU-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]]
-;
-; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__2
-; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
-; NVPTX-DISABLED1-NEXT: entry:
-; NVPTX-DISABLED1-NEXT: [[X_H2S:%.*]] = alloca i8, i64 4, align 4
-; NVPTX-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; NVPTX-DISABLED1-NEXT: call void @use(ptr nocapture [[X_H2S]]) #[[ATTR7]]
-; NVPTX-DISABLED1-NEXT: br label [[FOR_COND:%.*]]
-; NVPTX-DISABLED1: for.cond:
-; NVPTX-DISABLED1-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
-; NVPTX-DISABLED1-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
-; NVPTX-DISABLED1-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
-; NVPTX-DISABLED1: for.cond.cleanup:
-; NVPTX-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7]]
-; NVPTX-DISABLED1-NEXT: ret void
-; NVPTX-DISABLED1: for.body:
-; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
-; NVPTX-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; NVPTX-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; NVPTX-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]]
-;
-; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__2
-; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
-; NVPTX-DISABLED2-NEXT: entry:
-; NVPTX-DISABLED2-NEXT: [[X_H2S:%.*]] = alloca i8, i64 4, align 4
-; NVPTX-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; NVPTX-DISABLED2-NEXT: call void @use(ptr nocapture [[X_H2S]]) #[[ATTR7]]
-; NVPTX-DISABLED2-NEXT: br label [[FOR_COND:%.*]]
-; NVPTX-DISABLED2: for.cond:
-; NVPTX-DISABLED2-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
-; NVPTX-DISABLED2-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
-; NVPTX-DISABLED2-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
-; NVPTX-DISABLED2: for.cond.cleanup:
-; NVPTX-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7]]
-; NVPTX-DISABLED2-NEXT: ret void
-; NVPTX-DISABLED2: for.body:
-; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
-; NVPTX-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; NVPTX-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; NVPTX-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]]
-;
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__2
; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU-DISABLED-NEXT: entry:
@@ -1469,6 +781,7 @@ define internal void @__omp_outlined__2(ptr noalias %.global_tid., ptr noalias %
; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; AMDGPU-DISABLED-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; AMDGPU-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]]
+;
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__2
; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX-DISABLED-NEXT: entry:
@@ -1488,6 +801,7 @@ define internal void @__omp_outlined__2(ptr noalias %.global_tid., ptr noalias %
; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; NVPTX-DISABLED-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; NVPTX-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]]
+;
entry:
%captured_vars_addrs = alloca [0 x ptr], align 8
%x = call align 4 ptr @__kmpc_alloc_shared(i64 4)
@@ -1524,40 +838,18 @@ define internal void @__omp_outlined__3(ptr noalias %.global_tid., ptr noalias %
; NVPTX-NEXT: call void @unknown() #[[ATTR8]]
; NVPTX-NEXT: ret void
;
-; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__3
-; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
-; AMDGPU-DISABLED1-NEXT: entry:
-; AMDGPU-DISABLED1-NEXT: call void @unknown() #[[ATTR8]]
-; AMDGPU-DISABLED1-NEXT: ret void
-;
-; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__3
-; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
-; AMDGPU-DISABLED2-NEXT: entry:
-; AMDGPU-DISABLED2-NEXT: call void @unknown() #[[ATTR8]]
-; AMDGPU-DISABLED2-NEXT: ret void
-;
-; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__3
-; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
-; NVPTX-DISABLED1-NEXT: entry:
-; NVPTX-DISABLED1-NEXT: call void @unknown() #[[ATTR8]]
-; NVPTX-DISABLED1-NEXT: ret void
-;
-; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__3
-; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
-; NVPTX-DISABLED2-NEXT: entry:
-; NVPTX-DISABLED2-NEXT: call void @unknown() #[[ATTR8]]
-; NVPTX-DISABLED2-NEXT: ret void
-;
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__3
; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: call void @unknown() #[[ATTR8]]
; AMDGPU-DISABLED-NEXT: ret void
+;
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__3
; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: call void @unknown() #[[ATTR8]]
; NVPTX-DISABLED-NEXT: ret void
+;
entry:
call void @unknown() #11
ret void
@@ -1585,46 +877,6 @@ define internal void @__omp_outlined__3_wrapper(i16 zeroext %0, i32 %1) #3 {
; NVPTX-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-NEXT: ret void
;
-; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper
-; AMDGPU-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
-; AMDGPU-DISABLED1-NEXT: entry:
-; AMDGPU-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
-; AMDGPU-DISABLED1-NEXT: ret void
-;
-; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper
-; AMDGPU-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
-; AMDGPU-DISABLED2-NEXT: entry:
-; AMDGPU-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
-; AMDGPU-DISABLED2-NEXT: ret void
-;
-; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper
-; NVPTX-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
-; NVPTX-DISABLED1-NEXT: entry:
-; NVPTX-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
-; NVPTX-DISABLED1-NEXT: ret void
-;
-; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper
-; NVPTX-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
-; NVPTX-DISABLED2-NEXT: entry:
-; NVPTX-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
-; NVPTX-DISABLED2-NEXT: ret void
-;
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper
; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
; AMDGPU-DISABLED-NEXT: entry:
@@ -1634,6 +886,7 @@ define internal void @__omp_outlined__3_wrapper(i16 zeroext %0, i32 %1) #3 {
; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-DISABLED-NEXT: ret void
+;
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper
; NVPTX-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
; NVPTX-DISABLED-NEXT: entry:
@@ -1643,6 +896,7 @@ define internal void @__omp_outlined__3_wrapper(i16 zeroext %0, i32 %1) #3 {
; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; NVPTX-DISABLED-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-DISABLED-NEXT: ret void
+;
entry:
%.addr1 = alloca i32, align 4
%.zero.addr = alloca i32, align 4
@@ -1662,7 +916,7 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment)
+; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; AMDGPU: common.ret:
@@ -1671,7 +925,7 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l
; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
; AMDGPU-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
-; AMDGPU-NEXT: call void @__kmpc_target_deinit()
+; AMDGPU-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
; AMDGPU-NEXT: br label [[COMMON_RET]]
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35
@@ -1679,7 +933,7 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment)
+; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; NVPTX: common.ret:
@@ -1688,230 +942,16 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l
; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
; NVPTX-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
-; NVPTX-NEXT: call void @__kmpc_target_deinit()
+; NVPTX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
; NVPTX-NEXT: br label [[COMMON_RET]]
;
-; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35
-; AMDGPU-DISABLED1-SAME: () #[[ATTR0]] {
-; AMDGPU-DISABLED1-NEXT: entry:
-; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment)
-; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
-; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
-; AMDGPU-DISABLED1: is_worker_check:
-; AMDGPU-DISABLED1-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-; AMDGPU-DISABLED1-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
-; AMDGPU-DISABLED1-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
-; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
-; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
-; AMDGPU-DISABLED1: worker_state_machine.begin:
-; AMDGPU-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
-; AMDGPU-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
-; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
-; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
-; AMDGPU-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
-; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
-; AMDGPU-DISABLED1: worker_state_machine.finished:
-; AMDGPU-DISABLED1-NEXT: ret void
-; AMDGPU-DISABLED1: worker_state_machine.is_active.check:
-; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
-; AMDGPU-DISABLED1: worker_state_machine.parallel_region.check:
-; AMDGPU-DISABLED1-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__5_wrapper.ID
-; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]]
-; AMDGPU-DISABLED1: worker_state_machine.parallel_region.execute:
-; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__5_wrapper(i16 0, i32 [[TMP0]])
-; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
-; AMDGPU-DISABLED1: worker_state_machine.parallel_region.fallback.execute:
-; AMDGPU-DISABLED1-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]])
-; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
-; AMDGPU-DISABLED1: worker_state_machine.parallel_region.end:
-; AMDGPU-DISABLED1-NEXT: call void @__kmpc_kernel_end_parallel()
-; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
-; AMDGPU-DISABLED1: worker_state_machine.done.barrier:
-; AMDGPU-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
-; AMDGPU-DISABLED1: thread.user_code.check:
-; AMDGPU-DISABLED1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; AMDGPU-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
-; AMDGPU-DISABLED1: common.ret:
-; AMDGPU-DISABLED1-NEXT: ret void
-; AMDGPU-DISABLED1: user_code.entry:
-; AMDGPU-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; AMDGPU-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
-; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
-; AMDGPU-DISABLED1-NEXT: call void @__kmpc_target_deinit()
-; AMDGPU-DISABLED1-NEXT: br label [[COMMON_RET]]
-;
-; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35
-; AMDGPU-DISABLED2-SAME: () #[[ATTR0]] {
-; AMDGPU-DISABLED2-NEXT: entry:
-; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment)
-; AMDGPU-DISABLED2-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
-; AMDGPU-DISABLED2-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
-; AMDGPU-DISABLED2: is_worker_check:
-; AMDGPU-DISABLED2-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-; AMDGPU-DISABLED2-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
-; AMDGPU-DISABLED2-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
-; AMDGPU-DISABLED2-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
-; AMDGPU-DISABLED2-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
-; AMDGPU-DISABLED2: worker_state_machine.begin:
-; AMDGPU-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
-; AMDGPU-DISABLED2-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
-; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
-; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
-; AMDGPU-DISABLED2-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
-; AMDGPU-DISABLED2-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
-; AMDGPU-DISABLED2: worker_state_machine.finished:
-; AMDGPU-DISABLED2-NEXT: ret void
-; AMDGPU-DISABLED2: worker_state_machine.is_active.check:
-; AMDGPU-DISABLED2-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
-; AMDGPU-DISABLED2: worker_state_machine.parallel_region.check:
-; AMDGPU-DISABLED2-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__5_wrapper.ID
-; AMDGPU-DISABLED2-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]]
-; AMDGPU-DISABLED2: worker_state_machine.parallel_region.execute:
-; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__5_wrapper(i16 0, i32 [[TMP0]])
-; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
-; AMDGPU-DISABLED2: worker_state_machine.parallel_region.fallback.execute:
-; AMDGPU-DISABLED2-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]])
-; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
-; AMDGPU-DISABLED2: worker_state_machine.parallel_region.end:
-; AMDGPU-DISABLED2-NEXT: call void @__kmpc_kernel_end_parallel()
-; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
-; AMDGPU-DISABLED2: worker_state_machine.done.barrier:
-; AMDGPU-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
-; AMDGPU-DISABLED2: thread.user_code.check:
-; AMDGPU-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; AMDGPU-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
-; AMDGPU-DISABLED2: common.ret:
-; AMDGPU-DISABLED2-NEXT: ret void
-; AMDGPU-DISABLED2: user_code.entry:
-; AMDGPU-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; AMDGPU-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
-; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
-; AMDGPU-DISABLED2-NEXT: call void @__kmpc_target_deinit()
-; AMDGPU-DISABLED2-NEXT: br label [[COMMON_RET]]
-;
-; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35
-; NVPTX-DISABLED1-SAME: () #[[ATTR0]] {
-; NVPTX-DISABLED1-NEXT: entry:
-; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment)
-; NVPTX-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
-; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
-; NVPTX-DISABLED1: is_worker_check:
-; NVPTX-DISABLED1-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-; NVPTX-DISABLED1-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
-; NVPTX-DISABLED1-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
-; NVPTX-DISABLED1-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
-; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
-; NVPTX-DISABLED1: worker_state_machine.begin:
-; NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; NVPTX-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
-; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
-; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
-; NVPTX-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
-; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
-; NVPTX-DISABLED1: worker_state_machine.finished:
-; NVPTX-DISABLED1-NEXT: ret void
-; NVPTX-DISABLED1: worker_state_machine.is_active.check:
-; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
-; NVPTX-DISABLED1: worker_state_machine.parallel_region.check:
-; NVPTX-DISABLED1-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__5_wrapper.ID
-; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]]
-; NVPTX-DISABLED1: worker_state_machine.parallel_region.execute:
-; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__5_wrapper(i16 0, i32 [[TMP0]])
-; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
-; NVPTX-DISABLED1: worker_state_machine.parallel_region.fallback.execute:
-; NVPTX-DISABLED1-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]])
-; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
-; NVPTX-DISABLED1: worker_state_machine.parallel_region.end:
-; NVPTX-DISABLED1-NEXT: call void @__kmpc_kernel_end_parallel()
-; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
-; NVPTX-DISABLED1: worker_state_machine.done.barrier:
-; NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
-; NVPTX-DISABLED1: thread.user_code.check:
-; NVPTX-DISABLED1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; NVPTX-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
-; NVPTX-DISABLED1: common.ret:
-; NVPTX-DISABLED1-NEXT: ret void
-; NVPTX-DISABLED1: user_code.entry:
-; NVPTX-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; NVPTX-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
-; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
-; NVPTX-DISABLED1-NEXT: call void @__kmpc_target_deinit()
-; NVPTX-DISABLED1-NEXT: br label [[COMMON_RET]]
-;
-; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35
-; NVPTX-DISABLED2-SAME: () #[[ATTR0]] {
-; NVPTX-DISABLED2-NEXT: entry:
-; NVPTX-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment)
-; NVPTX-DISABLED2-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
-; NVPTX-DISABLED2-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
-; NVPTX-DISABLED2: is_worker_check:
-; NVPTX-DISABLED2-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-; NVPTX-DISABLED2-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
-; NVPTX-DISABLED2-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
-; NVPTX-DISABLED2-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
-; NVPTX-DISABLED2-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
-; NVPTX-DISABLED2: worker_state_machine.begin:
-; NVPTX-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; NVPTX-DISABLED2-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
-; NVPTX-DISABLED2-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
-; NVPTX-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
-; NVPTX-DISABLED2-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
-; NVPTX-DISABLED2-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
-; NVPTX-DISABLED2: worker_state_machine.finished:
-; NVPTX-DISABLED2-NEXT: ret void
-; NVPTX-DISABLED2: worker_state_machine.is_active.check:
-; NVPTX-DISABLED2-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
-; NVPTX-DISABLED2: worker_state_machine.parallel_region.check:
-; NVPTX-DISABLED2-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__5_wrapper.ID
-; NVPTX-DISABLED2-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]]
-; NVPTX-DISABLED2: worker_state_machine.parallel_region.execute:
-; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__5_wrapper(i16 0, i32 [[TMP0]])
-; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
-; NVPTX-DISABLED2: worker_state_machine.parallel_region.fallback.execute:
-; NVPTX-DISABLED2-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]])
-; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
-; NVPTX-DISABLED2: worker_state_machine.parallel_region.end:
-; NVPTX-DISABLED2-NEXT: call void @__kmpc_kernel_end_parallel()
-; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
-; NVPTX-DISABLED2: worker_state_machine.done.barrier:
-; NVPTX-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
-; NVPTX-DISABLED2: thread.user_code.check:
-; NVPTX-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; NVPTX-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
-; NVPTX-DISABLED2: common.ret:
-; NVPTX-DISABLED2-NEXT: ret void
-; NVPTX-DISABLED2: user_code.entry:
-; NVPTX-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; NVPTX-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
-; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
-; NVPTX-DISABLED2-NEXT: call void @__kmpc_target_deinit()
-; NVPTX-DISABLED2-NEXT: br label [[COMMON_RET]]
-;
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35
; AMDGPU-DISABLED-SAME: () #[[ATTR0]] {
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment)
+; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false)
; AMDGPU-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; AMDGPU-DISABLED: is_worker_check:
@@ -1955,16 +995,17 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l
; AMDGPU-DISABLED: user_code.entry:
; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
-; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]]
-; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit()
+; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
+; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
; AMDGPU-DISABLED-NEXT: br label [[COMMON_RET]]
+;
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35
; NVPTX-DISABLED-SAME: () #[[ATTR0]] {
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment)
+; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false)
; NVPTX-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; NVPTX-DISABLED: is_worker_check:
@@ -2007,13 +1048,14 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l
; NVPTX-DISABLED: user_code.entry:
; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
-; NVPTX-DISABLED-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]]
-; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit()
+; NVPTX-DISABLED-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
+; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
; NVPTX-DISABLED-NEXT: br label [[COMMON_RET]]
+;
entry:
%.zero.addr = alloca i32, align 4
%.threadid_temp. = alloca i32, align 4
- %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment)
+ %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %common.ret
@@ -2025,7 +1067,7 @@ user_code.entry: ; preds = %entry
store i32 0, ptr %.zero.addr, align 4
store i32 %1, ptr %.threadid_temp., align 4, !tbaa !18
call void @__omp_outlined__4(ptr %.threadid_temp., ptr %.zero.addr) #6
- call void @__kmpc_target_deinit()
+ call void @__kmpc_target_deinit(ptr @1, i8 1)
br label %common.ret
}
@@ -2069,82 +1111,6 @@ define internal void @__omp_outlined__4(ptr noalias %.global_tid., ptr noalias %
; NVPTX-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]]
;
-; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__4
-; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
-; AMDGPU-DISABLED1-NEXT: entry:
-; AMDGPU-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
-; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND:%.*]]
-; AMDGPU-DISABLED1: for.cond:
-; AMDGPU-DISABLED1-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
-; AMDGPU-DISABLED1-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
-; AMDGPU-DISABLED1-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
-; AMDGPU-DISABLED1: for.cond.cleanup:
-; AMDGPU-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7]]
-; AMDGPU-DISABLED1-NEXT: ret void
-; AMDGPU-DISABLED1: for.body:
-; AMDGPU-DISABLED1-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA26:![0-9]+]]
-; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
-; AMDGPU-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
-; AMDGPU-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]]
-;
-; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__4
-; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
-; AMDGPU-DISABLED2-NEXT: entry:
-; AMDGPU-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
-; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND:%.*]]
-; AMDGPU-DISABLED2: for.cond:
-; AMDGPU-DISABLED2-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
-; AMDGPU-DISABLED2-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
-; AMDGPU-DISABLED2-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
-; AMDGPU-DISABLED2: for.cond.cleanup:
-; AMDGPU-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7]]
-; AMDGPU-DISABLED2-NEXT: ret void
-; AMDGPU-DISABLED2: for.body:
-; AMDGPU-DISABLED2-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA26:![0-9]+]]
-; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
-; AMDGPU-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
-; AMDGPU-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]]
-;
-; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__4
-; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
-; NVPTX-DISABLED1-NEXT: entry:
-; NVPTX-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
-; NVPTX-DISABLED1-NEXT: br label [[FOR_COND:%.*]]
-; NVPTX-DISABLED1: for.cond:
-; NVPTX-DISABLED1-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
-; NVPTX-DISABLED1-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
-; NVPTX-DISABLED1-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
-; NVPTX-DISABLED1: for.cond.cleanup:
-; NVPTX-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7]]
-; NVPTX-DISABLED1-NEXT: ret void
-; NVPTX-DISABLED1: for.body:
-; NVPTX-DISABLED1-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA26:![0-9]+]]
-; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
-; NVPTX-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
-; NVPTX-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; NVPTX-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]]
-;
-; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__4
-; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
-; NVPTX-DISABLED2-NEXT: entry:
-; NVPTX-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
-; NVPTX-DISABLED2-NEXT: br label [[FOR_COND:%.*]]
-; NVPTX-DISABLED2: for.cond:
-; NVPTX-DISABLED2-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
-; NVPTX-DISABLED2-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
-; NVPTX-DISABLED2-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
-; NVPTX-DISABLED2: for.cond.cleanup:
-; NVPTX-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7]]
-; NVPTX-DISABLED2-NEXT: ret void
-; NVPTX-DISABLED2: for.body:
-; NVPTX-DISABLED2-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA26:![0-9]+]]
-; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
-; NVPTX-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
-; NVPTX-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; NVPTX-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]]
-;
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__4
; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU-DISABLED-NEXT: entry:
@@ -2163,6 +1129,7 @@ define internal void @__omp_outlined__4(ptr noalias %.global_tid., ptr noalias %
; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
; AMDGPU-DISABLED-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; AMDGPU-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]]
+;
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__4
; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX-DISABLED-NEXT: entry:
@@ -2181,6 +1148,7 @@ define internal void @__omp_outlined__4(ptr noalias %.global_tid., ptr noalias %
; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
; NVPTX-DISABLED-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; NVPTX-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]]
+;
entry:
%captured_vars_addrs = alloca [1 x ptr], align 8
%x = call align 4 ptr @__kmpc_alloc_shared(i64 4)
@@ -2224,42 +1192,6 @@ define internal void @__omp_outlined__5(ptr noalias %.global_tid., ptr noalias %
; NVPTX-NEXT: call void @unknown() #[[ATTR8]]
; NVPTX-NEXT: ret void
;
-; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__5
-; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
-; AMDGPU-DISABLED1-NEXT: entry:
-; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA18]]
-; AMDGPU-DISABLED1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
-; AMDGPU-DISABLED1-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]]
-; AMDGPU-DISABLED1-NEXT: call void @unknown() #[[ATTR8]]
-; AMDGPU-DISABLED1-NEXT: ret void
-;
-; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__5
-; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
-; AMDGPU-DISABLED2-NEXT: entry:
-; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA18]]
-; AMDGPU-DISABLED2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
-; AMDGPU-DISABLED2-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]]
-; AMDGPU-DISABLED2-NEXT: call void @unknown() #[[ATTR8]]
-; AMDGPU-DISABLED2-NEXT: ret void
-;
-; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__5
-; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
-; NVPTX-DISABLED1-NEXT: entry:
-; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA18]]
-; NVPTX-DISABLED1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
-; NVPTX-DISABLED1-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]]
-; NVPTX-DISABLED1-NEXT: call void @unknown() #[[ATTR8]]
-; NVPTX-DISABLED1-NEXT: ret void
-;
-; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__5
-; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
-; NVPTX-DISABLED2-NEXT: entry:
-; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA18]]
-; NVPTX-DISABLED2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
-; NVPTX-DISABLED2-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]]
-; NVPTX-DISABLED2-NEXT: call void @unknown() #[[ATTR8]]
-; NVPTX-DISABLED2-NEXT: ret void
-;
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__5
; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
; AMDGPU-DISABLED-NEXT: entry:
@@ -2268,6 +1200,7 @@ define internal void @__omp_outlined__5(ptr noalias %.global_tid., ptr noalias %
; AMDGPU-DISABLED-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]]
; AMDGPU-DISABLED-NEXT: call void @unknown() #[[ATTR8]]
; AMDGPU-DISABLED-NEXT: ret void
+;
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__5
; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
; NVPTX-DISABLED-NEXT: entry:
@@ -2276,6 +1209,7 @@ define internal void @__omp_outlined__5(ptr noalias %.global_tid., ptr noalias %
; NVPTX-DISABLED-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]]
; NVPTX-DISABLED-NEXT: call void @unknown() #[[ATTR8]]
; NVPTX-DISABLED-NEXT: ret void
+;
entry:
%0 = load i32, ptr %x, align 4, !tbaa !18
%inc = add nsw i32 %0, 1
@@ -2310,54 +1244,6 @@ define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #3 {
; NVPTX-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
; NVPTX-NEXT: ret void
;
-; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
-; AMDGPU-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
-; AMDGPU-DISABLED1-NEXT: entry:
-; AMDGPU-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU-DISABLED1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
-; AMDGPU-DISABLED1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]]
-; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
-; AMDGPU-DISABLED1-NEXT: ret void
-;
-; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
-; AMDGPU-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
-; AMDGPU-DISABLED2-NEXT: entry:
-; AMDGPU-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU-DISABLED2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
-; AMDGPU-DISABLED2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]]
-; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
-; AMDGPU-DISABLED2-NEXT: ret void
-;
-; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
-; NVPTX-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
-; NVPTX-DISABLED1-NEXT: entry:
-; NVPTX-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX-DISABLED1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
-; NVPTX-DISABLED1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]]
-; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
-; NVPTX-DISABLED1-NEXT: ret void
-;
-; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
-; NVPTX-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
-; NVPTX-DISABLED2-NEXT: entry:
-; NVPTX-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX-DISABLED2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
-; NVPTX-DISABLED2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]]
-; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
-; NVPTX-DISABLED2-NEXT: ret void
-;
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
; AMDGPU-DISABLED-NEXT: entry:
@@ -2369,6 +1255,7 @@ define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #3 {
; AMDGPU-DISABLED-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]]
; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
; AMDGPU-DISABLED-NEXT: ret void
+;
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
; NVPTX-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
; NVPTX-DISABLED-NEXT: entry:
@@ -2380,6 +1267,7 @@ define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #3 {
; NVPTX-DISABLED-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]]
; NVPTX-DISABLED-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
; NVPTX-DISABLED-NEXT: ret void
+;
entry:
%.addr1 = alloca i32, align 4
%.zero.addr = alloca i32, align 4
@@ -2400,7 +1288,7 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_g
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment)
+; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; AMDGPU: common.ret:
@@ -2409,7 +1297,7 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_g
; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
; AMDGPU-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
-; AMDGPU-NEXT: call void @__kmpc_target_deinit()
+; AMDGPU-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
; AMDGPU-NEXT: br label [[COMMON_RET]]
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50
@@ -2417,7 +1305,7 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_g
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment)
+; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; NVPTX: common.ret:
@@ -2426,230 +1314,16 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_g
; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
; NVPTX-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
-; NVPTX-NEXT: call void @__kmpc_target_deinit()
+; NVPTX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
; NVPTX-NEXT: br label [[COMMON_RET]]
;
-; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50
-; AMDGPU-DISABLED1-SAME: () #[[ATTR0]] {
-; AMDGPU-DISABLED1-NEXT: entry:
-; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment)
-; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
-; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
-; AMDGPU-DISABLED1: is_worker_check:
-; AMDGPU-DISABLED1-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-; AMDGPU-DISABLED1-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
-; AMDGPU-DISABLED1-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
-; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
-; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
-; AMDGPU-DISABLED1: worker_state_machine.begin:
-; AMDGPU-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
-; AMDGPU-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
-; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
-; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
-; AMDGPU-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
-; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
-; AMDGPU-DISABLED1: worker_state_machine.finished:
-; AMDGPU-DISABLED1-NEXT: ret void
-; AMDGPU-DISABLED1: worker_state_machine.is_active.check:
-; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
-; AMDGPU-DISABLED1: worker_state_machine.parallel_region.check:
-; AMDGPU-DISABLED1-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__7_wrapper.ID
-; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]]
-; AMDGPU-DISABLED1: worker_state_machine.parallel_region.execute:
-; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__7_wrapper(i16 0, i32 [[TMP0]])
-; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
-; AMDGPU-DISABLED1: worker_state_machine.parallel_region.fallback.execute:
-; AMDGPU-DISABLED1-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]])
-; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
-; AMDGPU-DISABLED1: worker_state_machine.parallel_region.end:
-; AMDGPU-DISABLED1-NEXT: call void @__kmpc_kernel_end_parallel()
-; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
-; AMDGPU-DISABLED1: worker_state_machine.done.barrier:
-; AMDGPU-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
-; AMDGPU-DISABLED1: thread.user_code.check:
-; AMDGPU-DISABLED1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; AMDGPU-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
-; AMDGPU-DISABLED1: common.ret:
-; AMDGPU-DISABLED1-NEXT: ret void
-; AMDGPU-DISABLED1: user_code.entry:
-; AMDGPU-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; AMDGPU-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
-; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
-; AMDGPU-DISABLED1-NEXT: call void @__kmpc_target_deinit()
-; AMDGPU-DISABLED1-NEXT: br label [[COMMON_RET]]
-;
-; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50
-; AMDGPU-DISABLED2-SAME: () #[[ATTR0]] {
-; AMDGPU-DISABLED2-NEXT: entry:
-; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment)
-; AMDGPU-DISABLED2-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
-; AMDGPU-DISABLED2-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
-; AMDGPU-DISABLED2: is_worker_check:
-; AMDGPU-DISABLED2-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-; AMDGPU-DISABLED2-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
-; AMDGPU-DISABLED2-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
-; AMDGPU-DISABLED2-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
-; AMDGPU-DISABLED2-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
-; AMDGPU-DISABLED2: worker_state_machine.begin:
-; AMDGPU-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
-; AMDGPU-DISABLED2-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
-; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
-; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
-; AMDGPU-DISABLED2-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
-; AMDGPU-DISABLED2-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
-; AMDGPU-DISABLED2: worker_state_machine.finished:
-; AMDGPU-DISABLED2-NEXT: ret void
-; AMDGPU-DISABLED2: worker_state_machine.is_active.check:
-; AMDGPU-DISABLED2-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
-; AMDGPU-DISABLED2: worker_state_machine.parallel_region.check:
-; AMDGPU-DISABLED2-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__7_wrapper.ID
-; AMDGPU-DISABLED2-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]]
-; AMDGPU-DISABLED2: worker_state_machine.parallel_region.execute:
-; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__7_wrapper(i16 0, i32 [[TMP0]])
-; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
-; AMDGPU-DISABLED2: worker_state_machine.parallel_region.fallback.execute:
-; AMDGPU-DISABLED2-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]])
-; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
-; AMDGPU-DISABLED2: worker_state_machine.parallel_region.end:
-; AMDGPU-DISABLED2-NEXT: call void @__kmpc_kernel_end_parallel()
-; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
-; AMDGPU-DISABLED2: worker_state_machine.done.barrier:
-; AMDGPU-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
-; AMDGPU-DISABLED2: thread.user_code.check:
-; AMDGPU-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; AMDGPU-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
-; AMDGPU-DISABLED2: common.ret:
-; AMDGPU-DISABLED2-NEXT: ret void
-; AMDGPU-DISABLED2: user_code.entry:
-; AMDGPU-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; AMDGPU-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
-; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
-; AMDGPU-DISABLED2-NEXT: call void @__kmpc_target_deinit()
-; AMDGPU-DISABLED2-NEXT: br label [[COMMON_RET]]
-;
-; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50
-; NVPTX-DISABLED1-SAME: () #[[ATTR0]] {
-; NVPTX-DISABLED1-NEXT: entry:
-; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment)
-; NVPTX-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
-; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
-; NVPTX-DISABLED1: is_worker_check:
-; NVPTX-DISABLED1-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-; NVPTX-DISABLED1-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
-; NVPTX-DISABLED1-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
-; NVPTX-DISABLED1-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
-; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
-; NVPTX-DISABLED1: worker_state_machine.begin:
-; NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; NVPTX-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
-; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
-; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
-; NVPTX-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
-; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
-; NVPTX-DISABLED1: worker_state_machine.finished:
-; NVPTX-DISABLED1-NEXT: ret void
-; NVPTX-DISABLED1: worker_state_machine.is_active.check:
-; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
-; NVPTX-DISABLED1: worker_state_machine.parallel_region.check:
-; NVPTX-DISABLED1-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__7_wrapper.ID
-; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]]
-; NVPTX-DISABLED1: worker_state_machine.parallel_region.execute:
-; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__7_wrapper(i16 0, i32 [[TMP0]])
-; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
-; NVPTX-DISABLED1: worker_state_machine.parallel_region.fallback.execute:
-; NVPTX-DISABLED1-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]])
-; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
-; NVPTX-DISABLED1: worker_state_machine.parallel_region.end:
-; NVPTX-DISABLED1-NEXT: call void @__kmpc_kernel_end_parallel()
-; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
-; NVPTX-DISABLED1: worker_state_machine.done.barrier:
-; NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
-; NVPTX-DISABLED1: thread.user_code.check:
-; NVPTX-DISABLED1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; NVPTX-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
-; NVPTX-DISABLED1: common.ret:
-; NVPTX-DISABLED1-NEXT: ret void
-; NVPTX-DISABLED1: user_code.entry:
-; NVPTX-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; NVPTX-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
-; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
-; NVPTX-DISABLED1-NEXT: call void @__kmpc_target_deinit()
-; NVPTX-DISABLED1-NEXT: br label [[COMMON_RET]]
-;
-; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50
-; NVPTX-DISABLED2-SAME: () #[[ATTR0]] {
-; NVPTX-DISABLED2-NEXT: entry:
-; NVPTX-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment)
-; NVPTX-DISABLED2-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
-; NVPTX-DISABLED2-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
-; NVPTX-DISABLED2: is_worker_check:
-; NVPTX-DISABLED2-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-; NVPTX-DISABLED2-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
-; NVPTX-DISABLED2-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
-; NVPTX-DISABLED2-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
-; NVPTX-DISABLED2-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
-; NVPTX-DISABLED2: worker_state_machine.begin:
-; NVPTX-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; NVPTX-DISABLED2-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
-; NVPTX-DISABLED2-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
-; NVPTX-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
-; NVPTX-DISABLED2-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
-; NVPTX-DISABLED2-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
-; NVPTX-DISABLED2: worker_state_machine.finished:
-; NVPTX-DISABLED2-NEXT: ret void
-; NVPTX-DISABLED2: worker_state_machine.is_active.check:
-; NVPTX-DISABLED2-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
-; NVPTX-DISABLED2: worker_state_machine.parallel_region.check:
-; NVPTX-DISABLED2-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__7_wrapper.ID
-; NVPTX-DISABLED2-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]]
-; NVPTX-DISABLED2: worker_state_machine.parallel_region.execute:
-; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__7_wrapper(i16 0, i32 [[TMP0]])
-; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
-; NVPTX-DISABLED2: worker_state_machine.parallel_region.fallback.execute:
-; NVPTX-DISABLED2-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]])
-; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
-; NVPTX-DISABLED2: worker_state_machine.parallel_region.end:
-; NVPTX-DISABLED2-NEXT: call void @__kmpc_kernel_end_parallel()
-; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
-; NVPTX-DISABLED2: worker_state_machine.done.barrier:
-; NVPTX-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
-; NVPTX-DISABLED2: thread.user_code.check:
-; NVPTX-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; NVPTX-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
-; NVPTX-DISABLED2: common.ret:
-; NVPTX-DISABLED2-NEXT: ret void
-; NVPTX-DISABLED2: user_code.entry:
-; NVPTX-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; NVPTX-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
-; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
-; NVPTX-DISABLED2-NEXT: call void @__kmpc_target_deinit()
-; NVPTX-DISABLED2-NEXT: br label [[COMMON_RET]]
-;
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50
; AMDGPU-DISABLED-SAME: () #[[ATTR0]] {
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment)
+; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false)
; AMDGPU-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; AMDGPU-DISABLED: is_worker_check:
@@ -2693,16 +1367,17 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_g
; AMDGPU-DISABLED: user_code.entry:
; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
-; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]]
-; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit()
+; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
+; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
; AMDGPU-DISABLED-NEXT: br label [[COMMON_RET]]
+;
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50
; NVPTX-DISABLED-SAME: () #[[ATTR0]] {
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment)
+; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false)
; NVPTX-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; NVPTX-DISABLED: is_worker_check:
@@ -2745,13 +1420,14 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_g
; NVPTX-DISABLED: user_code.entry:
; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
-; NVPTX-DISABLED-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]]
-; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit()
+; NVPTX-DISABLED-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
+; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
; NVPTX-DISABLED-NEXT: br label [[COMMON_RET]]
+;
entry:
%.zero.addr = alloca i32, align 4
%.threadid_temp. = alloca i32, align 4
- %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment)
+ %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %common.ret
@@ -2763,7 +1439,7 @@ user_code.entry: ; preds = %entry
store i32 0, ptr %.zero.addr, align 4
store i32 %1, ptr %.threadid_temp., align 4, !tbaa !18
call void @__omp_outlined__6(ptr %.threadid_temp., ptr %.zero.addr) #6
- call void @__kmpc_target_deinit()
+ call void @__kmpc_target_deinit(ptr @1, i8 1)
br label %common.ret
}
@@ -2835,86 +1511,6 @@ define internal void @__omp_outlined__6(ptr noalias %.global_tid., ptr noalias %
; NVPTX-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]]
;
-; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__6
-; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
-; AMDGPU-DISABLED1-NEXT: entry:
-; AMDGPU-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
-; AMDGPU-DISABLED1-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), align 4, !tbaa [[TBAA18]]
-; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND:%.*]]
-; AMDGPU-DISABLED1: for.cond:
-; AMDGPU-DISABLED1-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
-; AMDGPU-DISABLED1-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
-; AMDGPU-DISABLED1-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
-; AMDGPU-DISABLED1: for.cond.cleanup:
-; AMDGPU-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7]]
-; AMDGPU-DISABLED1-NEXT: ret void
-; AMDGPU-DISABLED1: for.body:
-; AMDGPU-DISABLED1-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA26]]
-; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
-; AMDGPU-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
-; AMDGPU-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]]
-;
-; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__6
-; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
-; AMDGPU-DISABLED2-NEXT: entry:
-; AMDGPU-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
-; AMDGPU-DISABLED2-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), align 4, !tbaa [[TBAA18]]
-; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND:%.*]]
-; AMDGPU-DISABLED2: for.cond:
-; AMDGPU-DISABLED2-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
-; AMDGPU-DISABLED2-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
-; AMDGPU-DISABLED2-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
-; AMDGPU-DISABLED2: for.cond.cleanup:
-; AMDGPU-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7]]
-; AMDGPU-DISABLED2-NEXT: ret void
-; AMDGPU-DISABLED2: for.body:
-; AMDGPU-DISABLED2-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA26]]
-; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
-; AMDGPU-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
-; AMDGPU-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]]
-;
-; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__6
-; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
-; NVPTX-DISABLED1-NEXT: entry:
-; NVPTX-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
-; NVPTX-DISABLED1-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), align 4, !tbaa [[TBAA18]]
-; NVPTX-DISABLED1-NEXT: br label [[FOR_COND:%.*]]
-; NVPTX-DISABLED1: for.cond:
-; NVPTX-DISABLED1-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
-; NVPTX-DISABLED1-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
-; NVPTX-DISABLED1-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
-; NVPTX-DISABLED1: for.cond.cleanup:
-; NVPTX-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7]]
-; NVPTX-DISABLED1-NEXT: ret void
-; NVPTX-DISABLED1: for.body:
-; NVPTX-DISABLED1-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA26]]
-; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
-; NVPTX-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
-; NVPTX-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; NVPTX-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]]
-;
-; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__6
-; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
-; NVPTX-DISABLED2-NEXT: entry:
-; NVPTX-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
-; NVPTX-DISABLED2-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), align 4, !tbaa [[TBAA18]]
-; NVPTX-DISABLED2-NEXT: br label [[FOR_COND:%.*]]
-; NVPTX-DISABLED2: for.cond:
-; NVPTX-DISABLED2-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
-; NVPTX-DISABLED2-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
-; NVPTX-DISABLED2-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
-; NVPTX-DISABLED2: for.cond.cleanup:
-; NVPTX-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7]]
-; NVPTX-DISABLED2-NEXT: ret void
-; NVPTX-DISABLED2: for.body:
-; NVPTX-DISABLED2-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA26]]
-; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
-; NVPTX-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
-; NVPTX-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
-; NVPTX-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]]
-;
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__6
; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU-DISABLED-NEXT: entry:
@@ -2934,6 +1530,7 @@ define internal void @__omp_outlined__6(ptr noalias %.global_tid., ptr noalias %
; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
; AMDGPU-DISABLED-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; AMDGPU-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]]
+;
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__6
; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX-DISABLED-NEXT: entry:
@@ -2953,6 +1550,7 @@ define internal void @__omp_outlined__6(ptr noalias %.global_tid., ptr noalias %
; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
; NVPTX-DISABLED-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; NVPTX-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]]
+;
entry:
%captured_vars_addrs = alloca [1 x ptr], align 8
%x = call align 4 ptr @__kmpc_alloc_shared(i64 4)
@@ -2997,42 +1595,6 @@ define internal void @__omp_outlined__7(ptr noalias %.global_tid., ptr noalias %
; NVPTX-NEXT: call void @unknowni32p(ptr [[X]]) #[[ATTR8]]
; NVPTX-NEXT: ret void
;
-; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__7
-; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
-; AMDGPU-DISABLED1-NEXT: entry:
-; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA18]]
-; AMDGPU-DISABLED1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
-; AMDGPU-DISABLED1-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]]
-; AMDGPU-DISABLED1-NEXT: call void @unknowni32p(ptr [[X]]) #[[ATTR8]]
-; AMDGPU-DISABLED1-NEXT: ret void
-;
-; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__7
-; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
-; AMDGPU-DISABLED2-NEXT: entry:
-; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA18]]
-; AMDGPU-DISABLED2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
-; AMDGPU-DISABLED2-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]]
-; AMDGPU-DISABLED2-NEXT: call void @unknowni32p(ptr [[X]]) #[[ATTR8]]
-; AMDGPU-DISABLED2-NEXT: ret void
-;
-; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__7
-; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
-; NVPTX-DISABLED1-NEXT: entry:
-; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA18]]
-; NVPTX-DISABLED1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
-; NVPTX-DISABLED1-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]]
-; NVPTX-DISABLED1-NEXT: call void @unknowni32p(ptr [[X]]) #[[ATTR8]]
-; NVPTX-DISABLED1-NEXT: ret void
-;
-; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__7
-; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
-; NVPTX-DISABLED2-NEXT: entry:
-; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA18]]
-; NVPTX-DISABLED2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
-; NVPTX-DISABLED2-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]]
-; NVPTX-DISABLED2-NEXT: call void @unknowni32p(ptr [[X]]) #[[ATTR8]]
-; NVPTX-DISABLED2-NEXT: ret void
-;
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__7
; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
; AMDGPU-DISABLED-NEXT: entry:
@@ -3041,6 +1603,7 @@ define internal void @__omp_outlined__7(ptr noalias %.global_tid., ptr noalias %
; AMDGPU-DISABLED-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]]
; AMDGPU-DISABLED-NEXT: call void @unknowni32p(ptr [[X]]) #[[ATTR8]]
; AMDGPU-DISABLED-NEXT: ret void
+;
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__7
; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
; NVPTX-DISABLED-NEXT: entry:
@@ -3049,6 +1612,7 @@ define internal void @__omp_outlined__7(ptr noalias %.global_tid., ptr noalias %
; NVPTX-DISABLED-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]]
; NVPTX-DISABLED-NEXT: call void @unknowni32p(ptr [[X]]) #[[ATTR8]]
; NVPTX-DISABLED-NEXT: ret void
+;
entry:
%0 = load i32, ptr %x, align 4, !tbaa !18
%inc = add nsw i32 %0, 1
@@ -3083,54 +1647,6 @@ define internal void @__omp_outlined__7_wrapper(i16 zeroext %0, i32 %1) #3 {
; NVPTX-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
; NVPTX-NEXT: ret void
;
-; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
-; AMDGPU-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
-; AMDGPU-DISABLED1-NEXT: entry:
-; AMDGPU-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU-DISABLED1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
-; AMDGPU-DISABLED1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]]
-; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
-; AMDGPU-DISABLED1-NEXT: ret void
-;
-; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
-; AMDGPU-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
-; AMDGPU-DISABLED2-NEXT: entry:
-; AMDGPU-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU-DISABLED2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
-; AMDGPU-DISABLED2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]]
-; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
-; AMDGPU-DISABLED2-NEXT: ret void
-;
-; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
-; NVPTX-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
-; NVPTX-DISABLED1-NEXT: entry:
-; NVPTX-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX-DISABLED1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
-; NVPTX-DISABLED1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]]
-; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
-; NVPTX-DISABLED1-NEXT: ret void
-;
-; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
-; NVPTX-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
-; NVPTX-DISABLED2-NEXT: entry:
-; NVPTX-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX-DISABLED2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
-; NVPTX-DISABLED2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]]
-; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
-; NVPTX-DISABLED2-NEXT: ret void
-;
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
; AMDGPU-DISABLED-NEXT: entry:
@@ -3142,6 +1658,7 @@ define internal void @__omp_outlined__7_wrapper(i16 zeroext %0, i32 %1) #3 {
; AMDGPU-DISABLED-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]]
; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
; AMDGPU-DISABLED-NEXT: ret void
+;
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
; NVPTX-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
; NVPTX-DISABLED-NEXT: entry:
@@ -3153,6 +1670,7 @@ define internal void @__omp_outlined__7_wrapper(i16 zeroext %0, i32 %1) #3 {
; NVPTX-DISABLED-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]]
; NVPTX-DISABLED-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
; NVPTX-DISABLED-NEXT: ret void
+;
entry:
%.addr1 = alloca i32, align 4
%.zero.addr = alloca i32, align 4
@@ -3174,7 +1692,7 @@ define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65() #0
; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment)
+; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false)
; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; AMDGPU: is_worker_check:
@@ -3212,7 +1730,7 @@ define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65() #0
; AMDGPU: user_code.entry:
; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
; AMDGPU-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
-; AMDGPU-NEXT: call void @__kmpc_target_deinit()
+; AMDGPU-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
; AMDGPU-NEXT: br label [[COMMON_RET]]
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65
@@ -3221,7 +1739,7 @@ define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65() #0
; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment)
+; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false)
; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; NVPTX: is_worker_check:
@@ -3258,202 +1776,16 @@ define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65() #0
; NVPTX: user_code.entry:
; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
; NVPTX-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
-; NVPTX-NEXT: call void @__kmpc_target_deinit()
+; NVPTX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
; NVPTX-NEXT: br label [[COMMON_RET]]
;
-; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65
-; AMDGPU-DISABLED1-SAME: () #[[ATTR0]] {
-; AMDGPU-DISABLED1-NEXT: entry:
-; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment)
-; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
-; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
-; AMDGPU-DISABLED1: is_worker_check:
-; AMDGPU-DISABLED1-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-; AMDGPU-DISABLED1-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
-; AMDGPU-DISABLED1-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
-; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
-; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
-; AMDGPU-DISABLED1: worker_state_machine.begin:
-; AMDGPU-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
-; AMDGPU-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
-; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
-; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
-; AMDGPU-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
-; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
-; AMDGPU-DISABLED1: worker_state_machine.finished:
-; AMDGPU-DISABLED1-NEXT: ret void
-; AMDGPU-DISABLED1: worker_state_machine.is_active.check:
-; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
-; AMDGPU-DISABLED1: worker_state_machine.parallel_region.fallback.execute:
-; AMDGPU-DISABLED1-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]])
-; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
-; AMDGPU-DISABLED1: worker_state_machine.parallel_region.end:
-; AMDGPU-DISABLED1-NEXT: call void @__kmpc_kernel_end_parallel()
-; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
-; AMDGPU-DISABLED1: worker_state_machine.done.barrier:
-; AMDGPU-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
-; AMDGPU-DISABLED1: thread.user_code.check:
-; AMDGPU-DISABLED1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; AMDGPU-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
-; AMDGPU-DISABLED1: common.ret:
-; AMDGPU-DISABLED1-NEXT: ret void
-; AMDGPU-DISABLED1: user_code.entry:
-; AMDGPU-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
-; AMDGPU-DISABLED1-NEXT: call void @__kmpc_target_deinit()
-; AMDGPU-DISABLED1-NEXT: br label [[COMMON_RET]]
-;
-; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65
-; AMDGPU-DISABLED2-SAME: () #[[ATTR0]] {
-; AMDGPU-DISABLED2-NEXT: entry:
-; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment)
-; AMDGPU-DISABLED2-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
-; AMDGPU-DISABLED2-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
-; AMDGPU-DISABLED2: is_worker_check:
-; AMDGPU-DISABLED2-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-; AMDGPU-DISABLED2-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
-; AMDGPU-DISABLED2-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
-; AMDGPU-DISABLED2-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
-; AMDGPU-DISABLED2-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
-; AMDGPU-DISABLED2: worker_state_machine.begin:
-; AMDGPU-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
-; AMDGPU-DISABLED2-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
-; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
-; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
-; AMDGPU-DISABLED2-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
-; AMDGPU-DISABLED2-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
-; AMDGPU-DISABLED2: worker_state_machine.finished:
-; AMDGPU-DISABLED2-NEXT: ret void
-; AMDGPU-DISABLED2: worker_state_machine.is_active.check:
-; AMDGPU-DISABLED2-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
-; AMDGPU-DISABLED2: worker_state_machine.parallel_region.fallback.execute:
-; AMDGPU-DISABLED2-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]])
-; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
-; AMDGPU-DISABLED2: worker_state_machine.parallel_region.end:
-; AMDGPU-DISABLED2-NEXT: call void @__kmpc_kernel_end_parallel()
-; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
-; AMDGPU-DISABLED2: worker_state_machine.done.barrier:
-; AMDGPU-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
-; AMDGPU-DISABLED2: thread.user_code.check:
-; AMDGPU-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; AMDGPU-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
-; AMDGPU-DISABLED2: common.ret:
-; AMDGPU-DISABLED2-NEXT: ret void
-; AMDGPU-DISABLED2: user_code.entry:
-; AMDGPU-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
-; AMDGPU-DISABLED2-NEXT: call void @__kmpc_target_deinit()
-; AMDGPU-DISABLED2-NEXT: br label [[COMMON_RET]]
-;
-; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65
-; NVPTX-DISABLED1-SAME: () #[[ATTR0]] {
-; NVPTX-DISABLED1-NEXT: entry:
-; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment)
-; NVPTX-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
-; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
-; NVPTX-DISABLED1: is_worker_check:
-; NVPTX-DISABLED1-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-; NVPTX-DISABLED1-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
-; NVPTX-DISABLED1-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
-; NVPTX-DISABLED1-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
-; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
-; NVPTX-DISABLED1: worker_state_machine.begin:
-; NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; NVPTX-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
-; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
-; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
-; NVPTX-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
-; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
-; NVPTX-DISABLED1: worker_state_machine.finished:
-; NVPTX-DISABLED1-NEXT: ret void
-; NVPTX-DISABLED1: worker_state_machine.is_active.check:
-; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
-; NVPTX-DISABLED1: worker_state_machine.parallel_region.fallback.execute:
-; NVPTX-DISABLED1-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]])
-; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
-; NVPTX-DISABLED1: worker_state_machine.parallel_region.end:
-; NVPTX-DISABLED1-NEXT: call void @__kmpc_kernel_end_parallel()
-; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
-; NVPTX-DISABLED1: worker_state_machine.done.barrier:
-; NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
-; NVPTX-DISABLED1: thread.user_code.check:
-; NVPTX-DISABLED1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; NVPTX-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
-; NVPTX-DISABLED1: common.ret:
-; NVPTX-DISABLED1-NEXT: ret void
-; NVPTX-DISABLED1: user_code.entry:
-; NVPTX-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
-; NVPTX-DISABLED1-NEXT: call void @__kmpc_target_deinit()
-; NVPTX-DISABLED1-NEXT: br label [[COMMON_RET]]
-;
-; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65
-; NVPTX-DISABLED2-SAME: () #[[ATTR0]] {
-; NVPTX-DISABLED2-NEXT: entry:
-; NVPTX-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment)
-; NVPTX-DISABLED2-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
-; NVPTX-DISABLED2-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
-; NVPTX-DISABLED2: is_worker_check:
-; NVPTX-DISABLED2-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-; NVPTX-DISABLED2-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
-; NVPTX-DISABLED2-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
-; NVPTX-DISABLED2-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
-; NVPTX-DISABLED2-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
-; NVPTX-DISABLED2: worker_state_machine.begin:
-; NVPTX-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; NVPTX-DISABLED2-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
-; NVPTX-DISABLED2-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
-; NVPTX-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
-; NVPTX-DISABLED2-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
-; NVPTX-DISABLED2-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
-; NVPTX-DISABLED2: worker_state_machine.finished:
-; NVPTX-DISABLED2-NEXT: ret void
-; NVPTX-DISABLED2: worker_state_machine.is_active.check:
-; NVPTX-DISABLED2-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
-; NVPTX-DISABLED2: worker_state_machine.parallel_region.fallback.execute:
-; NVPTX-DISABLED2-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]])
-; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
-; NVPTX-DISABLED2: worker_state_machine.parallel_region.end:
-; NVPTX-DISABLED2-NEXT: call void @__kmpc_kernel_end_parallel()
-; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
-; NVPTX-DISABLED2: worker_state_machine.done.barrier:
-; NVPTX-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
-; NVPTX-DISABLED2: thread.user_code.check:
-; NVPTX-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; NVPTX-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
-; NVPTX-DISABLED2: common.ret:
-; NVPTX-DISABLED2-NEXT: ret void
-; NVPTX-DISABLED2: user_code.entry:
-; NVPTX-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
-; NVPTX-DISABLED2-NEXT: call void @__kmpc_target_deinit()
-; NVPTX-DISABLED2-NEXT: br label [[COMMON_RET]]
-;
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65
; AMDGPU-DISABLED-SAME: () #[[ATTR0]] {
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment)
+; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false)
; AMDGPU-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; AMDGPU-DISABLED: is_worker_check:
@@ -3489,17 +1821,18 @@ define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65() #0
; AMDGPU-DISABLED: common.ret:
; AMDGPU-DISABLED-NEXT: ret void
; AMDGPU-DISABLED: user_code.entry:
-; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR5]]
-; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]]
-; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit()
+; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
+; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
+; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
; AMDGPU-DISABLED-NEXT: br label [[COMMON_RET]]
+;
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65
; NVPTX-DISABLED-SAME: () #[[ATTR0]] {
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment)
+; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false)
; NVPTX-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; NVPTX-DISABLED: is_worker_check:
@@ -3534,14 +1867,15 @@ define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65() #0
; NVPTX-DISABLED: common.ret:
; NVPTX-DISABLED-NEXT: ret void
; NVPTX-DISABLED: user_code.entry:
-; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR5]]
-; NVPTX-DISABLED-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]]
-; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit()
+; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
+; NVPTX-DISABLED-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
+; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
; NVPTX-DISABLED-NEXT: br label [[COMMON_RET]]
+;
entry:
%.zero.addr = alloca i32, align 4
%.threadid_temp. = alloca i32, align 4
- %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment)
+ %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %common.ret
@@ -3553,7 +1887,7 @@ user_code.entry: ; preds = %entry
store i32 0, ptr %.zero.addr, align 4
store i32 %1, ptr %.threadid_temp., align 4, !tbaa !18
call void @__omp_outlined__8(ptr %.threadid_temp., ptr %.zero.addr) #6
- call void @__kmpc_target_deinit()
+ call void @__kmpc_target_deinit(ptr @1, i8 1)
br label %common.ret
}
@@ -3571,40 +1905,18 @@ define internal void @__omp_outlined__8(ptr noalias %.global_tid., ptr noalias %
; NVPTX-NEXT: call void @unknown() #[[ATTR8]]
; NVPTX-NEXT: ret void
;
-; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__8
-; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
-; AMDGPU-DISABLED1-NEXT: entry:
-; AMDGPU-DISABLED1-NEXT: call void @unknown() #[[ATTR8]]
-; AMDGPU-DISABLED1-NEXT: ret void
-;
-; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__8
-; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
-; AMDGPU-DISABLED2-NEXT: entry:
-; AMDGPU-DISABLED2-NEXT: call void @unknown() #[[ATTR8]]
-; AMDGPU-DISABLED2-NEXT: ret void
-;
-; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__8
-; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
-; NVPTX-DISABLED1-NEXT: entry:
-; NVPTX-DISABLED1-NEXT: call void @unknown() #[[ATTR8]]
-; NVPTX-DISABLED1-NEXT: ret void
-;
-; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__8
-; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
-; NVPTX-DISABLED2-NEXT: entry:
-; NVPTX-DISABLED2-NEXT: call void @unknown() #[[ATTR8]]
-; NVPTX-DISABLED2-NEXT: ret void
-;
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__8
; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: call void @unknown() #[[ATTR8]]
; AMDGPU-DISABLED-NEXT: ret void
+;
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__8
; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: call void @unknown() #[[ATTR8]]
; NVPTX-DISABLED-NEXT: ret void
+;
entry:
call void @unknown() #11
ret void
@@ -3617,7 +1929,7 @@ define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74() #0 {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment)
+; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false)
; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; AMDGPU: is_worker_check:
@@ -3663,7 +1975,7 @@ define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74() #0 {
; AMDGPU-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, ptr @"_omp_task_entry$") #[[ATTR4]]
; AMDGPU-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP2]]) #[[ATTR4]]
; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr @__omp_outlined__9_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; AMDGPU-NEXT: call void @__kmpc_target_deinit()
+; AMDGPU-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
; AMDGPU-NEXT: br label [[COMMON_RET]]
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74
@@ -3671,7 +1983,7 @@ define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74() #0 {
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment)
+; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false)
; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; NVPTX: is_worker_check:
@@ -3716,229 +2028,15 @@ define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74() #0 {
; NVPTX-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, ptr @"_omp_task_entry$") #[[ATTR4]]
; NVPTX-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP2]]) #[[ATTR4]]
; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr @__omp_outlined__9_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; NVPTX-NEXT: call void @__kmpc_target_deinit()
+; NVPTX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
; NVPTX-NEXT: br label [[COMMON_RET]]
;
-; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74
-; AMDGPU-DISABLED1-SAME: () #[[ATTR0]] {
-; AMDGPU-DISABLED1-NEXT: entry:
-; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-; AMDGPU-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment)
-; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
-; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
-; AMDGPU-DISABLED1: is_worker_check:
-; AMDGPU-DISABLED1-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-; AMDGPU-DISABLED1-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
-; AMDGPU-DISABLED1-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
-; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
-; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
-; AMDGPU-DISABLED1: worker_state_machine.begin:
-; AMDGPU-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
-; AMDGPU-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
-; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
-; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
-; AMDGPU-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
-; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
-; AMDGPU-DISABLED1: worker_state_machine.finished:
-; AMDGPU-DISABLED1-NEXT: ret void
-; AMDGPU-DISABLED1: worker_state_machine.is_active.check:
-; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
-; AMDGPU-DISABLED1: worker_state_machine.parallel_region.check:
-; AMDGPU-DISABLED1-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__9_wrapper.ID
-; AMDGPU-DISABLED1-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]]
-; AMDGPU-DISABLED1: worker_state_machine.parallel_region.execute:
-; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__9_wrapper(i16 0, i32 [[TMP0]])
-; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
-; AMDGPU-DISABLED1: worker_state_machine.parallel_region.fallback.execute:
-; AMDGPU-DISABLED1-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]])
-; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
-; AMDGPU-DISABLED1: worker_state_machine.parallel_region.end:
-; AMDGPU-DISABLED1-NEXT: call void @__kmpc_kernel_end_parallel()
-; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
-; AMDGPU-DISABLED1: worker_state_machine.done.barrier:
-; AMDGPU-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; AMDGPU-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
-; AMDGPU-DISABLED1: thread.user_code.check:
-; AMDGPU-DISABLED1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; AMDGPU-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
-; AMDGPU-DISABLED1: common.ret:
-; AMDGPU-DISABLED1-NEXT: ret void
-; AMDGPU-DISABLED1: user_code.entry:
-; AMDGPU-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; AMDGPU-DISABLED1-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, ptr @"_omp_task_entry$") #[[ATTR4]]
-; AMDGPU-DISABLED1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP2]]) #[[ATTR4]]
-; AMDGPU-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr @__omp_outlined__9_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; AMDGPU-DISABLED1-NEXT: call void @__kmpc_target_deinit()
-; AMDGPU-DISABLED1-NEXT: br label [[COMMON_RET]]
-;
-; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74
-; AMDGPU-DISABLED2-SAME: () #[[ATTR0]] {
-; AMDGPU-DISABLED2-NEXT: entry:
-; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-; AMDGPU-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment)
-; AMDGPU-DISABLED2-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
-; AMDGPU-DISABLED2-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
-; AMDGPU-DISABLED2: is_worker_check:
-; AMDGPU-DISABLED2-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-; AMDGPU-DISABLED2-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
-; AMDGPU-DISABLED2-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
-; AMDGPU-DISABLED2-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
-; AMDGPU-DISABLED2-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
-; AMDGPU-DISABLED2: worker_state_machine.begin:
-; AMDGPU-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
-; AMDGPU-DISABLED2-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
-; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
-; AMDGPU-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
-; AMDGPU-DISABLED2-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
-; AMDGPU-DISABLED2-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
-; AMDGPU-DISABLED2: worker_state_machine.finished:
-; AMDGPU-DISABLED2-NEXT: ret void
-; AMDGPU-DISABLED2: worker_state_machine.is_active.check:
-; AMDGPU-DISABLED2-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
-; AMDGPU-DISABLED2: worker_state_machine.parallel_region.check:
-; AMDGPU-DISABLED2-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__9_wrapper.ID
-; AMDGPU-DISABLED2-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]]
-; AMDGPU-DISABLED2: worker_state_machine.parallel_region.execute:
-; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__9_wrapper(i16 0, i32 [[TMP0]])
-; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
-; AMDGPU-DISABLED2: worker_state_machine.parallel_region.fallback.execute:
-; AMDGPU-DISABLED2-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]])
-; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
-; AMDGPU-DISABLED2: worker_state_machine.parallel_region.end:
-; AMDGPU-DISABLED2-NEXT: call void @__kmpc_kernel_end_parallel()
-; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
-; AMDGPU-DISABLED2: worker_state_machine.done.barrier:
-; AMDGPU-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; AMDGPU-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
-; AMDGPU-DISABLED2: thread.user_code.check:
-; AMDGPU-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; AMDGPU-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
-; AMDGPU-DISABLED2: common.ret:
-; AMDGPU-DISABLED2-NEXT: ret void
-; AMDGPU-DISABLED2: user_code.entry:
-; AMDGPU-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; AMDGPU-DISABLED2-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, ptr @"_omp_task_entry$") #[[ATTR4]]
-; AMDGPU-DISABLED2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP2]]) #[[ATTR4]]
-; AMDGPU-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr @__omp_outlined__9_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; AMDGPU-DISABLED2-NEXT: call void @__kmpc_target_deinit()
-; AMDGPU-DISABLED2-NEXT: br label [[COMMON_RET]]
-;
-; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74
-; NVPTX-DISABLED1-SAME: () #[[ATTR0]] {
-; NVPTX-DISABLED1-NEXT: entry:
-; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment)
-; NVPTX-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
-; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
-; NVPTX-DISABLED1: is_worker_check:
-; NVPTX-DISABLED1-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-; NVPTX-DISABLED1-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
-; NVPTX-DISABLED1-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
-; NVPTX-DISABLED1-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
-; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
-; NVPTX-DISABLED1: worker_state_machine.begin:
-; NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; NVPTX-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
-; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
-; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
-; NVPTX-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
-; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
-; NVPTX-DISABLED1: worker_state_machine.finished:
-; NVPTX-DISABLED1-NEXT: ret void
-; NVPTX-DISABLED1: worker_state_machine.is_active.check:
-; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
-; NVPTX-DISABLED1: worker_state_machine.parallel_region.check:
-; NVPTX-DISABLED1-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__9_wrapper.ID
-; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]]
-; NVPTX-DISABLED1: worker_state_machine.parallel_region.execute:
-; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__9_wrapper(i16 0, i32 [[TMP0]])
-; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
-; NVPTX-DISABLED1: worker_state_machine.parallel_region.fallback.execute:
-; NVPTX-DISABLED1-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]])
-; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
-; NVPTX-DISABLED1: worker_state_machine.parallel_region.end:
-; NVPTX-DISABLED1-NEXT: call void @__kmpc_kernel_end_parallel()
-; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
-; NVPTX-DISABLED1: worker_state_machine.done.barrier:
-; NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; NVPTX-DISABLED1-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
-; NVPTX-DISABLED1: thread.user_code.check:
-; NVPTX-DISABLED1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; NVPTX-DISABLED1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
-; NVPTX-DISABLED1: common.ret:
-; NVPTX-DISABLED1-NEXT: ret void
-; NVPTX-DISABLED1: user_code.entry:
-; NVPTX-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; NVPTX-DISABLED1-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, ptr @"_omp_task_entry$") #[[ATTR4]]
-; NVPTX-DISABLED1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP2]]) #[[ATTR4]]
-; NVPTX-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr @__omp_outlined__9_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; NVPTX-DISABLED1-NEXT: call void @__kmpc_target_deinit()
-; NVPTX-DISABLED1-NEXT: br label [[COMMON_RET]]
-;
-; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74
-; NVPTX-DISABLED2-SAME: () #[[ATTR0]] {
-; NVPTX-DISABLED2-NEXT: entry:
-; NVPTX-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment)
-; NVPTX-DISABLED2-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
-; NVPTX-DISABLED2-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
-; NVPTX-DISABLED2: is_worker_check:
-; NVPTX-DISABLED2-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-; NVPTX-DISABLED2-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
-; NVPTX-DISABLED2-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
-; NVPTX-DISABLED2-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
-; NVPTX-DISABLED2-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
-; NVPTX-DISABLED2: worker_state_machine.begin:
-; NVPTX-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; NVPTX-DISABLED2-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
-; NVPTX-DISABLED2-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
-; NVPTX-DISABLED2-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
-; NVPTX-DISABLED2-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
-; NVPTX-DISABLED2-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
-; NVPTX-DISABLED2: worker_state_machine.finished:
-; NVPTX-DISABLED2-NEXT: ret void
-; NVPTX-DISABLED2: worker_state_machine.is_active.check:
-; NVPTX-DISABLED2-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
-; NVPTX-DISABLED2: worker_state_machine.parallel_region.check:
-; NVPTX-DISABLED2-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq ptr [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__9_wrapper.ID
-; NVPTX-DISABLED2-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]]
-; NVPTX-DISABLED2: worker_state_machine.parallel_region.execute:
-; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__9_wrapper(i16 0, i32 [[TMP0]])
-; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
-; NVPTX-DISABLED2: worker_state_machine.parallel_region.fallback.execute:
-; NVPTX-DISABLED2-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]])
-; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
-; NVPTX-DISABLED2: worker_state_machine.parallel_region.end:
-; NVPTX-DISABLED2-NEXT: call void @__kmpc_kernel_end_parallel()
-; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
-; NVPTX-DISABLED2: worker_state_machine.done.barrier:
-; NVPTX-DISABLED2-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; NVPTX-DISABLED2-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
-; NVPTX-DISABLED2: thread.user_code.check:
-; NVPTX-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; NVPTX-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
-; NVPTX-DISABLED2: common.ret:
-; NVPTX-DISABLED2-NEXT: ret void
-; NVPTX-DISABLED2: user_code.entry:
-; NVPTX-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; NVPTX-DISABLED2-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, ptr @"_omp_task_entry$") #[[ATTR4]]
-; NVPTX-DISABLED2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP2]]) #[[ATTR4]]
-; NVPTX-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr @__omp_outlined__9_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; NVPTX-DISABLED2-NEXT: call void @__kmpc_target_deinit()
-; NVPTX-DISABLED2-NEXT: br label [[COMMON_RET]]
-;
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74
; AMDGPU-DISABLED-SAME: () #[[ATTR0]] {
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment)
+; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false)
; AMDGPU-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; AMDGPU-DISABLED: is_worker_check:
@@ -3984,14 +2082,15 @@ define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74() #0 {
; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, ptr @"_omp_task_entry$") #[[ATTR4]]
; AMDGPU-DISABLED-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP2]]) #[[ATTR4]]
; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr @__omp_outlined__9_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit()
+; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
; AMDGPU-DISABLED-NEXT: br label [[COMMON_RET]]
+;
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74
; NVPTX-DISABLED-SAME: () #[[ATTR0]] {
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment)
+; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false)
; NVPTX-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; NVPTX-DISABLED: is_worker_check:
@@ -4036,11 +2135,12 @@ define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74() #0 {
; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, ptr @"_omp_task_entry$") #[[ATTR4]]
; NVPTX-DISABLED-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP2]]) #[[ATTR4]]
; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr @__omp_outlined__9_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit()
+; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
; NVPTX-DISABLED-NEXT: br label [[COMMON_RET]]
+;
entry:
%captured_vars_addrs = alloca [0 x ptr], align 8
- %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment)
+ %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %common.ret
@@ -4052,7 +2152,7 @@ user_code.entry: ; preds = %entry
%2 = call ptr @__kmpc_omp_task_alloc(ptr @1, i32 %1, i32 1, i64 40, i64 0, ptr @"_omp_task_entry$")
%3 = call i32 @__kmpc_omp_task(ptr @1, i32 %1, ptr %2)
call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr @__omp_outlined__9_wrapper, ptr %captured_vars_addrs, i64 0)
- call void @__kmpc_target_deinit()
+ call void @__kmpc_target_deinit(ptr @1, i8 1)
br label %common.ret
}
@@ -4070,40 +2170,18 @@ define internal void @.omp_outlined.(i32 %.global_tid., ptr noalias %.part_id.,
; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR7]]
; NVPTX-NEXT: ret void
;
-; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@.omp_outlined.
-; AMDGPU-DISABLED1-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTPART_ID_:%.*]], ptr noalias [[DOTPRIVATES_:%.*]], ptr noalias [[DOTCOPY_FN_:%.*]], ptr [[DOTTASK_T_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] {
-; AMDGPU-DISABLED1-NEXT: entry:
-; AMDGPU-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7]]
-; AMDGPU-DISABLED1-NEXT: ret void
-;
-; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@.omp_outlined.
-; AMDGPU-DISABLED2-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTPART_ID_:%.*]], ptr noalias [[DOTPRIVATES_:%.*]], ptr noalias [[DOTCOPY_FN_:%.*]], ptr [[DOTTASK_T_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] {
-; AMDGPU-DISABLED2-NEXT: entry:
-; AMDGPU-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7]]
-; AMDGPU-DISABLED2-NEXT: ret void
-;
-; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@.omp_outlined.
-; NVPTX-DISABLED1-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTPART_ID_:%.*]], ptr noalias [[DOTPRIVATES_:%.*]], ptr noalias [[DOTCOPY_FN_:%.*]], ptr [[DOTTASK_T_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] {
-; NVPTX-DISABLED1-NEXT: entry:
-; NVPTX-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7]]
-; NVPTX-DISABLED1-NEXT: ret void
-;
-; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@.omp_outlined.
-; NVPTX-DISABLED2-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTPART_ID_:%.*]], ptr noalias [[DOTPRIVATES_:%.*]], ptr noalias [[DOTCOPY_FN_:%.*]], ptr [[DOTTASK_T_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] {
-; NVPTX-DISABLED2-NEXT: entry:
-; NVPTX-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7]]
-; NVPTX-DISABLED2-NEXT: ret void
-;
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@.omp_outlined.
; AMDGPU-DISABLED-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTPART_ID_:%.*]], ptr noalias [[DOTPRIVATES_:%.*]], ptr noalias [[DOTCOPY_FN_:%.*]], ptr [[DOTTASK_T_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] {
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR7]]
; AMDGPU-DISABLED-NEXT: ret void
+;
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@.omp_outlined.
; NVPTX-DISABLED-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTPART_ID_:%.*]], ptr noalias [[DOTPRIVATES_:%.*]], ptr noalias [[DOTCOPY_FN_:%.*]], ptr [[DOTTASK_T_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] {
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR7]]
; NVPTX-DISABLED-NEXT: ret void
+;
entry:
call void @spmd_amenable() #10
ret void
@@ -4141,37 +2219,23 @@ declare void @unknowni32p(ptr) #2
declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #1
; Make it a weak definition so we will apply custom state machine rewriting but can't use the body in the reasoning.
-define weak i32 @__kmpc_target_init(ptr) {
+define weak i32 @__kmpc_target_init(ptr, i8, i1) {
; AMDGPU-LABEL: define {{[^@]+}}@__kmpc_target_init
-; AMDGPU-SAME: (ptr [[TMP0:%.*]]) {
+; AMDGPU-SAME: (ptr [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) {
; AMDGPU-NEXT: ret i32 0
;
; NVPTX-LABEL: define {{[^@]+}}@__kmpc_target_init
-; NVPTX-SAME: (ptr [[TMP0:%.*]]) {
+; NVPTX-SAME: (ptr [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) {
; NVPTX-NEXT: ret i32 0
;
-; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__kmpc_target_init
-; AMDGPU-DISABLED1-SAME: (ptr [[TMP0:%.*]]) {
-; AMDGPU-DISABLED1-NEXT: ret i32 0
-;
-; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__kmpc_target_init
-; AMDGPU-DISABLED2-SAME: (ptr [[TMP0:%.*]]) {
-; AMDGPU-DISABLED2-NEXT: ret i32 0
-;
-; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__kmpc_target_init
-; NVPTX-DISABLED1-SAME: (ptr [[TMP0:%.*]]) {
-; NVPTX-DISABLED1-NEXT: ret i32 0
-;
-; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__kmpc_target_init
-; NVPTX-DISABLED2-SAME: (ptr [[TMP0:%.*]]) {
-; NVPTX-DISABLED2-NEXT: ret i32 0
-;
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__kmpc_target_init
-; AMDGPU-DISABLED-SAME: (ptr [[TMP0:%.*]]) {
+; AMDGPU-DISABLED-SAME: (ptr [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) {
; AMDGPU-DISABLED-NEXT: ret i32 0
+;
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__kmpc_target_init
-; NVPTX-DISABLED-SAME: (ptr [[TMP0:%.*]]) {
+; NVPTX-DISABLED-SAME: (ptr [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) {
; NVPTX-DISABLED-NEXT: ret i32 0
+;
ret i32 0
}
@@ -4189,7 +2253,7 @@ declare void @spmd_amenable() #5
; Function Attrs: nounwind
declare i32 @__kmpc_global_thread_num(ptr) #6
-declare void @__kmpc_target_deinit()
+declare void @__kmpc_target_deinit(ptr, i8)
; Function Attrs: alwaysinline convergent norecurse nounwind
@@ -4206,40 +2270,18 @@ define internal void @__omp_outlined__9(ptr noalias %.global_tid., ptr noalias %
; NVPTX-NEXT: call void @unknown() #[[ATTR8]]
; NVPTX-NEXT: ret void
;
-; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__9
-; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
-; AMDGPU-DISABLED1-NEXT: entry:
-; AMDGPU-DISABLED1-NEXT: call void @unknown() #[[ATTR8]]
-; AMDGPU-DISABLED1-NEXT: ret void
-;
-; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__9
-; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
-; AMDGPU-DISABLED2-NEXT: entry:
-; AMDGPU-DISABLED2-NEXT: call void @unknown() #[[ATTR8]]
-; AMDGPU-DISABLED2-NEXT: ret void
-;
-; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__9
-; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
-; NVPTX-DISABLED1-NEXT: entry:
-; NVPTX-DISABLED1-NEXT: call void @unknown() #[[ATTR8]]
-; NVPTX-DISABLED1-NEXT: ret void
-;
-; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__9
-; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
-; NVPTX-DISABLED2-NEXT: entry:
-; NVPTX-DISABLED2-NEXT: call void @unknown() #[[ATTR8]]
-; NVPTX-DISABLED2-NEXT: ret void
-;
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__9
; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: call void @unknown() #[[ATTR8]]
; AMDGPU-DISABLED-NEXT: ret void
+;
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__9
; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: call void @unknown() #[[ATTR8]]
; NVPTX-DISABLED-NEXT: ret void
+;
entry:
call void @unknown() #11
ret void
@@ -4267,46 +2309,6 @@ define internal void @__omp_outlined__9_wrapper(i16 zeroext %0, i32 %1) #3 {
; NVPTX-NEXT: call void @__omp_outlined__9(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-NEXT: ret void
;
-; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__9_wrapper
-; AMDGPU-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
-; AMDGPU-DISABLED1-NEXT: entry:
-; AMDGPU-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__9(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
-; AMDGPU-DISABLED1-NEXT: ret void
-;
-; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__9_wrapper
-; AMDGPU-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
-; AMDGPU-DISABLED2-NEXT: entry:
-; AMDGPU-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__9(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
-; AMDGPU-DISABLED2-NEXT: ret void
-;
-; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__9_wrapper
-; NVPTX-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
-; NVPTX-DISABLED1-NEXT: entry:
-; NVPTX-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__9(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
-; NVPTX-DISABLED1-NEXT: ret void
-;
-; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__9_wrapper
-; NVPTX-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
-; NVPTX-DISABLED2-NEXT: entry:
-; NVPTX-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__9(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
-; NVPTX-DISABLED2-NEXT: ret void
-;
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__9_wrapper
; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
; AMDGPU-DISABLED-NEXT: entry:
@@ -4316,6 +2318,7 @@ define internal void @__omp_outlined__9_wrapper(i16 zeroext %0, i32 %1) #3 {
; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__9(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-DISABLED-NEXT: ret void
+;
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__9_wrapper
; NVPTX-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
; NVPTX-DISABLED-NEXT: entry:
@@ -4325,6 +2328,7 @@ define internal void @__omp_outlined__9_wrapper(i16 zeroext %0, i32 %1) #3 {
; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
; NVPTX-DISABLED-NEXT: call void @__omp_outlined__9(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-DISABLED-NEXT: ret void
+;
entry:
%.addr1 = alloca i32, align 4
%.zero.addr = alloca i32, align 4
@@ -4389,92 +2393,6 @@ attributes #11 = { convergent }
!30 = !{!31, !27, i64 0}
!31 = !{!"kmp_task_t_with_privates", !32, i64 0}
!32 = !{!"kmp_task_t", !27, i64 0, !27, i64 8, !19, i64 16, !20, i64 24, !20, i64 32}
-; AMDGPU-DISABLED: attributes #[[ATTR0]] = { alwaysinline convergent norecurse nounwind }
-; AMDGPU-DISABLED: attributes #[[ATTR1]] = { norecurse }
-; AMDGPU-DISABLED: attributes #[[ATTR2]] = { convergent norecurse nounwind }
-; AMDGPU-DISABLED: attributes #[[ATTR3]] = { alwaysinline norecurse nounwind }
-; AMDGPU-DISABLED: attributes #[[ATTR4]] = { alwaysinline convergent nounwind }
-; AMDGPU-DISABLED: attributes #[[ATTR5]] = { nounwind }
-; AMDGPU-DISABLED: attributes #[[ATTR6:[0-9]+]] = { nosync nounwind }
-; AMDGPU-DISABLED: attributes #[[ATTR7:[0-9]+]] = { nofree nosync nounwind allocsize(0) }
-; AMDGPU-DISABLED: attributes #[[ATTR8]] = { convergent "llvm.assume"="ompx_spmd_amenable" }
-; AMDGPU-DISABLED: attributes #[[ATTR9]] = { convergent }
-; AMDGPU-DISABLED: attributes #[[ATTR10:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
-; AMDGPU-DISABLED: attributes #[[ATTR11:[0-9]+]] = { alwaysinline }
-; AMDGPU-DISABLED: attributes #[[ATTR12:[0-9]+]] = { convergent nounwind }
-; NVPTX-DISABLED: attributes #[[ATTR0]] = { alwaysinline convergent norecurse nounwind }
-; NVPTX-DISABLED: attributes #[[ATTR1]] = { norecurse }
-; NVPTX-DISABLED: attributes #[[ATTR2]] = { convergent norecurse nounwind }
-; NVPTX-DISABLED: attributes #[[ATTR3]] = { alwaysinline norecurse nounwind }
-; NVPTX-DISABLED: attributes #[[ATTR4]] = { alwaysinline convergent nounwind }
-; NVPTX-DISABLED: attributes #[[ATTR5]] = { nounwind }
-; NVPTX-DISABLED: attributes #[[ATTR6:[0-9]+]] = { nosync nounwind }
-; NVPTX-DISABLED: attributes #[[ATTR7:[0-9]+]] = { nofree nosync nounwind allocsize(0) }
-; NVPTX-DISABLED: attributes #[[ATTR8]] = { convergent "llvm.assume"="ompx_spmd_amenable" }
-; NVPTX-DISABLED: attributes #[[ATTR9]] = { convergent }
-; NVPTX-DISABLED: attributes #[[ATTR10:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
-; NVPTX-DISABLED: attributes #[[ATTR11:[0-9]+]] = { alwaysinline }
-; NVPTX-DISABLED: attributes #[[ATTR12:[0-9]+]] = { convergent nounwind }
-; AMDGPU-DISABLED: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_task", i32 74, i32 5}
-; AMDGPU-DISABLED: [[META1:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1}
-; AMDGPU-DISABLED: [[META2:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop", i32 5, i32 0}
-; AMDGPU-DISABLED: [[META3:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2}
-; AMDGPU-DISABLED: [[META4:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4}
-; AMDGPU-DISABLED: [[META5:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3}
-; AMDGPU-DISABLED: [[META6:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5, !"kernel", i32 1}
-; AMDGPU-DISABLED: [[META7:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20, !"kernel", i32 1}
-; AMDGPU-DISABLED: [[META8:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35, !"kernel", i32 1}
-; AMDGPU-DISABLED: [[META9:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50, !"kernel", i32 1}
-; AMDGPU-DISABLED: [[META10:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65, !"kernel", i32 1}
-; AMDGPU-DISABLED: [[META11:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74, !"kernel", i32 1}
-; AMDGPU-DISABLED: [[META12:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
-; AMDGPU-DISABLED: [[META13:![0-9]+]] = !{i32 7, !"openmp", i32 50}
-; AMDGPU-DISABLED: [[META14:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
-; AMDGPU-DISABLED: [[META15:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
-; AMDGPU-DISABLED: [[META16:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
-; AMDGPU-DISABLED: [[META17:![0-9]+]] = !{!"clang version 14.0.0"}
-; AMDGPU-DISABLED: [[TBAA18]] = !{!19, !19, i64 0}
-; AMDGPU-DISABLED: [[META19:![0-9]+]] = !{!"int", !20, i64 0}
-; AMDGPU-DISABLED: [[META20:![0-9]+]] = !{!"omnipotent char", !21, i64 0}
-; AMDGPU-DISABLED: [[META21:![0-9]+]] = !{!"Simple C/C++ TBAA"}
-; AMDGPU-DISABLED: [[LOOP22]] = distinct !{!22, !23, !24}
-; AMDGPU-DISABLED: [[META23:![0-9]+]] = !{!"llvm.loop.mustprogress"}
-; AMDGPU-DISABLED: [[META24:![0-9]+]] = !{!"llvm.loop.unroll.disable"}
-; AMDGPU-DISABLED: [[LOOP25]] = distinct !{!25, !23, !24}
-; AMDGPU-DISABLED: [[TBAA26]] = !{!27, !27, i64 0}
-; AMDGPU-DISABLED: [[META27:![0-9]+]] = !{!"any pointer", !20, i64 0}
-; AMDGPU-DISABLED: [[LOOP28]] = distinct !{!28, !23, !24}
-; AMDGPU-DISABLED: [[LOOP29]] = distinct !{!29, !23, !24}
-; NVPTX-DISABLED: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_task", i32 74, i32 5}
-; NVPTX-DISABLED: [[META1:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1}
-; NVPTX-DISABLED: [[META2:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop", i32 5, i32 0}
-; NVPTX-DISABLED: [[META3:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2}
-; NVPTX-DISABLED: [[META4:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4}
-; NVPTX-DISABLED: [[META5:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3}
-; NVPTX-DISABLED: [[META6:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5, !"kernel", i32 1}
-; NVPTX-DISABLED: [[META7:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20, !"kernel", i32 1}
-; NVPTX-DISABLED: [[META8:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35, !"kernel", i32 1}
-; NVPTX-DISABLED: [[META9:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50, !"kernel", i32 1}
-; NVPTX-DISABLED: [[META10:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65, !"kernel", i32 1}
-; NVPTX-DISABLED: [[META11:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74, !"kernel", i32 1}
-; NVPTX-DISABLED: [[META12:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
-; NVPTX-DISABLED: [[META13:![0-9]+]] = !{i32 7, !"openmp", i32 50}
-; NVPTX-DISABLED: [[META14:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
-; NVPTX-DISABLED: [[META15:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
-; NVPTX-DISABLED: [[META16:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
-; NVPTX-DISABLED: [[META17:![0-9]+]] = !{!"clang version 14.0.0"}
-; NVPTX-DISABLED: [[TBAA18]] = !{!19, !19, i64 0}
-; NVPTX-DISABLED: [[META19:![0-9]+]] = !{!"int", !20, i64 0}
-; NVPTX-DISABLED: [[META20:![0-9]+]] = !{!"omnipotent char", !21, i64 0}
-; NVPTX-DISABLED: [[META21:![0-9]+]] = !{!"Simple C/C++ TBAA"}
-; NVPTX-DISABLED: [[LOOP22]] = distinct !{!22, !23, !24}
-; NVPTX-DISABLED: [[META23:![0-9]+]] = !{!"llvm.loop.mustprogress"}
-; NVPTX-DISABLED: [[META24:![0-9]+]] = !{!"llvm.loop.unroll.disable"}
-; NVPTX-DISABLED: [[LOOP25]] = distinct !{!25, !23, !24}
-; NVPTX-DISABLED: [[TBAA26]] = !{!27, !27, i64 0}
-; NVPTX-DISABLED: [[META27:![0-9]+]] = !{!"any pointer", !20, i64 0}
-; NVPTX-DISABLED: [[LOOP28]] = distinct !{!28, !23, !24}
-; NVPTX-DISABLED: [[LOOP29]] = distinct !{!29, !23, !24}
;.
; AMDGPU: attributes #[[ATTR0]] = { alwaysinline convergent norecurse nounwind "kernel" }
; AMDGPU: attributes #[[ATTR1]] = { norecurse }
@@ -4502,57 +2420,31 @@ attributes #11 = { convergent }
; NVPTX: attributes #[[ATTR10:[0-9]+]] = { alwaysinline }
; NVPTX: attributes #[[ATTR11:[0-9]+]] = { convergent nounwind }
;.
-; AMDGPU-DISABLED1: attributes #[[ATTR0]] = { alwaysinline convergent norecurse nounwind "kernel" }
-; AMDGPU-DISABLED1: attributes #[[ATTR1]] = { norecurse }
-; AMDGPU-DISABLED1: attributes #[[ATTR2]] = { convergent norecurse nounwind }
-; AMDGPU-DISABLED1: attributes #[[ATTR3]] = { alwaysinline convergent nounwind }
-; AMDGPU-DISABLED1: attributes #[[ATTR4]] = { nounwind }
-; AMDGPU-DISABLED1: attributes #[[ATTR5:[0-9]+]] = { nosync nounwind }
-; AMDGPU-DISABLED1: attributes #[[ATTR6:[0-9]+]] = { nofree nosync nounwind allocsize(0) }
-; AMDGPU-DISABLED1: attributes #[[ATTR7]] = { convergent "llvm.assume"="ompx_spmd_amenable" }
-; AMDGPU-DISABLED1: attributes #[[ATTR8]] = { convergent }
-; AMDGPU-DISABLED1: attributes #[[ATTR9:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
-; AMDGPU-DISABLED1: attributes #[[ATTR10:[0-9]+]] = { alwaysinline }
-; AMDGPU-DISABLED1: attributes #[[ATTR11:[0-9]+]] = { convergent nounwind }
-;.
-; AMDGPU-DISABLED2: attributes #[[ATTR0]] = { alwaysinline convergent norecurse nounwind "kernel" }
-; AMDGPU-DISABLED2: attributes #[[ATTR1]] = { norecurse }
-; AMDGPU-DISABLED2: attributes #[[ATTR2]] = { convergent norecurse nounwind }
-; AMDGPU-DISABLED2: attributes #[[ATTR3]] = { alwaysinline convergent nounwind }
-; AMDGPU-DISABLED2: attributes #[[ATTR4]] = { nounwind }
-; AMDGPU-DISABLED2: attributes #[[ATTR5:[0-9]+]] = { nosync nounwind }
-; AMDGPU-DISABLED2: attributes #[[ATTR6:[0-9]+]] = { nofree nosync nounwind allocsize(0) }
-; AMDGPU-DISABLED2: attributes #[[ATTR7]] = { convergent "llvm.assume"="ompx_spmd_amenable" }
-; AMDGPU-DISABLED2: attributes #[[ATTR8]] = { convergent }
-; AMDGPU-DISABLED2: attributes #[[ATTR9:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
-; AMDGPU-DISABLED2: attributes #[[ATTR10:[0-9]+]] = { alwaysinline }
-; AMDGPU-DISABLED2: attributes #[[ATTR11:[0-9]+]] = { convergent nounwind }
-;.
-; NVPTX-DISABLED1: attributes #[[ATTR0]] = { alwaysinline convergent norecurse nounwind "kernel" }
-; NVPTX-DISABLED1: attributes #[[ATTR1]] = { norecurse }
-; NVPTX-DISABLED1: attributes #[[ATTR2]] = { convergent norecurse nounwind }
-; NVPTX-DISABLED1: attributes #[[ATTR3]] = { alwaysinline convergent nounwind }
-; NVPTX-DISABLED1: attributes #[[ATTR4]] = { nounwind }
-; NVPTX-DISABLED1: attributes #[[ATTR5:[0-9]+]] = { nosync nounwind }
-; NVPTX-DISABLED1: attributes #[[ATTR6:[0-9]+]] = { nofree nosync nounwind allocsize(0) }
-; NVPTX-DISABLED1: attributes #[[ATTR7]] = { convergent "llvm.assume"="ompx_spmd_amenable" }
-; NVPTX-DISABLED1: attributes #[[ATTR8]] = { convergent }
-; NVPTX-DISABLED1: attributes #[[ATTR9:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
-; NVPTX-DISABLED1: attributes #[[ATTR10:[0-9]+]] = { alwaysinline }
-; NVPTX-DISABLED1: attributes #[[ATTR11:[0-9]+]] = { convergent nounwind }
+; AMDGPU-DISABLED: attributes #[[ATTR0]] = { alwaysinline convergent norecurse nounwind "kernel" }
+; AMDGPU-DISABLED: attributes #[[ATTR1]] = { norecurse }
+; AMDGPU-DISABLED: attributes #[[ATTR2]] = { convergent norecurse nounwind }
+; AMDGPU-DISABLED: attributes #[[ATTR3]] = { alwaysinline convergent nounwind }
+; AMDGPU-DISABLED: attributes #[[ATTR4]] = { nounwind }
+; AMDGPU-DISABLED: attributes #[[ATTR5:[0-9]+]] = { nosync nounwind }
+; AMDGPU-DISABLED: attributes #[[ATTR6:[0-9]+]] = { nofree nosync nounwind allocsize(0) }
+; AMDGPU-DISABLED: attributes #[[ATTR7]] = { convergent "llvm.assume"="ompx_spmd_amenable" }
+; AMDGPU-DISABLED: attributes #[[ATTR8]] = { convergent }
+; AMDGPU-DISABLED: attributes #[[ATTR9:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
+; AMDGPU-DISABLED: attributes #[[ATTR10:[0-9]+]] = { alwaysinline }
+; AMDGPU-DISABLED: attributes #[[ATTR11:[0-9]+]] = { convergent nounwind }
;.
-; NVPTX-DISABLED2: attributes #[[ATTR0]] = { alwaysinline convergent norecurse nounwind "kernel" }
-; NVPTX-DISABLED2: attributes #[[ATTR1]] = { norecurse }
-; NVPTX-DISABLED2: attributes #[[ATTR2]] = { convergent norecurse nounwind }
-; NVPTX-DISABLED2: attributes #[[ATTR3]] = { alwaysinline convergent nounwind }
-; NVPTX-DISABLED2: attributes #[[ATTR4]] = { nounwind }
-; NVPTX-DISABLED2: attributes #[[ATTR5:[0-9]+]] = { nosync nounwind }
-; NVPTX-DISABLED2: attributes #[[ATTR6:[0-9]+]] = { nofree nosync nounwind allocsize(0) }
-; NVPTX-DISABLED2: attributes #[[ATTR7]] = { convergent "llvm.assume"="ompx_spmd_amenable" }
-; NVPTX-DISABLED2: attributes #[[ATTR8]] = { convergent }
-; NVPTX-DISABLED2: attributes #[[ATTR9:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
-; NVPTX-DISABLED2: attributes #[[ATTR10:[0-9]+]] = { alwaysinline }
-; NVPTX-DISABLED2: attributes #[[ATTR11:[0-9]+]] = { convergent nounwind }
+; NVPTX-DISABLED: attributes #[[ATTR0]] = { alwaysinline convergent norecurse nounwind "kernel" }
+; NVPTX-DISABLED: attributes #[[ATTR1]] = { norecurse }
+; NVPTX-DISABLED: attributes #[[ATTR2]] = { convergent norecurse nounwind }
+; NVPTX-DISABLED: attributes #[[ATTR3]] = { alwaysinline convergent nounwind }
+; NVPTX-DISABLED: attributes #[[ATTR4]] = { nounwind }
+; NVPTX-DISABLED: attributes #[[ATTR5:[0-9]+]] = { nosync nounwind }
+; NVPTX-DISABLED: attributes #[[ATTR6:[0-9]+]] = { nofree nosync nounwind allocsize(0) }
+; NVPTX-DISABLED: attributes #[[ATTR7]] = { convergent "llvm.assume"="ompx_spmd_amenable" }
+; NVPTX-DISABLED: attributes #[[ATTR8]] = { convergent }
+; NVPTX-DISABLED: attributes #[[ATTR9:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
+; NVPTX-DISABLED: attributes #[[ATTR10:[0-9]+]] = { alwaysinline }
+; NVPTX-DISABLED: attributes #[[ATTR11:[0-9]+]] = { convergent nounwind }
;.
; AMDGPU: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_task", i32 74, i32 5}
; AMDGPU: [[META1:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1}
@@ -4616,127 +2508,65 @@ attributes #11 = { convergent }
; NVPTX: [[LOOP28]] = distinct !{!28, !23, !24}
; NVPTX: [[LOOP29]] = distinct !{!29, !23, !24}
;.
-; AMDGPU-DISABLED1: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_task", i32 74, i32 5}
-; AMDGPU-DISABLED1: [[META1:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1}
-; AMDGPU-DISABLED1: [[META2:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop", i32 5, i32 0}
-; AMDGPU-DISABLED1: [[META3:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2}
-; AMDGPU-DISABLED1: [[META4:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4}
-; AMDGPU-DISABLED1: [[META5:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3}
-; AMDGPU-DISABLED1: [[META6:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5, !"kernel", i32 1}
-; AMDGPU-DISABLED1: [[META7:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20, !"kernel", i32 1}
-; AMDGPU-DISABLED1: [[META8:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35, !"kernel", i32 1}
-; AMDGPU-DISABLED1: [[META9:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50, !"kernel", i32 1}
-; AMDGPU-DISABLED1: [[META10:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65, !"kernel", i32 1}
-; AMDGPU-DISABLED1: [[META11:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74, !"kernel", i32 1}
-; AMDGPU-DISABLED1: [[META12:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
-; AMDGPU-DISABLED1: [[META13:![0-9]+]] = !{i32 7, !"openmp", i32 50}
-; AMDGPU-DISABLED1: [[META14:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
-; AMDGPU-DISABLED1: [[META15:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
-; AMDGPU-DISABLED1: [[META16:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
-; AMDGPU-DISABLED1: [[META17:![0-9]+]] = !{!"clang version 14.0.0"}
-; AMDGPU-DISABLED1: [[TBAA18]] = !{!19, !19, i64 0}
-; AMDGPU-DISABLED1: [[META19:![0-9]+]] = !{!"int", !20, i64 0}
-; AMDGPU-DISABLED1: [[META20:![0-9]+]] = !{!"omnipotent char", !21, i64 0}
-; AMDGPU-DISABLED1: [[META21:![0-9]+]] = !{!"Simple C/C++ TBAA"}
-; AMDGPU-DISABLED1: [[LOOP22]] = distinct !{!22, !23, !24}
-; AMDGPU-DISABLED1: [[META23:![0-9]+]] = !{!"llvm.loop.mustprogress"}
-; AMDGPU-DISABLED1: [[META24:![0-9]+]] = !{!"llvm.loop.unroll.disable"}
-; AMDGPU-DISABLED1: [[LOOP25]] = distinct !{!25, !23, !24}
-; AMDGPU-DISABLED1: [[TBAA26]] = !{!27, !27, i64 0}
-; AMDGPU-DISABLED1: [[META27:![0-9]+]] = !{!"any pointer", !20, i64 0}
-; AMDGPU-DISABLED1: [[LOOP28]] = distinct !{!28, !23, !24}
-; AMDGPU-DISABLED1: [[LOOP29]] = distinct !{!29, !23, !24}
-;.
-; AMDGPU-DISABLED2: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_task", i32 74, i32 5}
-; AMDGPU-DISABLED2: [[META1:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1}
-; AMDGPU-DISABLED2: [[META2:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop", i32 5, i32 0}
-; AMDGPU-DISABLED2: [[META3:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2}
-; AMDGPU-DISABLED2: [[META4:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4}
-; AMDGPU-DISABLED2: [[META5:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3}
-; AMDGPU-DISABLED2: [[META6:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5, !"kernel", i32 1}
-; AMDGPU-DISABLED2: [[META7:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20, !"kernel", i32 1}
-; AMDGPU-DISABLED2: [[META8:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35, !"kernel", i32 1}
-; AMDGPU-DISABLED2: [[META9:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50, !"kernel", i32 1}
-; AMDGPU-DISABLED2: [[META10:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65, !"kernel", i32 1}
-; AMDGPU-DISABLED2: [[META11:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74, !"kernel", i32 1}
-; AMDGPU-DISABLED2: [[META12:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
-; AMDGPU-DISABLED2: [[META13:![0-9]+]] = !{i32 7, !"openmp", i32 50}
-; AMDGPU-DISABLED2: [[META14:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
-; AMDGPU-DISABLED2: [[META15:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
-; AMDGPU-DISABLED2: [[META16:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
-; AMDGPU-DISABLED2: [[META17:![0-9]+]] = !{!"clang version 14.0.0"}
-; AMDGPU-DISABLED2: [[TBAA18]] = !{!19, !19, i64 0}
-; AMDGPU-DISABLED2: [[META19:![0-9]+]] = !{!"int", !20, i64 0}
-; AMDGPU-DISABLED2: [[META20:![0-9]+]] = !{!"omnipotent char", !21, i64 0}
-; AMDGPU-DISABLED2: [[META21:![0-9]+]] = !{!"Simple C/C++ TBAA"}
-; AMDGPU-DISABLED2: [[LOOP22]] = distinct !{!22, !23, !24}
-; AMDGPU-DISABLED2: [[META23:![0-9]+]] = !{!"llvm.loop.mustprogress"}
-; AMDGPU-DISABLED2: [[META24:![0-9]+]] = !{!"llvm.loop.unroll.disable"}
-; AMDGPU-DISABLED2: [[LOOP25]] = distinct !{!25, !23, !24}
-; AMDGPU-DISABLED2: [[TBAA26]] = !{!27, !27, i64 0}
-; AMDGPU-DISABLED2: [[META27:![0-9]+]] = !{!"any pointer", !20, i64 0}
-; AMDGPU-DISABLED2: [[LOOP28]] = distinct !{!28, !23, !24}
-; AMDGPU-DISABLED2: [[LOOP29]] = distinct !{!29, !23, !24}
-;.
-; NVPTX-DISABLED1: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_task", i32 74, i32 5}
-; NVPTX-DISABLED1: [[META1:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1}
-; NVPTX-DISABLED1: [[META2:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop", i32 5, i32 0}
-; NVPTX-DISABLED1: [[META3:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2}
-; NVPTX-DISABLED1: [[META4:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4}
-; NVPTX-DISABLED1: [[META5:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3}
-; NVPTX-DISABLED1: [[META6:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5, !"kernel", i32 1}
-; NVPTX-DISABLED1: [[META7:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20, !"kernel", i32 1}
-; NVPTX-DISABLED1: [[META8:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35, !"kernel", i32 1}
-; NVPTX-DISABLED1: [[META9:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50, !"kernel", i32 1}
-; NVPTX-DISABLED1: [[META10:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65, !"kernel", i32 1}
-; NVPTX-DISABLED1: [[META11:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74, !"kernel", i32 1}
-; NVPTX-DISABLED1: [[META12:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
-; NVPTX-DISABLED1: [[META13:![0-9]+]] = !{i32 7, !"openmp", i32 50}
-; NVPTX-DISABLED1: [[META14:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
-; NVPTX-DISABLED1: [[META15:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
-; NVPTX-DISABLED1: [[META16:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
-; NVPTX-DISABLED1: [[META17:![0-9]+]] = !{!"clang version 14.0.0"}
-; NVPTX-DISABLED1: [[TBAA18]] = !{!19, !19, i64 0}
-; NVPTX-DISABLED1: [[META19:![0-9]+]] = !{!"int", !20, i64 0}
-; NVPTX-DISABLED1: [[META20:![0-9]+]] = !{!"omnipotent char", !21, i64 0}
-; NVPTX-DISABLED1: [[META21:![0-9]+]] = !{!"Simple C/C++ TBAA"}
-; NVPTX-DISABLED1: [[LOOP22]] = distinct !{!22, !23, !24}
-; NVPTX-DISABLED1: [[META23:![0-9]+]] = !{!"llvm.loop.mustprogress"}
-; NVPTX-DISABLED1: [[META24:![0-9]+]] = !{!"llvm.loop.unroll.disable"}
-; NVPTX-DISABLED1: [[LOOP25]] = distinct !{!25, !23, !24}
-; NVPTX-DISABLED1: [[TBAA26]] = !{!27, !27, i64 0}
-; NVPTX-DISABLED1: [[META27:![0-9]+]] = !{!"any pointer", !20, i64 0}
-; NVPTX-DISABLED1: [[LOOP28]] = distinct !{!28, !23, !24}
-; NVPTX-DISABLED1: [[LOOP29]] = distinct !{!29, !23, !24}
+; AMDGPU-DISABLED: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_task", i32 74, i32 5}
+; AMDGPU-DISABLED: [[META1:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1}
+; AMDGPU-DISABLED: [[META2:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop", i32 5, i32 0}
+; AMDGPU-DISABLED: [[META3:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2}
+; AMDGPU-DISABLED: [[META4:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4}
+; AMDGPU-DISABLED: [[META5:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3}
+; AMDGPU-DISABLED: [[META6:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5, !"kernel", i32 1}
+; AMDGPU-DISABLED: [[META7:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20, !"kernel", i32 1}
+; AMDGPU-DISABLED: [[META8:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35, !"kernel", i32 1}
+; AMDGPU-DISABLED: [[META9:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50, !"kernel", i32 1}
+; AMDGPU-DISABLED: [[META10:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65, !"kernel", i32 1}
+; AMDGPU-DISABLED: [[META11:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74, !"kernel", i32 1}
+; AMDGPU-DISABLED: [[META12:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
+; AMDGPU-DISABLED: [[META13:![0-9]+]] = !{i32 7, !"openmp", i32 50}
+; AMDGPU-DISABLED: [[META14:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
+; AMDGPU-DISABLED: [[META15:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
+; AMDGPU-DISABLED: [[META16:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
+; AMDGPU-DISABLED: [[META17:![0-9]+]] = !{!"clang version 14.0.0"}
+; AMDGPU-DISABLED: [[TBAA18]] = !{!19, !19, i64 0}
+; AMDGPU-DISABLED: [[META19:![0-9]+]] = !{!"int", !20, i64 0}
+; AMDGPU-DISABLED: [[META20:![0-9]+]] = !{!"omnipotent char", !21, i64 0}
+; AMDGPU-DISABLED: [[META21:![0-9]+]] = !{!"Simple C/C++ TBAA"}
+; AMDGPU-DISABLED: [[LOOP22]] = distinct !{!22, !23, !24}
+; AMDGPU-DISABLED: [[META23:![0-9]+]] = !{!"llvm.loop.mustprogress"}
+; AMDGPU-DISABLED: [[META24:![0-9]+]] = !{!"llvm.loop.unroll.disable"}
+; AMDGPU-DISABLED: [[LOOP25]] = distinct !{!25, !23, !24}
+; AMDGPU-DISABLED: [[TBAA26]] = !{!27, !27, i64 0}
+; AMDGPU-DISABLED: [[META27:![0-9]+]] = !{!"any pointer", !20, i64 0}
+; AMDGPU-DISABLED: [[LOOP28]] = distinct !{!28, !23, !24}
+; AMDGPU-DISABLED: [[LOOP29]] = distinct !{!29, !23, !24}
;.
-; NVPTX-DISABLED2: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_task", i32 74, i32 5}
-; NVPTX-DISABLED2: [[META1:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1}
-; NVPTX-DISABLED2: [[META2:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop", i32 5, i32 0}
-; NVPTX-DISABLED2: [[META3:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2}
-; NVPTX-DISABLED2: [[META4:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4}
-; NVPTX-DISABLED2: [[META5:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3}
-; NVPTX-DISABLED2: [[META6:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5, !"kernel", i32 1}
-; NVPTX-DISABLED2: [[META7:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20, !"kernel", i32 1}
-; NVPTX-DISABLED2: [[META8:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35, !"kernel", i32 1}
-; NVPTX-DISABLED2: [[META9:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50, !"kernel", i32 1}
-; NVPTX-DISABLED2: [[META10:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65, !"kernel", i32 1}
-; NVPTX-DISABLED2: [[META11:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74, !"kernel", i32 1}
-; NVPTX-DISABLED2: [[META12:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
-; NVPTX-DISABLED2: [[META13:![0-9]+]] = !{i32 7, !"openmp", i32 50}
-; NVPTX-DISABLED2: [[META14:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
-; NVPTX-DISABLED2: [[META15:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
-; NVPTX-DISABLED2: [[META16:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
-; NVPTX-DISABLED2: [[META17:![0-9]+]] = !{!"clang version 14.0.0"}
-; NVPTX-DISABLED2: [[TBAA18]] = !{!19, !19, i64 0}
-; NVPTX-DISABLED2: [[META19:![0-9]+]] = !{!"int", !20, i64 0}
-; NVPTX-DISABLED2: [[META20:![0-9]+]] = !{!"omnipotent char", !21, i64 0}
-; NVPTX-DISABLED2: [[META21:![0-9]+]] = !{!"Simple C/C++ TBAA"}
-; NVPTX-DISABLED2: [[LOOP22]] = distinct !{!22, !23, !24}
-; NVPTX-DISABLED2: [[META23:![0-9]+]] = !{!"llvm.loop.mustprogress"}
-; NVPTX-DISABLED2: [[META24:![0-9]+]] = !{!"llvm.loop.unroll.disable"}
-; NVPTX-DISABLED2: [[LOOP25]] = distinct !{!25, !23, !24}
-; NVPTX-DISABLED2: [[TBAA26]] = !{!27, !27, i64 0}
-; NVPTX-DISABLED2: [[META27:![0-9]+]] = !{!"any pointer", !20, i64 0}
-; NVPTX-DISABLED2: [[LOOP28]] = distinct !{!28, !23, !24}
-; NVPTX-DISABLED2: [[LOOP29]] = distinct !{!29, !23, !24}
+; NVPTX-DISABLED: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_task", i32 74, i32 5}
+; NVPTX-DISABLED: [[META1:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1}
+; NVPTX-DISABLED: [[META2:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop", i32 5, i32 0}
+; NVPTX-DISABLED: [[META3:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2}
+; NVPTX-DISABLED: [[META4:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4}
+; NVPTX-DISABLED: [[META5:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3}
+; NVPTX-DISABLED: [[META6:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5, !"kernel", i32 1}
+; NVPTX-DISABLED: [[META7:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20, !"kernel", i32 1}
+; NVPTX-DISABLED: [[META8:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35, !"kernel", i32 1}
+; NVPTX-DISABLED: [[META9:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50, !"kernel", i32 1}
+; NVPTX-DISABLED: [[META10:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65, !"kernel", i32 1}
+; NVPTX-DISABLED: [[META11:![0-9]+]] = !{ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74, !"kernel", i32 1}
+; NVPTX-DISABLED: [[META12:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
+; NVPTX-DISABLED: [[META13:![0-9]+]] = !{i32 7, !"openmp", i32 50}
+; NVPTX-DISABLED: [[META14:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
+; NVPTX-DISABLED: [[META15:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
+; NVPTX-DISABLED: [[META16:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
+; NVPTX-DISABLED: [[META17:![0-9]+]] = !{!"clang version 14.0.0"}
+; NVPTX-DISABLED: [[TBAA18]] = !{!19, !19, i64 0}
+; NVPTX-DISABLED: [[META19:![0-9]+]] = !{!"int", !20, i64 0}
+; NVPTX-DISABLED: [[META20:![0-9]+]] = !{!"omnipotent char", !21, i64 0}
+; NVPTX-DISABLED: [[META21:![0-9]+]] = !{!"Simple C/C++ TBAA"}
+; NVPTX-DISABLED: [[LOOP22]] = distinct !{!22, !23, !24}
+; NVPTX-DISABLED: [[META23:![0-9]+]] = !{!"llvm.loop.mustprogress"}
+; NVPTX-DISABLED: [[META24:![0-9]+]] = !{!"llvm.loop.unroll.disable"}
+; NVPTX-DISABLED: [[LOOP25]] = distinct !{!25, !23, !24}
+; NVPTX-DISABLED: [[TBAA26]] = !{!27, !27, i64 0}
+; NVPTX-DISABLED: [[META27:![0-9]+]] = !{!"any pointer", !20, i64 0}
+; NVPTX-DISABLED: [[LOOP28]] = distinct !{!28, !23, !24}
+; NVPTX-DISABLED: [[LOOP29]] = distinct !{!29, !23, !24}
;.
diff --git a/llvm/test/Transforms/OpenMP/spmdization_assumes.ll b/llvm/test/Transforms/OpenMP/spmdization_assumes.ll
index e02c3af750a298..2ccb253b62b8f6 100644
--- a/llvm/test/Transforms/OpenMP/spmdization_assumes.ll
+++ b/llvm/test/Transforms/OpenMP/spmdization_assumes.ll
@@ -13,19 +13,19 @@
target triple = "nvptx64"
%struct.ident_t = type { i32, i32, i32, i32, ptr }
-%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr }
-%struct.ConfigurationEnvironmentTy = type { i8, i8, i8 }
@0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8
-@__omp_offloading_fd02_404433c2_main_l5_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1 }, ptr @1, ptr null }
-
+@__omp_offloading_fd02_404433c2_main_l5_exec_mode = weak constant i8 1
+@llvm.compiler.used = appending global [1 x ptr] [ptr @__omp_offloading_fd02_404433c2_main_l5_exec_mode], section "llvm.metadata"
; Function Attrs: alwaysinline convergent norecurse nounwind
;.
; CHECK: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8
-; CHECK: @[[__OMP_OFFLOADING_FD02_404433C2_MAIN_L5_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 3 }, ptr @[[GLOB1]], ptr null }
+; CHECK: @[[__OMP_OFFLOADING_FD02_404433C2_MAIN_L5_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3
+; CHECK: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [1 x ptr] [ptr @__omp_offloading_fd02_404433c2_main_l5_exec_mode], section "llvm.metadata"
+; CHECK: @[[__OMP_OFFLOADING_FD02_404433C2_MAIN_L5_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0
; CHECK: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 22, ptr @[[GLOB0]] }, align 8
;.
define weak void @__omp_offloading_fd02_404433c2_main_l5(ptr nonnull align 8 dereferenceable(8) %x) local_unnamed_addr #0 {
@@ -33,7 +33,7 @@ define weak void @__omp_offloading_fd02_404433c2_main_l5(ptr nonnull align 8 der
; CHECK-SAME: (ptr nonnull align 8 dereferenceable(8) [[X:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr nonnull @__omp_offloading_fd02_404433c2_main_l5_kernel_environment) #[[ATTR3:[0-9]+]]
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr nonnull @[[GLOB1]], i8 2, i1 false) #[[ATTR3:[0-9]+]]
; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; CHECK: common.ret:
@@ -56,12 +56,12 @@ define weak void @__omp_offloading_fd02_404433c2_main_l5(ptr nonnull align 8 der
; CHECK-NEXT: br label [[REGION_EXIT:%.*]]
; CHECK: region.exit:
; CHECK-NEXT: call void @__kmpc_parallel_51(ptr nonnull @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr nonnull [[CAPTURED_VARS_ADDRS]], i64 0) #[[ATTR3]]
-; CHECK-NEXT: call void @__kmpc_target_deinit() #[[ATTR3]]
+; CHECK-NEXT: call void @__kmpc_target_deinit(ptr nonnull @[[GLOB1]], i8 2) #[[ATTR3]]
; CHECK-NEXT: br label [[COMMON_RET]]
;
entry:
%captured_vars_addrs = alloca [0 x ptr], align 8
- %0 = call i32 @__kmpc_target_init(ptr nonnull @__omp_offloading_fd02_404433c2_main_l5_kernel_environment) #3
+ %0 = call i32 @__kmpc_target_init(ptr nonnull @1, i8 1, i1 true) #3
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %common.ret
@@ -73,11 +73,11 @@ user_code.entry: ; preds = %entry
%call.i = call double @__nv_sin(double 0x400921FB54442D18) #6
store double %call.i, ptr %x, align 8, !tbaa !8
call void @__kmpc_parallel_51(ptr nonnull @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr nonnull %captured_vars_addrs, i64 0) #3
- call void @__kmpc_target_deinit() #3
+ call void @__kmpc_target_deinit(ptr nonnull @1, i8 1) #3
br label %common.ret
}
-declare i32 @__kmpc_target_init(ptr) local_unnamed_addr
+declare i32 @__kmpc_target_init(ptr, i8, i1) local_unnamed_addr
; Function Attrs: alwaysinline mustprogress nofree norecurse nosync nounwind readnone willreturn
define internal void @__omp_outlined__(ptr noalias nocapture %.global_tid., ptr noalias nocapture %.bound_tid.) #1 {
@@ -113,7 +113,7 @@ declare i32 @__kmpc_global_thread_num(ptr) local_unnamed_addr #3
; Function Attrs: alwaysinline
declare void @__kmpc_parallel_51(ptr, i32, i32, i32, i32, ptr, ptr, ptr, i64) local_unnamed_addr #4
-declare void @__kmpc_target_deinit() local_unnamed_addr
+declare void @__kmpc_target_deinit(ptr, i8) local_unnamed_addr
; Function Attrs: convergent
declare double @__nv_sin(double) local_unnamed_addr #5
diff --git a/llvm/test/Transforms/OpenMP/spmdization_constant_prop.ll b/llvm/test/Transforms/OpenMP/spmdization_constant_prop.ll
index 6de019128e0d4e..dce799104e70be 100644
--- a/llvm/test/Transforms/OpenMP/spmdization_constant_prop.ll
+++ b/llvm/test/Transforms/OpenMP/spmdization_constant_prop.ll
@@ -2,8 +2,8 @@
;
; Verify we change it to SPMD mode but also avoid propagating the old mode (=generic) into the __kmpc_target_init function.
;
-; CHECK: @__omp_offloading_20_11e3950_main_l12_kernel_environment = local_unnamed_addr addrspace(1) constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 3 }, ptr addrspacecast (ptr addrspace(1) @1 to ptr), ptr null }
; CHECK-NOT: store i32 0, ptr addrspace(3) @IsSPMDMode
+; CHECK: call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @1 to ptr), i8 2, i1 false)
; CHECK-NOT: store i32 0, ptr addrspace(3) @IsSPMDMode
; CHECK: store i32 1, ptr addrspace(3) @IsSPMDMode
; CHECK-NOT: store i32 0, ptr addrspace(3) @IsSPMDMode
@@ -13,8 +13,6 @@ target triple = "amdgcn-amd-amdhsa"
%struct.ident_t = type { i32, i32, i32, i32, ptr }
%struct.DeviceEnvironmentTy = type { i32, i32, i32, i32 }
-%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr }
-%struct.ConfigurationEnvironmentTy = type { i8, i8, i8 }
%"struct.(anonymous namespace)::SharedMemorySmartStackTy" = type { [512 x i8], [1024 x i8] }
%"struct.(anonymous namespace)::TeamStateTy" = type { %"struct.(anonymous namespace)::ICVStateTy", i32, ptr }
%"struct.(anonymous namespace)::ICVStateTy" = type { i32, i32, i32, i32, i32, i32 }
@@ -27,7 +25,6 @@ target triple = "amdgcn-amd-amdhsa"
@__omp_rtl_debug_kind = weak_odr hidden local_unnamed_addr addrspace(1) constant i32 0
@__omp_rtl_assume_no_thread_state = weak_odr hidden local_unnamed_addr addrspace(1) constant i32 0
@omptarget_device_environment = weak protected addrspace(4) global %struct.DeviceEnvironmentTy undef, align 4
-@__omp_offloading_20_11e3950_main_l12_kernel_environment = local_unnamed_addr addrspace(1) constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr addrspacecast (ptr addrspace(1) @1 to ptr), ptr null }
@IsSPMDMode = weak hidden addrspace(3) global i32 undef, align 4
@.str.12 = private unnamed_addr addrspace(4) constant [47 x i8] c"ValueRAII initialization with wrong old value!\00", align 1
@_ZN12_GLOBAL__N_122SharedMemorySmartStackE = internal addrspace(3) global %"struct.(anonymous namespace)::SharedMemorySmartStackTy" undef, align 16
@@ -44,7 +41,7 @@ define weak_odr amdgpu_kernel void @__omp_offloading_20_11e3950_main_l12(i64 nou
entry:
%ng1 = alloca i32, align 4
%captured_vars_addrs = alloca [2 x ptr], align 8, addrspace(5)
- %0 = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @__omp_offloading_20_11e3950_main_l12_kernel_environment to ptr))
+ %0 = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @1 to ptr), i8 1, i1 true)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %common.ret
@@ -52,7 +49,7 @@ user_code.entry: ; preds = %entry
%captured_vars_addrs.ascast = addrspacecast ptr addrspace(5) %captured_vars_addrs to ptr
store ptr %ng1, ptr addrspace(5) %captured_vars_addrs, align 8, !tbaa !7
call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @1 to ptr), i32 0, i32 1, i32 -1, i32 -1, ptr nonnull @__omp_outlined__, ptr nonnull @__omp_outlined___wrapper, ptr nonnull %captured_vars_addrs.ascast, i64 2)
- call void @__kmpc_target_deinit()
+ call void @__kmpc_target_deinit(ptr addrspacecast (ptr addrspace(1) @1 to ptr), i8 1)
br label %common.ret
common.ret: ; preds = %user_code.entry, %entry
@@ -107,12 +104,9 @@ entry:
}
; Function Attrs: convergent nounwind
-; define internal i32 @__kmpc_target_init(ptr nocapture noundef readnone %Ident, i8 noundef signext %Mode, i1 noundef zeroext %UseGenericStateMachine) local_unnamed_addr #9 {
-define internal i32 @__kmpc_target_init(ptr nofree noundef nonnull align 8 dereferenceable(24) %KernelEnvironment) local_unnamed_addr #9 {
+define internal i32 @__kmpc_target_init(ptr nocapture noundef readnone %Ident, i8 noundef signext %Mode, i1 noundef zeroext %UseGenericStateMachine) local_unnamed_addr #9 {
entry:
%0 = and i32 undef, undef
- %ExecMode = getelementptr inbounds %struct.ConfigurationEnvironmentTy, ptr %KernelEnvironment, i64 0, i32 2
- %Mode = load i8, ptr %ExecMode, align 2, !tbaa !28
%1 = and i8 %Mode, 2
%tobool.not = icmp eq i8 %1, 0
br i1 %tobool.not, label %if.else, label %if.then
@@ -254,8 +248,6 @@ _ZN4ompx7mapping23isInitialThreadInLevel0Eb.exit: ; preds = %if.end
if.end10: ; preds = %_ZN4ompx7mapping23isInitialThreadInLevel0Eb.exit
%sub.i = add nsw i32 %24, -64
%cmp = icmp ult i32 %25, %sub.i
- %34 = load i8, ptr %KernelEnvironment, align 8
- %UseGenericStateMachine = icmp ne i8 %34, 0
%or.cond251 = select i1 %UseGenericStateMachine, i1 %cmp, i1 false
br i1 %or.cond251, label %do.body.i, label %_ZN14DebugEntryRAIID2Ev.exit250
@@ -269,7 +261,7 @@ _ZN14DebugEntryRAIID2Ev.exit250: ; preds = %do.body.i, %if.end1
}
; Function Attrs: nounwind
-define internal void @__kmpc_target_deinit() local_unnamed_addr #10 {
+define internal void @__kmpc_target_deinit(ptr nocapture noundef readnone %Ident, i8 noundef signext %Mode) local_unnamed_addr #10 {
ret void
}
diff --git a/llvm/test/Transforms/OpenMP/spmdization_guarding.ll b/llvm/test/Transforms/OpenMP/spmdization_guarding.ll
index 342d9e90e9c55e..4dad4da6564920 100644
--- a/llvm/test/Transforms/OpenMP/spmdization_guarding.ll
+++ b/llvm/test/Transforms/OpenMP/spmdization_guarding.ll
@@ -35,27 +35,29 @@ target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"
target triple = "nvptx64"
%struct.ident_t = type { i32, i32, i32, i32, ptr }
-%struct.ConfigurationEnvironmentTy = type { i8, i8, i8 }
-%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr }
@0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8
+@__omp_offloading_2a_fbfa7a_sequential_loop_l6_exec_mode = weak constant i8 1
+@llvm.compiler.used = appending global [1 x ptr] [ptr @__omp_offloading_2a_fbfa7a_sequential_loop_l6_exec_mode], section "llvm.metadata"
@LocGlob = private unnamed_addr addrspace(5) global i32 43
-@__omp_offloading_2a_fbfa7a_sequential_loop_l6_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr @1, ptr null }
-
; Function Attrs: convergent norecurse nounwind
;.
; CHECK: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8
+; CHECK: @[[__OMP_OFFLOADING_2A_FBFA7A_SEQUENTIAL_LOOP_L6_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3
+; CHECK: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [1 x ptr] [ptr @__omp_offloading_2a_fbfa7a_sequential_loop_l6_exec_mode], section "llvm.metadata"
; CHECK: @[[LOCGLOB:[a-zA-Z0-9_$"\\.-]+]] = private unnamed_addr addrspace(5) global i32 43
-; CHECK: @[[__OMP_OFFLOADING_2A_FBFA7A_SEQUENTIAL_LOOP_L6_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 3 }, ptr @[[GLOB1]], ptr null }
+; CHECK: @[[__OMP_OFFLOADING_2A_FBFA7A_SEQUENTIAL_LOOP_L6_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0
; CHECK: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 22, ptr @[[GLOB0]] }, align 8
;.
; CHECK-DISABLED: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
; CHECK-DISABLED: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8
+; CHECK-DISABLED: @[[__OMP_OFFLOADING_2A_FBFA7A_SEQUENTIAL_LOOP_L6_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
+; CHECK-DISABLED: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [1 x ptr] [ptr @__omp_offloading_2a_fbfa7a_sequential_loop_l6_exec_mode], section "llvm.metadata"
; CHECK-DISABLED: @[[LOCGLOB:[a-zA-Z0-9_$"\\.-]+]] = private unnamed_addr addrspace(5) global i32 43
-; CHECK-DISABLED: @[[__OMP_OFFLOADING_2A_FBFA7A_SEQUENTIAL_LOOP_L6_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null }
+; CHECK-DISABLED: @[[__OMP_OFFLOADING_2A_FBFA7A_SEQUENTIAL_LOOP_L6_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0
; CHECK-DISABLED: @[[__OMP_OUTLINED__1_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
;.
define weak void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %x, i64 %N) #0 {
@@ -66,7 +68,7 @@ define weak void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %x, i64 %N)
; CHECK-NEXT: [[LOC:%.*]] = alloca ptr, align 8
; CHECK-NEXT: [[AL32:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[N_ADDR_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[N]] to i32
-; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr nonnull @__omp_offloading_2a_fbfa7a_sequential_loop_l6_kernel_environment) #[[ATTR6:[0-9]+]]
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr nonnull @[[GLOB1]], i8 2, i1 false) #[[ATTR6:[0-9]+]]
; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; CHECK: user_code.entry:
@@ -177,7 +179,7 @@ define weak void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %x, i64 %N)
; CHECK-NEXT: [[CALL14_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias !8
; CHECK-NEXT: [[CALL15_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias !8
; CHECK-NEXT: [[CALL16_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias !8
-; CHECK-NEXT: call void @__kmpc_target_deinit() #[[ATTR6]]
+; CHECK-NEXT: call void @__kmpc_target_deinit(ptr nonnull @[[GLOB1]], i8 2) #[[ATTR6]]
; CHECK-NEXT: ret void
; CHECK: worker.exit:
; CHECK-NEXT: ret void
@@ -190,7 +192,7 @@ define weak void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %x, i64 %N)
; CHECK-DISABLED-NEXT: [[LOC:%.*]] = alloca ptr, align 8
; CHECK-DISABLED-NEXT: [[AL32:%.*]] = alloca i32, align 4
; CHECK-DISABLED-NEXT: [[N_ADDR_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[N]] to i32
-; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr nonnull @__omp_offloading_2a_fbfa7a_sequential_loop_l6_kernel_environment) #[[ATTR6:[0-9]+]]
+; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr nonnull @[[GLOB1]], i8 1, i1 false) #[[ATTR6:[0-9]+]]
; CHECK-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; CHECK-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; CHECK-DISABLED: is_worker_check:
@@ -269,7 +271,7 @@ define weak void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %x, i64 %N)
; CHECK-DISABLED-NEXT: [[CALL14_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias !8
; CHECK-DISABLED-NEXT: [[CALL15_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias !8
; CHECK-DISABLED-NEXT: [[CALL16_I:%.*]] = call i32 @no_openmp(ptr nonnull [[X]]) #[[ATTR10]], !noalias !8
-; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit() #[[ATTR6]]
+; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr nonnull @[[GLOB1]], i8 1) #[[ATTR6]]
; CHECK-DISABLED-NEXT: ret void
; CHECK-DISABLED: worker.exit:
; CHECK-DISABLED-NEXT: ret void
@@ -278,7 +280,7 @@ entry:
%loc = alloca ptr
%al32 = alloca i32
%N.addr.sroa.0.0.extract.trunc = trunc i64 %N to i32
- %0 = call i32 @__kmpc_target_init(ptr nonnull @__omp_offloading_2a_fbfa7a_sequential_loop_l6_kernel_environment) #3
+ %0 = call i32 @__kmpc_target_init(ptr nonnull @1, i8 1, i1 true) #3
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %worker.exit
@@ -333,7 +335,7 @@ __omp_outlined__.exit: ; preds = %for.cond.i
%call14.i = call i32 @no_openmp(ptr nonnull %x) #5, !noalias !8
%call15.i = call i32 @no_openmp(ptr nonnull %x) #5, !noalias !8
%call16.i = call i32 @no_openmp(ptr nonnull %x) #5, !noalias !8
- call void @__kmpc_target_deinit() #3
+ call void @__kmpc_target_deinit(ptr nonnull @1, i8 1) #3
ret void
worker.exit: ; preds = %entry
@@ -366,13 +368,13 @@ define internal void @__omp_outlined__1_wrapper(i16 zeroext %0, i32 %1) {
declare void @__kmpc_parallel_51(ptr, i32, i32, i32, i32, ptr, ptr, ptr, i64)
; Make it a weak definition so we will apply custom state machine rewriting but can't use the body in the reasoning.
-define weak i32 @__kmpc_target_init(ptr) {
+define weak i32 @__kmpc_target_init(ptr, i8, i1) {
; CHECK-LABEL: define {{[^@]+}}@__kmpc_target_init
-; CHECK-SAME: (ptr [[TMP0:%.*]]) {
+; CHECK-SAME: (ptr [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) {
; CHECK-NEXT: ret i32 0
;
; CHECK-DISABLED-LABEL: define {{[^@]+}}@__kmpc_target_init
-; CHECK-DISABLED-SAME: (ptr [[TMP0:%.*]]) {
+; CHECK-DISABLED-SAME: (ptr [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) {
; CHECK-DISABLED-NEXT: ret i32 0
;
ret i32 0
@@ -392,7 +394,7 @@ declare void @usei8ptr(ptr) #1
; Function Attrs: nounwind
declare i32 @__kmpc_global_thread_num(ptr) #3
-declare void @__kmpc_target_deinit()
+declare void @__kmpc_target_deinit(ptr, i8)
; Function Attrs: inaccessiblememonly nofree nosync nounwind willreturn
declare void @llvm.experimental.noalias.scope.decl(metadata) #4
diff --git a/llvm/test/Transforms/OpenMP/spmdization_guarding_two_reaching_kernels.ll b/llvm/test/Transforms/OpenMP/spmdization_guarding_two_reaching_kernels.ll
index e780687cd795c0..03adf3c356e731 100644
--- a/llvm/test/Transforms/OpenMP/spmdization_guarding_two_reaching_kernels.ll
+++ b/llvm/test/Transforms/OpenMP/spmdization_guarding_two_reaching_kernels.ll
@@ -30,42 +30,48 @@ target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"
target triple = "nvptx64"
%struct.ident_t = type { i32, i32, i32, i32, ptr }
-%struct.ConfigurationEnvironmentTy = type { i8, i8, i8 }
-%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr }
@0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8
+@__omp_offloading_2b_10393b5_spmd_l12_exec_mode = weak constant i8 1
+@__omp_offloading_2b_10393b5_generic_l20_exec_mode = weak constant i8 1
@2 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 2, i32 0, ptr @0 }, align 8
@G = external global i32, align 4
-@__omp_offloading_2b_10393b5_spmd_l12_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1 }, ptr @1, ptr null }
-@__omp_offloading_2b_10393b5_generic_l20_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1 }, ptr @1, ptr null }
+@llvm.compiler.used = appending global [2 x ptr] [ptr @__omp_offloading_2b_10393b5_spmd_l12_exec_mode, ptr @__omp_offloading_2b_10393b5_generic_l20_exec_mode], section "llvm.metadata"
;.
; CHECK: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8
+; CHECK: @[[__OMP_OFFLOADING_2B_10393B5_SPMD_L12_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3
+; CHECK: @[[__OMP_OFFLOADING_2B_10393B5_GENERIC_L20_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3
; CHECK: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 2, i32 0, ptr @[[GLOB0]] }, align 8
; CHECK: @[[G:[a-zA-Z0-9_$"\\.-]+]] = external global i32, align 4
-; CHECK: @[[__OMP_OFFLOADING_2B_10393B5_SPMD_L12_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 3 }, ptr @[[GLOB1]], ptr null }
-; CHECK: @[[__OMP_OFFLOADING_2B_10393B5_GENERIC_L20_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 3 }, ptr @[[GLOB1]], ptr null }
+; CHECK: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [2 x ptr] [ptr @__omp_offloading_2b_10393b5_spmd_l12_exec_mode, ptr @__omp_offloading_2b_10393b5_generic_l20_exec_mode], section "llvm.metadata"
+; CHECK: @[[__OMP_OFFLOADING_2B_10393B5_SPMD_L12_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
; CHECK: @[[GLOB3:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 22, ptr @[[GLOB0]] }, align 8
+; CHECK: @[[__OMP_OFFLOADING_2B_10393B5_GENERIC_L20_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0
;.
; CHECK-DISABLE-SPMDIZATION: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
; CHECK-DISABLE-SPMDIZATION: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8
+; CHECK-DISABLE-SPMDIZATION: @[[__OMP_OFFLOADING_2B_10393B5_SPMD_L12_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
+; CHECK-DISABLE-SPMDIZATION: @[[__OMP_OFFLOADING_2B_10393B5_GENERIC_L20_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
; CHECK-DISABLE-SPMDIZATION: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 2, i32 0, ptr @[[GLOB0]] }, align 8
; CHECK-DISABLE-SPMDIZATION: @[[G:[a-zA-Z0-9_$"\\.-]+]] = external global i32, align 4
-; CHECK-DISABLE-SPMDIZATION: @[[__OMP_OFFLOADING_2B_10393B5_SPMD_L12_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 1, i8 1 }, ptr @[[GLOB1]], ptr null }
-; CHECK-DISABLE-SPMDIZATION: @[[__OMP_OFFLOADING_2B_10393B5_GENERIC_L20_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null }
+; CHECK-DISABLE-SPMDIZATION: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [2 x ptr] [ptr @__omp_offloading_2b_10393b5_spmd_l12_exec_mode, ptr @__omp_offloading_2b_10393b5_generic_l20_exec_mode], section "llvm.metadata"
+; CHECK-DISABLE-SPMDIZATION: @[[__OMP_OFFLOADING_2B_10393B5_SPMD_L12_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
+; CHECK-DISABLE-SPMDIZATION: @[[__OMP_OFFLOADING_2B_10393B5_GENERIC_L20_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0
+; CHECK-DISABLE-SPMDIZATION: @[[__OMP_OUTLINED___WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
;.
-define weak void @__omp_offloading_2b_10393b5_spmd_l12() "kernel" #0 {
+define weak void @__omp_offloading_2b_10393b5_spmd_l12() #0 {
; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_2b_10393b5_spmd_l12
; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_2b_10393b5_spmd_l12_kernel_environment)
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; CHECK: user_code.entry:
; CHECK-NEXT: call void @spmd_helper() #[[ATTR6:[0-9]+]]
-; CHECK-NEXT: call void @__kmpc_target_deinit()
+; CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
; CHECK-NEXT: ret void
; CHECK: worker.exit:
; CHECK-NEXT: ret void
@@ -73,24 +79,58 @@ define weak void @__omp_offloading_2b_10393b5_spmd_l12() "kernel" #0 {
; CHECK-DISABLE-SPMDIZATION-LABEL: define {{[^@]+}}@__omp_offloading_2b_10393b5_spmd_l12
; CHECK-DISABLE-SPMDIZATION-SAME: () #[[ATTR0:[0-9]+]] {
; CHECK-DISABLE-SPMDIZATION-NEXT: entry:
-; CHECK-DISABLE-SPMDIZATION-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_2b_10393b5_spmd_l12_kernel_environment)
+; CHECK-DISABLE-SPMDIZATION-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
+; CHECK-DISABLE-SPMDIZATION-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false)
+; CHECK-DISABLE-SPMDIZATION-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
+; CHECK-DISABLE-SPMDIZATION-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
+; CHECK-DISABLE-SPMDIZATION: is_worker_check:
+; CHECK-DISABLE-SPMDIZATION-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
+; CHECK-DISABLE-SPMDIZATION-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
+; CHECK-DISABLE-SPMDIZATION-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
+; CHECK-DISABLE-SPMDIZATION-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
+; CHECK-DISABLE-SPMDIZATION-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
+; CHECK-DISABLE-SPMDIZATION: worker_state_machine.begin:
+; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
+; CHECK-DISABLE-SPMDIZATION-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
+; CHECK-DISABLE-SPMDIZATION-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
+; CHECK-DISABLE-SPMDIZATION-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast ptr [[WORKER_WORK_FN]] to ptr
+; CHECK-DISABLE-SPMDIZATION-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
+; CHECK-DISABLE-SPMDIZATION-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
+; CHECK-DISABLE-SPMDIZATION: worker_state_machine.finished:
+; CHECK-DISABLE-SPMDIZATION-NEXT: ret void
+; CHECK-DISABLE-SPMDIZATION: worker_state_machine.is_active.check:
+; CHECK-DISABLE-SPMDIZATION-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
+; CHECK-DISABLE-SPMDIZATION: worker_state_machine.parallel_region.check:
+; CHECK-DISABLE-SPMDIZATION-NEXT: br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
+; CHECK-DISABLE-SPMDIZATION: worker_state_machine.parallel_region.execute:
+; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__omp_outlined___wrapper(i16 0, i32 [[TMP0]])
+; CHECK-DISABLE-SPMDIZATION-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
+; CHECK-DISABLE-SPMDIZATION: worker_state_machine.parallel_region.check1:
+; CHECK-DISABLE-SPMDIZATION-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
+; CHECK-DISABLE-SPMDIZATION: worker_state_machine.parallel_region.end:
+; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__kmpc_kernel_end_parallel()
+; CHECK-DISABLE-SPMDIZATION-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
+; CHECK-DISABLE-SPMDIZATION: worker_state_machine.done.barrier:
+; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
+; CHECK-DISABLE-SPMDIZATION-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
+; CHECK-DISABLE-SPMDIZATION: thread.user_code.check:
; CHECK-DISABLE-SPMDIZATION-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; CHECK-DISABLE-SPMDIZATION-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; CHECK-DISABLE-SPMDIZATION: user_code.entry:
; CHECK-DISABLE-SPMDIZATION-NEXT: call void @spmd_helper() #[[ATTR6:[0-9]+]]
-; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__kmpc_target_deinit()
+; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
; CHECK-DISABLE-SPMDIZATION-NEXT: ret void
; CHECK-DISABLE-SPMDIZATION: worker.exit:
; CHECK-DISABLE-SPMDIZATION-NEXT: ret void
;
entry:
- %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_2b_10393b5_spmd_l12_kernel_environment)
+ %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %worker.exit
user_code.entry: ; preds = %entry
call void @spmd_helper() #5
- call void @__kmpc_target_deinit()
+ call void @__kmpc_target_deinit(ptr @1, i8 1)
ret void
worker.exit: ; preds = %entry
@@ -98,26 +138,26 @@ worker.exit: ; preds = %entry
}
; Make it a weak definition so we will apply custom state machine rewriting but can't use the body in the reasoning.
-define weak i32 @__kmpc_target_init(ptr) {
+define weak i32 @__kmpc_target_init(ptr, i8, i1) {
; CHECK-LABEL: define {{[^@]+}}@__kmpc_target_init
-; CHECK-SAME: (ptr [[TMP0:%.*]]) {
+; CHECK-SAME: (ptr [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) {
; CHECK-NEXT: ret i32 0
;
; CHECK-DISABLE-SPMDIZATION-LABEL: define {{[^@]+}}@__kmpc_target_init
-; CHECK-DISABLE-SPMDIZATION-SAME: (ptr [[TMP0:%.*]]) {
+; CHECK-DISABLE-SPMDIZATION-SAME: (ptr [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) {
; CHECK-DISABLE-SPMDIZATION-NEXT: ret i32 0
;
ret i32 0
}
-declare void @__kmpc_target_deinit()
+declare void @__kmpc_target_deinit(ptr, i8)
; Function Attrs: convergent noinline norecurse nounwind
define weak void @__omp_offloading_2b_10393b5_generic_l20() #0 {
; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_2b_10393b5_generic_l20
; CHECK-SAME: () #[[ATTR0]] {
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_2b_10393b5_generic_l20_kernel_environment)
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
; CHECK-NEXT: [[THREAD_ID_IN_BLOCK:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
; CHECK-NEXT: [[THREAD_IS_MAIN:%.*]] = icmp ne i32 [[THREAD_ID_IN_BLOCK]], 0
; CHECK-NEXT: br i1 [[THREAD_IS_MAIN]], label [[EXIT_THREADS:%.*]], label [[MAIN_THREAD_USER_CODE:%.*]]
@@ -128,7 +168,7 @@ define weak void @__omp_offloading_2b_10393b5_generic_l20() #0 {
; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; CHECK: user_code.entry:
; CHECK-NEXT: call void @generic_helper() #[[ATTR7:[0-9]+]]
-; CHECK-NEXT: call void @__kmpc_target_deinit()
+; CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
; CHECK-NEXT: ret void
; CHECK: worker.exit:
; CHECK-NEXT: ret void
@@ -136,24 +176,24 @@ define weak void @__omp_offloading_2b_10393b5_generic_l20() #0 {
; CHECK-DISABLE-SPMDIZATION-LABEL: define {{[^@]+}}@__omp_offloading_2b_10393b5_generic_l20
; CHECK-DISABLE-SPMDIZATION-SAME: () #[[ATTR0]] {
; CHECK-DISABLE-SPMDIZATION-NEXT: entry:
-; CHECK-DISABLE-SPMDIZATION-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_2b_10393b5_generic_l20_kernel_environment)
+; CHECK-DISABLE-SPMDIZATION-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false)
; CHECK-DISABLE-SPMDIZATION-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; CHECK-DISABLE-SPMDIZATION-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; CHECK-DISABLE-SPMDIZATION: user_code.entry:
; CHECK-DISABLE-SPMDIZATION-NEXT: call void @generic_helper() #[[ATTR7:[0-9]+]]
-; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__kmpc_target_deinit()
+; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
; CHECK-DISABLE-SPMDIZATION-NEXT: ret void
; CHECK-DISABLE-SPMDIZATION: worker.exit:
; CHECK-DISABLE-SPMDIZATION-NEXT: ret void
;
entry:
- %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_2b_10393b5_generic_l20_kernel_environment)
+ %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %worker.exit
user_code.entry: ; preds = %entry
call void @generic_helper() #5
- call void @__kmpc_target_deinit()
+ call void @__kmpc_target_deinit(ptr @1, i8 1)
ret void
worker.exit: ; preds = %entry
@@ -177,7 +217,7 @@ define internal void @spmd_helper() #1 {
; CHECK-DISABLE-SPMDIZATION-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; CHECK-DISABLE-SPMDIZATION-NEXT: call void @leaf() #[[ATTR7]]
; CHECK-DISABLE-SPMDIZATION-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3:[0-9]+]]
-; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; CHECK-DISABLE-SPMDIZATION-NEXT: ret void
;
entry:
diff --git a/llvm/test/Transforms/OpenMP/spmdization_no_guarding_two_reaching_kernels.ll b/llvm/test/Transforms/OpenMP/spmdization_no_guarding_two_reaching_kernels.ll
index 04644a98983f05..e8f8104dacbd08 100644
--- a/llvm/test/Transforms/OpenMP/spmdization_no_guarding_two_reaching_kernels.ll
+++ b/llvm/test/Transforms/OpenMP/spmdization_no_guarding_two_reaching_kernels.ll
@@ -31,42 +31,47 @@ target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"
target triple = "nvptx64"
%struct.ident_t = type { i32, i32, i32, i32, ptr }
-%struct.ConfigurationEnvironmentTy = type { i8, i8, i8 }
-%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr }
@0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8
+@__omp_offloading_2b_10393b5_spmd_l12_exec_mode = weak constant i8 1
+@__omp_offloading_2b_10393b5_generic_l20_exec_mode = weak constant i8 1
@2 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 2, i32 0, ptr @0 }, align 8
@G = external addrspace(5) global i32, align 4
-@__omp_offloading_2b_10393b5_spmd_l12_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr @1, ptr null }
-@__omp_offloading_2b_10393b5_generic_l20_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr @1, ptr null }
+@llvm.compiler.used = appending global [2 x ptr] [ptr @__omp_offloading_2b_10393b5_spmd_l12_exec_mode, ptr @__omp_offloading_2b_10393b5_generic_l20_exec_mode], section "llvm.metadata"
;.
; CHECK: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8
+; CHECK: @[[__OMP_OFFLOADING_2B_10393B5_SPMD_L12_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3
+; CHECK: @[[__OMP_OFFLOADING_2B_10393B5_GENERIC_L20_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
; CHECK: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 2, i32 0, ptr @[[GLOB0]] }, align 8
; CHECK: @[[G:[a-zA-Z0-9_$"\\.-]+]] = external addrspace(5) global i32, align 4
-; CHECK: @[[__OMP_OFFLOADING_2B_10393B5_SPMD_L12_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 3 }, ptr @[[GLOB1]], ptr null }
-; CHECK: @[[__OMP_OFFLOADING_2B_10393B5_GENERIC_L20_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null }
+; CHECK: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [2 x ptr] [ptr @__omp_offloading_2b_10393b5_spmd_l12_exec_mode, ptr @__omp_offloading_2b_10393b5_generic_l20_exec_mode], section "llvm.metadata"
+; CHECK: @[[__OMP_OFFLOADING_2B_10393B5_SPMD_L12_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0
+; CHECK: @[[__OMP_OFFLOADING_2B_10393B5_GENERIC_L20_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0
;.
; CHECK-DISABLE-SPMDIZATION: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
; CHECK-DISABLE-SPMDIZATION: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8
+; CHECK-DISABLE-SPMDIZATION: @[[__OMP_OFFLOADING_2B_10393B5_SPMD_L12_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
+; CHECK-DISABLE-SPMDIZATION: @[[__OMP_OFFLOADING_2B_10393B5_GENERIC_L20_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
; CHECK-DISABLE-SPMDIZATION: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 2, i32 0, ptr @[[GLOB0]] }, align 8
; CHECK-DISABLE-SPMDIZATION: @[[G:[a-zA-Z0-9_$"\\.-]+]] = external addrspace(5) global i32, align 4
-; CHECK-DISABLE-SPMDIZATION: @[[__OMP_OFFLOADING_2B_10393B5_SPMD_L12_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null }
-; CHECK-DISABLE-SPMDIZATION: @[[__OMP_OFFLOADING_2B_10393B5_GENERIC_L20_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1 }, ptr @[[GLOB1]], ptr null }
+; CHECK-DISABLE-SPMDIZATION: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [2 x ptr] [ptr @__omp_offloading_2b_10393b5_spmd_l12_exec_mode, ptr @__omp_offloading_2b_10393b5_generic_l20_exec_mode], section "llvm.metadata"
+; CHECK-DISABLE-SPMDIZATION: @[[__OMP_OFFLOADING_2B_10393B5_SPMD_L12_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0
+; CHECK-DISABLE-SPMDIZATION: @[[__OMP_OFFLOADING_2B_10393B5_GENERIC_L20_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0
; CHECK-DISABLE-SPMDIZATION: @[[__OMP_OUTLINED___WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
;.
define weak void @__omp_offloading_2b_10393b5_spmd_l12() #0 {
; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_2b_10393b5_spmd_l12
; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_2b_10393b5_spmd_l12_kernel_environment)
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false)
; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; CHECK: user_code.entry:
; CHECK-NEXT: call void @spmd_helper() #[[ATTR7:[0-9]+]]
-; CHECK-NEXT: call void @__kmpc_target_deinit()
+; CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
; CHECK-NEXT: ret void
; CHECK: worker.exit:
; CHECK-NEXT: ret void
@@ -75,7 +80,7 @@ define weak void @__omp_offloading_2b_10393b5_spmd_l12() #0 {
; CHECK-DISABLE-SPMDIZATION-SAME: () #[[ATTR0:[0-9]+]] {
; CHECK-DISABLE-SPMDIZATION-NEXT: entry:
; CHECK-DISABLE-SPMDIZATION-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
-; CHECK-DISABLE-SPMDIZATION-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_2b_10393b5_spmd_l12_kernel_environment)
+; CHECK-DISABLE-SPMDIZATION-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false)
; CHECK-DISABLE-SPMDIZATION-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; CHECK-DISABLE-SPMDIZATION-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; CHECK-DISABLE-SPMDIZATION: is_worker_check:
@@ -113,19 +118,19 @@ define weak void @__omp_offloading_2b_10393b5_spmd_l12() #0 {
; CHECK-DISABLE-SPMDIZATION-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; CHECK-DISABLE-SPMDIZATION: user_code.entry:
; CHECK-DISABLE-SPMDIZATION-NEXT: call void @spmd_helper() #[[ATTR7:[0-9]+]]
-; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__kmpc_target_deinit()
+; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
; CHECK-DISABLE-SPMDIZATION-NEXT: ret void
; CHECK-DISABLE-SPMDIZATION: worker.exit:
; CHECK-DISABLE-SPMDIZATION-NEXT: ret void
;
entry:
- %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_2b_10393b5_spmd_l12_kernel_environment)
+ %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %worker.exit
user_code.entry: ; preds = %entry
call void @spmd_helper() #5
- call void @__kmpc_target_deinit()
+ call void @__kmpc_target_deinit(ptr @1, i8 1)
ret void
worker.exit: ; preds = %entry
@@ -133,19 +138,19 @@ worker.exit: ; preds = %entry
}
; Make it a weak definition so we will apply custom state machine rewriting but can't use the body in the reasoning.
-define weak i32 @__kmpc_target_init(ptr) {
+define weak i32 @__kmpc_target_init(ptr, i8, i1) {
; CHECK-LABEL: define {{[^@]+}}@__kmpc_target_init
-; CHECK-SAME: (ptr [[TMP0:%.*]]) {
+; CHECK-SAME: (ptr [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) {
; CHECK-NEXT: ret i32 0
;
; CHECK-DISABLE-SPMDIZATION-LABEL: define {{[^@]+}}@__kmpc_target_init
-; CHECK-DISABLE-SPMDIZATION-SAME: (ptr [[TMP0:%.*]]) {
+; CHECK-DISABLE-SPMDIZATION-SAME: (ptr [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) {
; CHECK-DISABLE-SPMDIZATION-NEXT: ret i32 0
;
ret i32 0
}
-declare void @__kmpc_target_deinit()
+declare void @__kmpc_target_deinit(ptr, i8)
; Function Attrs: convergent noinline norecurse nounwind
define weak void @__omp_offloading_2b_10393b5_generic_l20() #0 {
@@ -153,7 +158,7 @@ define weak void @__omp_offloading_2b_10393b5_generic_l20() #0 {
; CHECK-SAME: () #[[ATTR0]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
-; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_2b_10393b5_generic_l20_kernel_environment)
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false)
; CHECK-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; CHECK-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; CHECK: is_worker_check:
@@ -187,7 +192,7 @@ define weak void @__omp_offloading_2b_10393b5_generic_l20() #0 {
; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; CHECK: user_code.entry:
; CHECK-NEXT: call void @generic_helper() #[[ATTR7]]
-; CHECK-NEXT: call void @__kmpc_target_deinit()
+; CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
; CHECK-NEXT: ret void
; CHECK: worker.exit:
; CHECK-NEXT: ret void
@@ -196,7 +201,7 @@ define weak void @__omp_offloading_2b_10393b5_generic_l20() #0 {
; CHECK-DISABLE-SPMDIZATION-SAME: () #[[ATTR0]] {
; CHECK-DISABLE-SPMDIZATION-NEXT: entry:
; CHECK-DISABLE-SPMDIZATION-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
-; CHECK-DISABLE-SPMDIZATION-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_2b_10393b5_generic_l20_kernel_environment)
+; CHECK-DISABLE-SPMDIZATION-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false)
; CHECK-DISABLE-SPMDIZATION-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; CHECK-DISABLE-SPMDIZATION-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; CHECK-DISABLE-SPMDIZATION: is_worker_check:
@@ -230,19 +235,19 @@ define weak void @__omp_offloading_2b_10393b5_generic_l20() #0 {
; CHECK-DISABLE-SPMDIZATION-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; CHECK-DISABLE-SPMDIZATION: user_code.entry:
; CHECK-DISABLE-SPMDIZATION-NEXT: call void @generic_helper() #[[ATTR7]]
-; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__kmpc_target_deinit()
+; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
; CHECK-DISABLE-SPMDIZATION-NEXT: ret void
; CHECK-DISABLE-SPMDIZATION: worker.exit:
; CHECK-DISABLE-SPMDIZATION-NEXT: ret void
;
entry:
- %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_2b_10393b5_generic_l20_kernel_environment)
+ %0 = call i32 @__kmpc_target_init(ptr @1, i8 1, i1 true)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %worker.exit
user_code.entry: ; preds = %entry
call void @generic_helper() #5
- call void @__kmpc_target_deinit()
+ call void @__kmpc_target_deinit(ptr @1, i8 1)
ret void
worker.exit: ; preds = %entry
diff --git a/llvm/test/Transforms/OpenMP/spmdization_remarks.ll b/llvm/test/Transforms/OpenMP/spmdization_remarks.ll
index 08ddcfc07d50cf..368bc6a6fc38bf 100644
--- a/llvm/test/Transforms/OpenMP/spmdization_remarks.ll
+++ b/llvm/test/Transforms/OpenMP/spmdization_remarks.ll
@@ -38,8 +38,6 @@ target triple = "nvptx64"
;; }
%struct.ident_t = type { i32, i32, i32, i32, ptr }
-%struct.ConfigurationEnvironmentTy = type { i8, i8, i8 }
-%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr }
@0 = private unnamed_addr constant [103 x i8] c";llvm/test/Transforms/OpenMP/spmdization_remarks.c;__omp_offloading_2a_d80d3d_test_fallback_l11;11;1;;\00", align 1
@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8
@@ -47,25 +45,24 @@ target triple = "nvptx64"
@3 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @2 }, align 8
@4 = private unnamed_addr constant [104 x i8] c";llvm/test/Transforms/OpenMP/spmdization_remarks.c;__omp_offloading_2a_d80d3d_test_fallback_l11;11;25;;\00", align 1
@5 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @4 }, align 8
+@__omp_offloading_2a_d80d3d_test_fallback_l11_exec_mode = weak constant i8 1
@6 = private unnamed_addr constant [106 x i8] c";llvm/test/Transforms/OpenMP/spmdization_remarks.c;__omp_offloading_2a_d80d3d_test_no_fallback_l20;20;1;;\00", align 1
@7 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @6 }, align 8
@8 = private unnamed_addr constant [75 x i8] c";llvm/test/Transforms/OpenMP/spmdization_remarks.c;test_no_fallback;20;1;;\00", align 1
@9 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @8 }, align 8
@10 = private unnamed_addr constant [107 x i8] c";llvm/test/Transforms/OpenMP/spmdization_remarks.c;__omp_offloading_2a_d80d3d_test_no_fallback_l20;20;25;;\00", align 1
@11 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @10 }, align 8
+@__omp_offloading_2a_d80d3d_test_no_fallback_l20_exec_mode = weak constant i8 1
@12 = private unnamed_addr constant [63 x i8] c";llvm/test/Transforms/OpenMP/spmdization_remarks.c;known;4;1;;\00", align 1
@13 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 2, i32 0, ptr @12 }, align 8
@G = external global i32
-
-@__omp_offloading_2a_d80d3d_test_fallback_l11_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr @1, ptr null }
-@__omp_offloading_2a_d80d3d_test_no_fallback_l20_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr @1, ptr null }
-
+@llvm.compiler.used = appending global [2 x ptr] [ptr @__omp_offloading_2a_d80d3d_test_fallback_l11_exec_mode, ptr @__omp_offloading_2a_d80d3d_test_no_fallback_l20_exec_mode], section "llvm.metadata"
; Function Attrs: convergent norecurse nounwind
define weak void @__omp_offloading_2a_d80d3d_test_fallback_l11() local_unnamed_addr #0 !dbg !15 {
entry:
%captured_vars_addrs.i.i = alloca [0 x ptr], align 8
- %0 = call i32 @__kmpc_target_init(ptr nonnull @__omp_offloading_2a_d80d3d_test_fallback_l11_kernel_environment) #3, !dbg !18
+ %0 = call i32 @__kmpc_target_init(ptr nonnull @1, i8 1, i1 true) #3, !dbg !18
%exec_user_code = icmp eq i32 %0, -1, !dbg !18
br i1 %exec_user_code, label %user_code.entry, label %common.ret, !dbg !18
@@ -80,11 +77,11 @@ user_code.entry: ; preds = %entry
call void @__kmpc_parallel_51(ptr noundef nonnull @13, i32 %2, i32 noundef 1, i32 noundef -1, i32 noundef -1, ptr noundef @__omp_outlined__2, ptr noundef @__omp_outlined__2_wrapper, ptr noundef nonnull %captured_vars_addrs.i.i, i64 noundef 0) #3, !dbg !23
call void @llvm.lifetime.end.p0(i64 0, ptr nonnull %captured_vars_addrs.i.i) #3, !dbg !26
call void @unknown() #6, !dbg !27
- call void @__kmpc_target_deinit() #3, !dbg !28
+ call void @__kmpc_target_deinit(ptr nonnull @5, i8 1) #3, !dbg !28
br label %common.ret
}
-define weak i32 @__kmpc_target_init(ptr) {
+define weak i32 @__kmpc_target_init(ptr, i8, i1) {
ret i32 0
}
@@ -104,13 +101,13 @@ entry:
; Function Attrs: nounwind
declare i32 @__kmpc_global_thread_num(ptr) local_unnamed_addr #3
-declare void @__kmpc_target_deinit() local_unnamed_addr
+declare void @__kmpc_target_deinit(ptr, i8) local_unnamed_addr
; Function Attrs: norecurse nounwind
define weak void @__omp_offloading_2a_d80d3d_test_no_fallback_l20() local_unnamed_addr #4 !dbg !32 {
entry:
%captured_vars_addrs.i2.i = alloca [0 x ptr], align 8
- %0 = call i32 @__kmpc_target_init(ptr nonnull @__omp_offloading_2a_d80d3d_test_no_fallback_l20_kernel_environment) #3, !dbg !33
+ %0 = call i32 @__kmpc_target_init(ptr nonnull @7, i8 1, i1 true) #3, !dbg !33
%exec_user_code = icmp eq i32 %0, -1, !dbg !33
br i1 %exec_user_code, label %user_code.entry, label %common.ret, !dbg !33
@@ -132,7 +129,7 @@ user_code.entry: ; preds = %entry
call void @__kmpc_parallel_51(ptr noundef nonnull @13, i32 %4, i32 noundef 1, i32 noundef -1, i32 noundef -1, ptr noundef @__omp_outlined__2, ptr noundef @__omp_outlined__2_wrapper, ptr noundef nonnull %captured_vars_addrs.i2.i, i64 noundef 0) #3, !dbg !43
call void @llvm.lifetime.end.p0(i64 0, ptr nonnull %captured_vars_addrs.i2.i) #3, !dbg !45
call void @spmd_amenable()
- call void @__kmpc_target_deinit() #3, !dbg !46
+ call void @__kmpc_target_deinit(ptr nonnull @11, i8 1) #3, !dbg !46
br label %common.ret
}
diff --git a/llvm/test/Transforms/OpenMP/value-simplify-openmp-opt.ll b/llvm/test/Transforms/OpenMP/value-simplify-openmp-opt.ll
index 8bc5149e244cb1..e8e2c4f4144ed6 100644
--- a/llvm/test/Transforms/OpenMP/value-simplify-openmp-opt.ll
+++ b/llvm/test/Transforms/OpenMP/value-simplify-openmp-opt.ll
@@ -4,8 +4,7 @@
target triple = "amdgcn-amd-amdhsa"
-%struct.ConfigurationEnvironmentTy = type { i8, i8, i8 }
-%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr }
+%struct.ident_t = type { i32, i32, i32, i32, ptr }
@G = internal addrspace(3) global i32 undef, align 4
@H = internal addrspace(3) global i32 undef, align 4
@@ -25,36 +24,54 @@ target triple = "amdgcn-amd-amdhsa"
@UAA1 = internal addrspace(3) global i32 undef, align 4
@UAA2 = internal addrspace(3) global i32 undef, align 4
@str = private unnamed_addr addrspace(4) constant [1 x i8] c"\00", align 1
-@kernel_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1 }, ptr null, ptr null }
; Make sure we do not delete the stores to @G without also replacing the load with `1`.
;.
-; CHECK: @[[G:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
-; CHECK: @[[H:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
-; CHECK: @[[X:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
-; CHECK: @[[QA1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
-; CHECK: @[[QB1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
-; CHECK: @[[QC1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
-; CHECK: @[[QD1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
-; CHECK: @[[QA2:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
-; CHECK: @[[QB2:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
-; CHECK: @[[QC2:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
-; CHECK: @[[QD2:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
-; CHECK: @[[QA3:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
-; CHECK: @[[QB3:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
-; CHECK: @[[QC3:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
-; CHECK: @[[QD3:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
-; CHECK: @[[UAA1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
-; CHECK: @[[UAA2:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
-; CHECK: @[[STR:[a-zA-Z0-9_$"\\.-]+]] = private unnamed_addr addrspace(4) constant [1 x i8] zeroinitializer, align 1
-; CHECK: @[[KERNEL_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = local_unnamed_addr constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY:%.*]] { i8 0, i8 0, i8 1 }, ptr null, ptr null }
+; TUNIT: @[[G:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
+; TUNIT: @[[H:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
+; TUNIT: @[[X:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
+; TUNIT: @[[QA1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
+; TUNIT: @[[QB1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
+; TUNIT: @[[QC1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
+; TUNIT: @[[QD1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
+; TUNIT: @[[QA2:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
+; TUNIT: @[[QB2:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
+; TUNIT: @[[QC2:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
+; TUNIT: @[[QD2:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
+; TUNIT: @[[QA3:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
+; TUNIT: @[[QB3:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
+; TUNIT: @[[QC3:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
+; TUNIT: @[[QD3:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
+; TUNIT: @[[UAA1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
+; TUNIT: @[[UAA2:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
+; TUNIT: @[[STR:[a-zA-Z0-9_$"\\.-]+]] = private unnamed_addr addrspace(4) constant [1 x i8] zeroinitializer, align 1
+; TUNIT: @[[KERNEL_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0
+;.
+; CGSCC: @[[G:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
+; CGSCC: @[[H:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
+; CGSCC: @[[X:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
+; CGSCC: @[[QA1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
+; CGSCC: @[[QB1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
+; CGSCC: @[[QC1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
+; CGSCC: @[[QD1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
+; CGSCC: @[[QA2:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
+; CGSCC: @[[QB2:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
+; CGSCC: @[[QC2:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
+; CGSCC: @[[QD2:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
+; CGSCC: @[[QA3:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
+; CGSCC: @[[QB3:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
+; CGSCC: @[[QC3:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
+; CGSCC: @[[QD3:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
+; CGSCC: @[[UAA1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
+; CGSCC: @[[UAA2:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
+; CGSCC: @[[STR:[a-zA-Z0-9_$"\\.-]+]] = private unnamed_addr addrspace(4) constant [1 x i8] zeroinitializer, align 1
;.
define void @kernel() "kernel" {
;
; TUNIT: Function Attrs: norecurse
; TUNIT-LABEL: define {{[^@]+}}@kernel
; TUNIT-SAME: () #[[ATTR0:[0-9]+]] {
-; TUNIT-NEXT: [[CALL:%.*]] = call i32 @__kmpc_target_init(ptr @kernel_kernel_environment)
+; TUNIT-NEXT: [[CALL:%.*]] = call i32 @__kmpc_target_init(ptr undef, i8 1, i1 false)
; TUNIT-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], -1
; TUNIT-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
; TUNIT: if.then:
@@ -72,13 +89,13 @@ define void @kernel() "kernel" {
; TUNIT-NEXT: call void @barrier() #[[ATTR6]]
; TUNIT-NEXT: br label [[IF_END]]
; TUNIT: if.end:
-; TUNIT-NEXT: call void @__kmpc_target_deinit()
+; TUNIT-NEXT: call void @__kmpc_target_deinit(ptr undef, i8 1)
; TUNIT-NEXT: ret void
;
; CGSCC: Function Attrs: norecurse
; CGSCC-LABEL: define {{[^@]+}}@kernel
; CGSCC-SAME: () #[[ATTR0:[0-9]+]] {
-; CGSCC-NEXT: [[CALL:%.*]] = call i32 @__kmpc_target_init(ptr @kernel_kernel_environment)
+; CGSCC-NEXT: [[CALL:%.*]] = call i32 @__kmpc_target_init(ptr undef, i8 1, i1 false)
; CGSCC-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], -1
; CGSCC-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
; CGSCC: if.then:
@@ -96,10 +113,10 @@ define void @kernel() "kernel" {
; CGSCC-NEXT: call void @barrier() #[[ATTR6]]
; CGSCC-NEXT: br label [[IF_END]]
; CGSCC: if.end:
-; CGSCC-NEXT: call void @__kmpc_target_deinit()
+; CGSCC-NEXT: call void @__kmpc_target_deinit(ptr undef, i8 1)
; CGSCC-NEXT: ret void
;
- %call = call i32 @__kmpc_target_init(ptr @kernel_kernel_environment)
+ %call = call i32 @__kmpc_target_init(ptr undef, i8 1, i1 false)
%cmp = icmp eq i32 %call, -1
br i1 %cmp, label %if.then, label %if.else
if.then:
@@ -124,7 +141,7 @@ if.then2:
call void @barrier();
br label %if.end
if.end:
- call void @__kmpc_target_deinit()
+ call void @__kmpc_target_deinit(ptr undef, i8 1)
ret void
}
@@ -738,8 +755,8 @@ S:
declare void @sync()
declare void @barrier() norecurse nounwind nocallback "llvm.assume"="ompx_aligned_barrier"
declare void @use1(i32) nosync norecurse nounwind nocallback
-declare i32 @__kmpc_target_init(ptr) nocallback
-declare void @__kmpc_target_deinit() nocallback
+declare i32 @__kmpc_target_init(ptr, i8, i1) nocallback
+declare void @__kmpc_target_deinit(ptr, i8) nocallback
declare void @llvm.assume(i1)
!llvm.module.flags = !{!0, !1}
diff --git a/llvm/test/Transforms/PhaseOrdering/openmp-opt-module.ll b/llvm/test/Transforms/PhaseOrdering/openmp-opt-module.ll
index b8486ab6a6c9ec..791fbbd055c9f2 100644
--- a/llvm/test/Transforms/PhaseOrdering/openmp-opt-module.ll
+++ b/llvm/test/Transforms/PhaseOrdering/openmp-opt-module.ll
@@ -1,22 +1,21 @@
; RUN: opt -passes='default<O2>' -pass-remarks-missed=openmp-opt < %s 2>&1 | FileCheck %s --check-prefix=MODULE
target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"
-%struct.ConfigurationEnvironmentTy = type { i8, i8, i8 }
-%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr }
+%struct.ident_t = type { i32, i32, i32, i32, ptr }
@.str = private unnamed_addr constant [13 x i8] c"Alloc Shared\00", align 1
+
@S = external local_unnamed_addr global ptr
-@foo_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1 }, ptr null, ptr null }
; MODULE: remark: openmp_opt_module.c:5:7: Found thread data sharing on the GPU. Expect degraded performance due to data globalization.
define void @foo() "kernel" {
entry:
- %i = call i32 @__kmpc_target_init(ptr @foo_kernel_environment)
+ %i = call i32 @__kmpc_target_init(ptr null, i1 false, i1 true, i1 true)
%x = call ptr @__kmpc_alloc_shared(i64 4), !dbg !10
call void @use(ptr %x)
call void @__kmpc_free_shared(ptr %x)
- call void @__kmpc_target_deinit()
+ call void @__kmpc_target_deinit(ptr null, i1 false, i1 true)
ret void
}
@@ -32,8 +31,8 @@ entry:
declare ptr @_Z10SafeMallocmPKc(i64 %size, ptr nocapture readnone %msg)
declare void @__kmpc_free_shared(ptr)
-declare i32 @__kmpc_target_init(ptr)
-declare void @__kmpc_target_deinit()
+declare i32 @__kmpc_target_init(ptr, i1, i1 %use_generic_state_machine, i1)
+declare void @__kmpc_target_deinit(ptr, i1, i1)
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!3, !4, !5, !6}
diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
index 171aff83172a8d..82ab4c9f4c93ab 100644
--- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
+++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
@@ -5166,19 +5166,12 @@ TEST_F(OpenMPIRBuilderTest, TargetRegionDevice) {
auto *InitCall = dyn_cast<CallInst>(Init);
EXPECT_NE(InitCall, nullptr);
EXPECT_EQ(InitCall->getCalledFunction()->getName(), "__kmpc_target_init");
- EXPECT_EQ(InitCall->arg_size(), 1U);
+ EXPECT_EQ(InitCall->arg_size(), 3U);
EXPECT_TRUE(isa<GlobalVariable>(InitCall->getArgOperand(0)));
- auto *KernelEnvGV = cast<GlobalVariable>(InitCall->getArgOperand(0));
- EXPECT_TRUE(isa<ConstantStruct>(KernelEnvGV->getInitializer()));
- auto *KernelEnvC = cast<ConstantStruct>(KernelEnvGV->getInitializer());
- EXPECT_TRUE(isa<ConstantStruct>(KernelEnvC->getAggregateElement(0U)));
- auto ConfigC = cast<ConstantStruct>(KernelEnvC->getAggregateElement(0U));
- EXPECT_EQ(ConfigC->getAggregateElement(0U),
- ConstantInt::get(Type::getInt8Ty(Ctx), true));
- EXPECT_EQ(ConfigC->getAggregateElement(1U),
- ConstantInt::get(Type::getInt8Ty(Ctx), true));
- EXPECT_EQ(ConfigC->getAggregateElement(2U),
+ EXPECT_EQ(InitCall->getArgOperand(1),
ConstantInt::get(Type::getInt8Ty(Ctx), OMP_TGT_EXEC_MODE_GENERIC));
+ EXPECT_EQ(InitCall->getArgOperand(2),
+ ConstantInt::get(Type::getInt1Ty(Ctx), true));
auto *EntryBlockBranch = EntryBlock.getTerminator();
EXPECT_NE(EntryBlockBranch, nullptr);
@@ -5195,7 +5188,10 @@ TEST_F(OpenMPIRBuilderTest, TargetRegionDevice) {
auto *DeinitCall = dyn_cast<CallInst>(Deinit);
EXPECT_NE(DeinitCall, nullptr);
EXPECT_EQ(DeinitCall->getCalledFunction()->getName(), "__kmpc_target_deinit");
- EXPECT_EQ(DeinitCall->arg_size(), 0U);
+ EXPECT_EQ(DeinitCall->arg_size(), 2U);
+ EXPECT_TRUE(isa<GlobalVariable>(DeinitCall->getArgOperand(0)));
+ EXPECT_EQ(DeinitCall->getArgOperand(1),
+ ConstantInt::get(Type::getInt8Ty(Ctx), OMP_TGT_EXEC_MODE_GENERIC));
EXPECT_TRUE(isa<ReturnInst>(DeinitCall->getNextNode()));
diff --git a/mlir/test/Target/LLVMIR/omptarget-region-device-llvm.mlir b/mlir/test/Target/LLVMIR/omptarget-region-device-llvm.mlir
index 90b7e43c48e192..126fff70ce3b1f 100644
--- a/mlir/test/Target/LLVMIR/omptarget-region-device-llvm.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-region-device-llvm.mlir
@@ -25,10 +25,8 @@ module attributes {omp.is_target_device = true} {
// CHECK: @[[SRC_LOC:.*]] = private unnamed_addr constant [23 x i8] c"{{[^"]*}}", align 1
// CHECK: @[[IDENT:.*]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 22, ptr @[[SRC_LOC]] }, align 8
-// CHECK: @[[DYNA_ENV:.*]] = internal global %struct.DynamicEnvironmentTy zeroinitializer
-// CHECK: @[[KERNEL_ENV:.*]] = constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 1, i8 1 }, ptr @[[IDENT]], ptr @[[DYNA_ENV]] }
// CHECK: define weak_odr protected void @__omp_offloading_{{[^_]+}}_{{[^_]+}}_omp_target_region__l{{[0-9]+}}(ptr %[[ADDR_A:.*]], ptr %[[ADDR_B:.*]], ptr %[[ADDR_C:.*]])
-// CHECK: %[[INIT:.*]] = call i32 @__kmpc_target_init(ptr @[[KERNEL_ENV]])
+// CHECK: %[[INIT:.*]] = call i32 @__kmpc_target_init(ptr @[[IDENT]], i8 1, i1 true)
// CHECK-NEXT: %[[CMP:.*]] = icmp eq i32 %3, -1
// CHECK-NEXT: br i1 %[[CMP]], label %[[LABEL_ENTRY:.*]], label %[[LABEL_EXIT:.*]]
// CHECK: [[LABEL_ENTRY]]:
@@ -40,7 +38,7 @@ module attributes {omp.is_target_device = true} {
// CHECK: store i32 %[[C]], ptr %[[ADDR_C]], align 4
// CHECK: br label %[[LABEL_DEINIT:.*]]
// CHECK: [[LABEL_DEINIT]]:
-// CHECK-NEXT: call void @__kmpc_target_deinit()
+// CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[IDENT]], i8 1)
// CHECK-NEXT: ret void
// CHECK: [[LABEL_EXIT]]:
// CHECK-NEXT: ret void
diff --git a/openmp/libomptarget/DeviceRTL/CMakeLists.txt b/openmp/libomptarget/DeviceRTL/CMakeLists.txt
index 9615657a3d2721..630947abec7efc 100644
--- a/openmp/libomptarget/DeviceRTL/CMakeLists.txt
+++ b/openmp/libomptarget/DeviceRTL/CMakeLists.txt
@@ -123,7 +123,6 @@ set(bc_flags -c -foffload-lto -std=c++17 -fvisibility=hidden
-nocudalib -nogpulib -nostdinc
-fopenmp -fopenmp-cuda-mode
-Wno-unknown-cuda-version
- -DOMPTARGET_DEVICE_RUNTIME
-I${include_directory}
-I${devicertl_base_directory}/../include
${LIBOMPTARGET_LLVM_INCLUDE_DIRS_DEVICERTL}
diff --git a/openmp/libomptarget/DeviceRTL/include/Debug.h b/openmp/libomptarget/DeviceRTL/include/Debug.h
index 29e7e5bb347d60..c7bcfcea57124a 100644
--- a/openmp/libomptarget/DeviceRTL/include/Debug.h
+++ b/openmp/libomptarget/DeviceRTL/include/Debug.h
@@ -50,6 +50,8 @@ void __assert_fail(const char *expr, const char *msg, const char *file,
struct DebugEntryRAII {
DebugEntryRAII(const char *File, const unsigned Line, const char *Function);
~DebugEntryRAII();
+
+ static void init();
};
#endif
diff --git a/openmp/libomptarget/DeviceRTL/include/Interface.h b/openmp/libomptarget/DeviceRTL/include/Interface.h
index 8b6019b9dc2ab8..648da49b86f555 100644
--- a/openmp/libomptarget/DeviceRTL/include/Interface.h
+++ b/openmp/libomptarget/DeviceRTL/include/Interface.h
@@ -214,14 +214,12 @@ uint32_t __kmpc_get_warp_size();
/// Kernel
///
///{
-// Forward declaration
-struct KernelEnvironmentTy;
-
int8_t __kmpc_is_spmd_exec_mode();
-int32_t __kmpc_target_init(KernelEnvironmentTy &KernelEnvironment);
+int32_t __kmpc_target_init(IdentTy *Ident, int8_t Mode,
+ bool UseGenericStateMachine);
-void __kmpc_target_deinit();
+void __kmpc_target_deinit(IdentTy *Ident, int8_t Mode);
///}
diff --git a/openmp/libomptarget/DeviceRTL/include/State.h b/openmp/libomptarget/DeviceRTL/include/State.h
index 60dc439f9551c2..3491866c2dc9bf 100644
--- a/openmp/libomptarget/DeviceRTL/include/State.h
+++ b/openmp/libomptarget/DeviceRTL/include/State.h
@@ -17,9 +17,6 @@
#include "Types.h"
#include "Utils.h"
-// Forward declaration.
-struct KernelEnvironmentTy;
-
#pragma omp begin declare target device_type(nohost)
namespace ompx {
@@ -117,10 +114,7 @@ extern ThreadStateTy **ThreadStates;
#pragma omp allocate(ThreadStates) allocator(omp_pteam_mem_alloc)
/// Initialize the state machinery. Must be called by all threads.
-void init(bool IsSPMD, KernelEnvironmentTy &KernelEnvironment);
-
-/// Return the kernel environment associated with the current kernel.
-KernelEnvironmentTy &getKernelEnvironment();
+void init(bool IsSPMD);
/// TODO
enum ValueKind {
diff --git a/openmp/libomptarget/DeviceRTL/src/Configuration.cpp b/openmp/libomptarget/DeviceRTL/src/Configuration.cpp
index 82c1e0ba096f0f..994ff2b67bb34e 100644
--- a/openmp/libomptarget/DeviceRTL/src/Configuration.cpp
+++ b/openmp/libomptarget/DeviceRTL/src/Configuration.cpp
@@ -12,7 +12,7 @@
//===----------------------------------------------------------------------===//
#include "Configuration.h"
-#include "Environment.h"
+#include "DeviceEnvironment.h"
#include "State.h"
#include "Types.h"
@@ -57,9 +57,7 @@ bool config::isDebugMode(config::DebugKind Kind) {
bool config::mayUseThreadStates() { return !__omp_rtl_assume_no_thread_state; }
bool config::mayUseNestedParallelism() {
- if (__omp_rtl_assume_no_nested_parallelism)
- return false;
- return state::getKernelEnvironment().Configuration.MayUseNestedParallelism;
+ return !__omp_rtl_assume_no_nested_parallelism;
}
#pragma omp end declare target
diff --git a/openmp/libomptarget/DeviceRTL/src/Debug.cpp b/openmp/libomptarget/DeviceRTL/src/Debug.cpp
index 873dccb929ea66..2eaf3436e287c1 100644
--- a/openmp/libomptarget/DeviceRTL/src/Debug.cpp
+++ b/openmp/libomptarget/DeviceRTL/src/Debug.cpp
@@ -12,10 +12,8 @@
#include "Debug.h"
#include "Configuration.h"
-#include "Environment.h"
#include "Interface.h"
#include "Mapping.h"
-#include "State.h"
#include "Types.h"
using namespace ompx;
@@ -37,14 +35,15 @@ void __assert_fail(const char *expr, const char *msg, const char *file,
}
}
+/// Current indentation level for the function trace. Only accessed by thread 0.
+__attribute__((loader_uninitialized)) static uint32_t Level;
+#pragma omp allocate(Level) allocator(omp_pteam_mem_alloc)
+
DebugEntryRAII::DebugEntryRAII(const char *File, const unsigned Line,
const char *Function) {
if (config::isDebugMode(config::DebugKind::FunctionTracing) &&
mapping::getThreadIdInBlock() == 0 && mapping::getBlockId() == 0) {
- uint16_t &Level =
- state::getKernelEnvironment().DynamicEnv->DebugIndentionLevel;
-
for (int I = 0; I < Level; ++I)
PRINTF("%s", " ");
@@ -56,11 +55,10 @@ DebugEntryRAII::DebugEntryRAII(const char *File, const unsigned Line,
DebugEntryRAII::~DebugEntryRAII() {
if (config::isDebugMode(config::DebugKind::FunctionTracing) &&
- mapping::getThreadIdInBlock() == 0 && mapping::getBlockId() == 0) {
- uint16_t &Level =
- state::getKernelEnvironment().DynamicEnv->DebugIndentionLevel;
+ mapping::getThreadIdInBlock() == 0 && mapping::getBlockId() == 0)
Level--;
- }
}
+void DebugEntryRAII::init() { Level = 0; }
+
#pragma omp end declare target
diff --git a/openmp/libomptarget/DeviceRTL/src/Kernel.cpp b/openmp/libomptarget/DeviceRTL/src/Kernel.cpp
index bc1d8d5b3d33f0..9502d1af6357b3 100644
--- a/openmp/libomptarget/DeviceRTL/src/Kernel.cpp
+++ b/openmp/libomptarget/DeviceRTL/src/Kernel.cpp
@@ -11,7 +11,6 @@
//===----------------------------------------------------------------------===//
#include "Debug.h"
-#include "Environment.h"
#include "Interface.h"
#include "Mapping.h"
#include "State.h"
@@ -24,12 +23,11 @@ using namespace ompx;
#pragma omp begin declare target device_type(nohost)
-static void inititializeRuntime(bool IsSPMD,
- KernelEnvironmentTy &KernelEnvironment) {
+static void inititializeRuntime(bool IsSPMD) {
// Order is important here.
synchronize::init(IsSPMD);
mapping::init(IsSPMD);
- state::init(IsSPMD, KernelEnvironment);
+ state::init(IsSPMD);
}
/// Simple generic state machine for worker threads.
@@ -69,17 +67,16 @@ extern "C" {
///
/// \param Ident Source location identification, can be NULL.
///
-int32_t __kmpc_target_init(KernelEnvironmentTy &KernelEnvironment) {
+int32_t __kmpc_target_init(IdentTy *Ident, int8_t Mode,
+ bool UseGenericStateMachine) {
FunctionTracingRAII();
- ConfigurationEnvironmentTy &Configuration = KernelEnvironment.Configuration;
- bool IsSPMD = Configuration.ExecMode &
- llvm::omp::OMPTgtExecModeFlags::OMP_TGT_EXEC_MODE_SPMD;
- bool UseGenericStateMachine = Configuration.UseGenericStateMachine;
+ const bool IsSPMD =
+ Mode & llvm::omp::OMPTgtExecModeFlags::OMP_TGT_EXEC_MODE_SPMD;
if (IsSPMD) {
- inititializeRuntime(/* IsSPMD */ true, KernelEnvironment);
+ inititializeRuntime(/* IsSPMD */ true);
synchronize::threadsAligned(atomic::relaxed);
} else {
- inititializeRuntime(/* IsSPMD */ false, KernelEnvironment);
+ inititializeRuntime(/* IsSPMD */ false);
// No need to wait since only the main threads will execute user
// code and workers will run into a barrier right away.
}
@@ -111,7 +108,7 @@ int32_t __kmpc_target_init(KernelEnvironmentTy &KernelEnvironment) {
// thread's warp, so none of its threads can ever be active worker threads.
if (UseGenericStateMachine &&
mapping::getThreadIdInBlock() < mapping::getBlockSize(IsSPMD)) {
- genericStateMachine(KernelEnvironment.Ident);
+ genericStateMachine(Ident);
} else {
// Retrieve the work function just to ensure we always call
// __kmpc_kernel_parallel even if a custom state machine is used.
@@ -135,10 +132,11 @@ int32_t __kmpc_target_init(KernelEnvironmentTy &KernelEnvironment) {
///
/// \param Ident Source location identification, can be NULL.
///
-void __kmpc_target_deinit() {
+void __kmpc_target_deinit(IdentTy *Ident, int8_t Mode) {
FunctionTracingRAII();
- bool IsSPMD = mapping::isSPMDMode();
- state::assumeInitialState(IsSPMD);
+ const bool IsSPMD =
+ Mode & llvm::omp::OMPTgtExecModeFlags::OMP_TGT_EXEC_MODE_SPMD;
+
if (IsSPMD)
return;
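
Without a KernelEnvironmentTy, __kmpc_target_init and __kmpc_target_deinit again receive the source location, execution-mode byte, and state-machine choice as explicit arguments. A rough sketch of the calling shape a generic-mode kernel wrapper takes after this revert; IdentTy is an opaque stand-in and the wrapper is illustrative, not actual clang codegen output:

    #include <cstdint>

    struct IdentTy; // opaque stand-in for the runtime's ident_t

    extern "C" int32_t __kmpc_target_init(IdentTy *Ident, int8_t Mode,
                                          bool UseGenericStateMachine);
    extern "C" void __kmpc_target_deinit(IdentTy *Ident, int8_t Mode);

    // Values mirror llvm::omp::OMPTgtExecModeFlags.
    constexpr int8_t EXEC_MODE_GENERIC = 0x01;
    constexpr int8_t EXEC_MODE_SPMD = 0x02;

    void exampleGenericKernelWrapper(IdentTy *Loc) {
      int32_t Res = __kmpc_target_init(Loc, EXEC_MODE_GENERIC,
                                       /*UseGenericStateMachine=*/true);
      if (Res != -1)
        return; // workers leave; the generic state machine already ran for them
      // Main thread: the target region body would execute here.
      __kmpc_target_deinit(Loc, EXEC_MODE_GENERIC);
    }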
diff --git a/openmp/libomptarget/DeviceRTL/src/State.cpp b/openmp/libomptarget/DeviceRTL/src/State.cpp
index f824ea2809c217..c181478c218db5 100644
--- a/openmp/libomptarget/DeviceRTL/src/State.cpp
+++ b/openmp/libomptarget/DeviceRTL/src/State.cpp
@@ -9,8 +9,8 @@
//===----------------------------------------------------------------------===//
#include "State.h"
+#include "Configuration.h"
#include "Debug.h"
-#include "Environment.h"
#include "Interface.h"
#include "Mapping.h"
#include "Synchronization.h"
@@ -34,9 +34,6 @@ constexpr const uint32_t Alignment = 16;
extern unsigned char DynamicSharedBuffer[] __attribute__((aligned(Alignment)));
#pragma omp allocate(DynamicSharedBuffer) allocator(omp_pteam_mem_alloc)
-/// The kernel environment passed to the init method by the compiler.
-static KernelEnvironmentTy *SHARED(KernelEnvironmentPtr);
-
namespace {
/// Fallback implementations are missing to trigger a link time error.
@@ -245,19 +242,15 @@ int returnValIfLevelIsActive(int Level, int Val, int DefaultVal,
} // namespace
-void state::init(bool IsSPMD, KernelEnvironmentTy &KernelEnvironment) {
+void state::init(bool IsSPMD) {
SharedMemorySmartStack.init(IsSPMD);
if (mapping::isInitialThreadInLevel0(IsSPMD)) {
TeamState.init(IsSPMD);
+ DebugEntryRAII::init();
ThreadStates = nullptr;
- KernelEnvironmentPtr = &KernelEnvironment;
}
}
-KernelEnvironmentTy &state::getKernelEnvironment() {
- return *KernelEnvironmentPtr;
-}
-
void state::enterDataEnvironment(IdentTy *Ident) {
ASSERT(config::mayUseThreadStates(),
"Thread state modified while explicitly disabled!");
diff --git a/openmp/libomptarget/include/DeviceEnvironment.h b/openmp/libomptarget/include/DeviceEnvironment.h
new file mode 100644
index 00000000000000..4260002a1f0361
--- /dev/null
+++ b/openmp/libomptarget/include/DeviceEnvironment.h
@@ -0,0 +1,26 @@
+//===---- device_environment.h - OpenMP GPU device environment ---- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Global device environment
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _OMPTARGET_DEVICE_ENVIRONMENT_H_
+#define _OMPTARGET_DEVICE_ENVIRONMENT_H_
+
+// deviceRTL uses <stdint> and DeviceRTL uses explicit definitions
+
+struct DeviceEnvironmentTy {
+ uint32_t DebugKind;
+ uint32_t NumDevices;
+ uint32_t DeviceNum;
+ uint32_t DynamicMemSize;
+ uint64_t ClockFrequency;
+};
+
+#endif
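
The restored header is shared between the device runtime and the host plugins, which fill a device-side global of this type when an image is loaded. A hedged host-side sketch of that step; writeDeviceGlobal is a hypothetical stand-in for the plugin's GenericGlobalHandlerTy path, and the symbol name in the call is an assumption, not something this patch defines:

    #include <stdint.h>

    #include "DeviceEnvironment.h"

    // Hypothetical helper: a real plugin copies the bytes into the named global
    // of the loaded device image through its global handler.
    static bool writeDeviceGlobal(const char *Name, const void *Data,
                                  uint64_t Size) {
      (void)Name; (void)Data; (void)Size;
      return false; // stub for the sketch
    }

    bool setupDeviceEnvironmentSketch(uint32_t DebugKind, uint32_t NumDevices,
                                      uint32_t DeviceId, uint32_t DynSharedBytes,
                                      uint64_t ClockFreqHz) {
      DeviceEnvironmentTy Env{};
      Env.DebugKind = DebugKind;           // debug bits checked on the device
      Env.NumDevices = NumDevices;
      Env.DeviceNum = DeviceId;
      Env.DynamicMemSize = DynSharedBytes; // dynamic shared memory per kernel
      Env.ClockFrequency = ClockFreqHz;    // device clock, if the target needs it
      return writeDeviceGlobal("omptarget_device_environment", &Env, sizeof(Env));
    }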
diff --git a/openmp/libomptarget/include/Environment.h b/openmp/libomptarget/include/Environment.h
deleted file mode 100644
index 094ad107461c93..00000000000000
--- a/openmp/libomptarget/include/Environment.h
+++ /dev/null
@@ -1,62 +0,0 @@
-//===------------ Environment.h - OpenMP GPU environments --------- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// Environments shared between host and device.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef _OMPTARGET_ENVIRONMENT_H_
-#define _OMPTARGET_ENVIRONMENT_H_
-
-#ifdef OMPTARGET_DEVICE_RUNTIME
-#include "Types.h"
-#else
-#include "SourceInfo.h"
-
-#include <cstdint>
-
-using IdentTy = ident_t;
-#endif
-
-#include "llvm/Frontend/OpenMP/OMPDeviceConstants.h"
-
-struct DeviceEnvironmentTy {
- uint32_t DebugKind;
- uint32_t NumDevices;
- uint32_t DeviceNum;
- uint32_t DynamicMemSize;
- uint64_t ClockFrequency;
-};
-
-// NOTE: Please don't change the order of those members as their indices are
-// used in the middle end. Always add the new data member at the end.
-// Different from KernelEnvironmentTy below, this structure contains members
-// that might be modified at runtime.
-struct DynamicEnvironmentTy {
- /// Current indentation level for the function trace. Only accessed by thread
- /// 0.
- uint16_t DebugIndentionLevel;
-};
-
-// NOTE: Please don't change the order of those members as their indices are
-// used in the middle end. Always add the new data member at the end.
-struct ConfigurationEnvironmentTy {
- uint8_t UseGenericStateMachine;
- uint8_t MayUseNestedParallelism;
- llvm::omp::OMPTgtExecModeFlags ExecMode;
-};
-
-// NOTE: Please don't change the order of those members as their indices are
-// used in the middle end. Always add the new data member at the end.
-struct KernelEnvironmentTy {
- ConfigurationEnvironmentTy Configuration;
- IdentTy *Ident;
- DynamicEnvironmentTy *DynamicEnv;
-};
-
-#endif // _OMPTARGET_ENVIRONMENT_H_
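
The deleted header's note about member order is the contract being reverted: host code, device code, and the middle end address these environment structs by field index, so their layouts have to stay in sync by hand. A tiny illustration of that kind of layout check (the struct below is a local stand-in, not the deleted one):

    #include <cstddef>
    #include <cstdint>

    struct ConfigurationEnvironmentSketchTy {
      uint8_t UseGenericStateMachine;
      uint8_t MayUseNestedParallelism;
      uint8_t ExecMode; // stand-in for llvm::omp::OMPTgtExecModeFlags
    };

    // Reordering members would silently break whatever indexes field 2.
    static_assert(offsetof(ConfigurationEnvironmentSketchTy, ExecMode) == 2,
                  "field order is part of the host/device/middle-end contract");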
diff --git a/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
index e443f8be557845..6ca57ddadd930a 100644
--- a/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -21,7 +21,7 @@
#include <unordered_map>
#include "Debug.h"
-#include "Environment.h"
+#include "DeviceEnvironment.h"
#include "GlobalHandler.h"
#include "OmptCallback.h"
#include "PluginInterface.h"
diff --git a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
index 3782a8ed0bb8ce..4ca6e42698c9c0 100644
--- a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
+++ b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
@@ -706,45 +706,32 @@ Error GenericDeviceTy::registerKernelOffloadEntry(
return Plugin::success();
}
-Expected<KernelEnvironmentTy>
-GenericDeviceTy::getKernelEnvironmentForKernel(StringRef Name,
- DeviceImageTy &Image) {
- // Create a metadata object for the kernel environment object.
- StaticGlobalTy<KernelEnvironmentTy> KernelEnv(Name.data(),
- "_kernel_environment");
-
- // Retrieve kernel environment object for the kernel.
+Expected<OMPTgtExecModeFlags>
+GenericDeviceTy::getExecutionModeForKernel(StringRef Name,
+ DeviceImageTy &Image) {
+ // Create a metadata object for the exec mode global (auto-generated).
+ StaticGlobalTy<llvm::omp::OMPTgtExecModeFlags> ExecModeGlobal(Name.data(),
+ "_exec_mode");
+
+ // Retrieve execution mode for the kernel. This may fail since some kernels
+ // may not have an execution mode.
GenericGlobalHandlerTy &GHandler = Plugin::get().getGlobalHandler();
- if (auto Err = GHandler.readGlobalFromImage(*this, Image, KernelEnv)) {
+ if (auto Err = GHandler.readGlobalFromImage(*this, Image, ExecModeGlobal)) {
// Consume the error since it is acceptable to fail.
[[maybe_unused]] std::string ErrStr = toString(std::move(Err));
- DP("Failed to read kernel environment object for '%s': %s\n", Name.data(),
- ErrStr.data());
-
- return createStringError(inconvertibleErrorCode(), ErrStr);
- }
+ DP("Failed to read execution mode for '%s': %s\n"
+ "Using default SPMD (2) execution mode\n",
+ Name.data(), ErrStr.data());
- return KernelEnv.getValue();
-}
-
-Expected<OMPTgtExecModeFlags>
-GenericDeviceTy::getExecutionModeForKernel(StringRef Name,
- DeviceImageTy &Image) {
- auto KernelEnvOrError = getKernelEnvironmentForKernel(Name, Image);
- if (!KernelEnvOrError) {
- (void)KernelEnvOrError.takeError();
return OMP_TGT_EXEC_MODE_SPMD;
}
- auto &KernelEnv = *KernelEnvOrError;
- auto ExecMode = KernelEnv.Configuration.ExecMode;
-
// Check that the retrieved execution mode is valid.
- if (!GenericKernelTy::isValidExecutionMode(ExecMode))
- return Plugin::error("Invalid execution mode %d for '%s'", ExecMode,
- Name.data());
+ if (!GenericKernelTy::isValidExecutionMode(ExecModeGlobal.getValue()))
+ return Plugin::error("Invalid execution mode %d for '%s'",
+ ExecModeGlobal.getValue(), Name.data());
- return ExecMode;
+ return ExecModeGlobal.getValue();
}
Error PinnedAllocationMapTy::insertEntry(void *HstPtr, void *DevAccessiblePtr,
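
The reverted plugin path reads the auto-generated "<kernel>_exec_mode" global and quietly falls back to SPMD (2) when the global is missing, instead of treating a missing kernel environment as an error. A condensed sketch of that fallback; readExecModeGlobal is a hypothetical stand-in for the GenericGlobalHandlerTy lookup:

    #include <cstdint>
    #include <optional>
    #include <string>

    enum ExecModeSketch : uint8_t { // values mirror llvm::omp::OMPTgtExecModeFlags
      EXEC_MODE_GENERIC = 0x01,
      EXEC_MODE_SPMD = 0x02,
      EXEC_MODE_GENERIC_SPMD = 0x03,
    };

    // Hypothetical lookup; a real plugin reads the global out of the image.
    static std::optional<uint8_t> readExecModeGlobal(const std::string &GlobalName) {
      (void)GlobalName;
      return std::nullopt; // pretend the global is absent
    }

    ExecModeSketch getExecutionModeSketch(const std::string &KernelName) {
      if (auto Val = readExecModeGlobal(KernelName + "_exec_mode"))
        return static_cast<ExecModeSketch>(*Val); // validity check elided
      return EXEC_MODE_SPMD; // acceptable failure: default to SPMD (2)
    }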
diff --git a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h
index 15e840b92b3f14..f21c2659a140b6 100644
--- a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h
+++ b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h
@@ -20,7 +20,7 @@
#include <vector>
#include "Debug.h"
-#include "Environment.h"
+#include "DeviceEnvironment.h"
#include "GlobalHandler.h"
#include "JIT.h"
#include "MemoryManager.h"
@@ -868,12 +868,6 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
/// Internal representation for OMPT device (initialize & finalize)
std::atomic<bool> OmptInitialized;
#endif
-
-private:
-
- /// Return the kernel environment object for kernel \p Name.
- Expected<KernelEnvironmentTy>
- getKernelEnvironmentForKernel(StringRef Name, DeviceImageTy &Image);
};
/// Class implementing common functionalities of offload plugins. Each plugin
diff --git a/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp b/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp
index eb1400b7f38610..f05fbb593fb354 100644
--- a/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp
+++ b/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp
@@ -17,7 +17,7 @@
#include <unordered_map>
#include "Debug.h"
-#include "Environment.h"
+#include "DeviceEnvironment.h"
#include "GlobalHandler.h"
#include "OmptCallback.h"
#include "PluginInterface.h"
diff --git a/openmp/libomptarget/plugins-nextgen/generic-elf-64bit/src/rtl.cpp b/openmp/libomptarget/plugins-nextgen/generic-elf-64bit/src/rtl.cpp
index 736611d2e8af5d..a47015e2fa29b1 100644
--- a/openmp/libomptarget/plugins-nextgen/generic-elf-64bit/src/rtl.cpp
+++ b/openmp/libomptarget/plugins-nextgen/generic-elf-64bit/src/rtl.cpp
@@ -17,7 +17,7 @@
#include <unordered_map>
#include "Debug.h"
-#include "Environment.h"
+#include "DeviceEnvironment.h"
#include "GlobalHandler.h"
#include "PluginInterface.h"
#include "omptarget.h"